2 from TagStrip import strip, strip_readable
3 from Components.Scanner import ScanFile
class BaseFeed:
    """Base-class for all Feeds. Initializes needed Elements.

    Holds the state shared by the concrete parsers: the feed identity
    (``uri``), display metadata (``title``, ``description``), the
    timestamp of the last seen update, the ids of items that were
    already delivered and a bounded history of parsed items.
    """

    # Upper bound for the number of items the parsers keep in ``history``
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, autoupdate):
        """Initialize an empty feed.

        uri        -- address of the feed; also used as its identifier
        autoupdate -- stored as-is on the instance
        """
        # Set URI (used as Identifier)
        self.uri = uri

        self.autoupdate = autoupdate

        # Until the feed has been downloaded the URI doubles as title
        self.title = uri.encode("UTF-8")
        # NOTE(review): empty placeholder so the attribute always exists;
        # confirm whether a (translated) default text is expected here
        self.description = ""
        self.last_update = None
        # Ids of items already handed out; the parsers test membership
        # and call add() on it
        self.last_ids = set()
        # Parsed items, newest first; the parsers prepend to this list
        self.history = []
class AtomFeed(BaseFeed):
    """Parses an Atom-Feed into expected format."""

    def gotDom(self, dom):
        """Parse a downloaded Atom document.

        Returns the list of new items (see parse), or an empty list if
        the document's first "updated" element equals the value seen on
        the previous call.
        """
        # Try to read when feed was last updated, if time equals return
        # empty list. else fetch new items
        try:
            updated = dom.getElementsByTagName("updated")[0].childNodes[0].data
        except (IndexError, AttributeError):
            # No usable timestamp in the document: always parse
            pass
        else:
            if self.last_update == updated:
                return []
            self.last_update = updated

        return AtomFeed.parse(self, dom.getElementsByTagName("entry"))

    def parse(self, items):
        """Convert "entry" elements into item tuples.

        Each new item is a tuple (title, link, summary, enclosures) with
        the three text fields encoded as UTF-8 and enclosures being a
        list of ScanFile objects. Entries without title or id and
        entries whose id is already in self.last_ids are skipped.

        New items are put in front of self.history, which is cut down
        to MAX_HISTORY_ELEMENTS. Returns the list of new items.
        """
        new_items = []
        for item in items:
            enclosure = []
            link = ""

            # Try to read title, continue if none found
            try:
                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
            except (IndexError, AttributeError):
                continue

            # Try to read id, continue if none found (invalid feed, should
            # be handled differently) or to be excluded
            try:
                entry_id = item.getElementsByTagName("id")[0].childNodes[0].data
            except (IndexError, AttributeError):
                continue
            if entry_id in self.last_ids:
                continue

            # Read out enclosures and link
            for current in item.getElementsByTagName("link"):
                if current.getAttribute("rel") == "enclosure":
                    href = current.getAttribute("href").encode("UTF-8")
                    mimetype = current.getAttribute("type").encode("UTF-8")
                    size = None
                    if current.hasAttribute("length"):
                        try:
                            # Length is given in bytes, keep whole MiB
                            size = int(current.getAttribute("length")) // 1048576
                        except ValueError:
                            # A malformed length must not abort the whole feed
                            size = None

                    # Workaround so PicturePlayer does not try to open these
                    if mimetype in ["image/jpeg", "image/png", "image/gif", "image/bmp"]:
                        continue
                    enclosure.append(ScanFile(href, mimetype=mimetype, size=size, autodetect=False))
                # No Enclosure, assume its a link to the item
                else:
                    link = current.getAttribute("href")

            # Try to read summary, empty if none
            try:
                summary = strip_readable(item.getElementsByTagName("summary")[0].childNodes[0].data)
            except (IndexError, AttributeError):
                summary = ""

            new_items.append((
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosure,
            ))
            # Remember the id so the entry is not delivered a second time
            self.last_ids.add(entry_id)

        # Append known Items to new Items and cut the result; the slice
        # has to be assigned back, otherwise the history grows unbounded
        self.history = (new_items + self.history)[:self.MAX_HISTORY_ELEMENTS]

        return new_items
class RSSFeed(BaseFeed):
    """Parses an RSS-Feed into expected format."""

    def gotDom(self, dom):
        """Parse a downloaded RSS document.

        Returns the list of new items (see parse), or an empty list if
        the document's first "lastBuildDate" element equals the value
        seen on the previous call.
        """
        # Try to read when feed was last updated, if time equals return
        # empty list. else fetch new items
        try:
            updated = dom.getElementsByTagName("lastBuildDate")[0].childNodes[0].data
        except (IndexError, AttributeError):
            # No usable timestamp in the document: always parse
            pass
        else:
            if self.last_update == updated:
                return []
            self.last_update = updated

        return RSSFeed.parse(self, dom.getElementsByTagName("item"))

    def parse(self, items):
        """Convert "item" elements into item tuples.

        Each new item is a tuple (title, link, summary, enclosures) with
        the three text fields encoded as UTF-8 and enclosures being a
        list of ScanFile objects. Items without title and items whose
        guid (or link, if there is no guid) is already in self.last_ids
        are skipped.

        New items are put in front of self.history, which is cut down
        to MAX_HISTORY_ELEMENTS. Returns the list of new items.
        """
        new_items = []
        for item in items:
            enclosure = []

            # Try to read title, continue if none found
            try:
                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
            except (IndexError, AttributeError):
                continue

            # Try to read link, empty if none
            try:
                link = item.getElementsByTagName("link")[0].childNodes[0].data
            except (IndexError, AttributeError):
                link = ""

            # Try to read guid, link if none (RSS 1.0 or invalid RSS 2.0)
            try:
                guid = item.getElementsByTagName("guid")[0].childNodes[0].data
            except (IndexError, AttributeError):
                guid = link

            # Continue if item is to be excluded
            if guid in self.last_ids:
                continue

            # Try to read summary (description element), empty if none
            try:
                summary = strip_readable(item.getElementsByTagName("description")[0].childNodes[0].data)
            except (IndexError, AttributeError):
                summary = ""

            # Read out enclosures
            for current in item.getElementsByTagName("enclosure"):
                href = current.getAttribute("url").encode("UTF-8")
                mimetype = current.getAttribute("type").encode("UTF-8")
                size = None
                if current.hasAttribute("length"):
                    try:
                        # Length is given in bytes, keep whole MiB
                        size = int(current.getAttribute("length")) // 1048576
                    except ValueError:
                        # A malformed length must not abort the whole feed
                        size = None

                # Workaround so PicturePlayer does not try to open these
                if mimetype in ["image/jpeg", "image/png", "image/gif", "image/bmp"]:
                    continue
                enclosure.append(ScanFile(href, mimetype=mimetype, size=size, autodetect=False))

            new_items.append((
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosure,
            ))
            # Remember the guid so the item is not delivered a second time
            self.last_ids.add(guid)

        # Append known Items to new Items and cut the result; the slice
        # has to be assigned back, otherwise the history grows unbounded
        self.history = (new_items + self.history)[:self.MAX_HISTORY_ELEMENTS]

        return new_items
# NOTE(review): listing BaseFeed before its own subclasses only works
# while BaseFeed is an old-style class; new-style classes reject this
# base order (inconsistent MRO). Confirm before porting.
class UniversalFeed(BaseFeed, RSSFeed, AtomFeed):
    """Universal Feed which on first run determines its type and calls the correct parsing-functions"""

    def __init__(self, uri, autoupdate):
        """Initialize the feed; its type is detected by the first gotDom call."""
        BaseFeed.__init__(self, uri, autoupdate)

        # None means "not detected yet"; gotDom sets it to "rss", "atom"
        # or "unknown"
        self.type = None

    def _readInfo(self, node, descriptionTag):
        """Copy the feed title and description found below node to self.

        Both values are looked up independently, so a feed lacking one of
        them still provides the other. Values that cannot be read leave
        the corresponding attribute untouched.
        """
        for attribute, tagName in (("title", "title"), ("description", descriptionTag)):
            try:
                text = node.getElementsByTagName(tagName)[0].childNodes[0].data
            except (IndexError, AttributeError):
                continue
            setattr(self, attribute, text)

    def gotDom(self, dom):
        """Dispatch a downloaded document to the matching parser.

        On the first call the feed type is derived from the document
        element, title and description are read, and the method calls
        itself again to do the actual parsing.

        Returns whatever RSSFeed.gotDom / AtomFeed.gotDom return. Raises
        NotImplementedError if the document is neither RSS nor Atom; on
        later calls for such a feed no branch matches and None is
        returned.
        """
        if self.type == "rss":
            print("[SimpleRSS] type is rss")
            return RSSFeed.gotDom(self, dom)
        elif self.type == "atom":
            print("[SimpleRSS] type is atom")
            return AtomFeed.gotDom(self, dom)
        elif self.type is None:
            root = dom.documentElement

            # RSS with a version attribute on the document element
            if root.getAttribute("version") in ["2.0", "0.94", "0.93", "0.92", "0.91"]:
                self.type = "rss"
                channels = dom.getElementsByTagName("channel")
                if channels:
                    self._readInfo(channels[0], "description")
            # RSS 1.0 (NS: http://www.w3.org/1999/02/22-rdf-syntax-ns#)
            elif root.localName == "RDF":
                self.type = "rss"
                channels = dom.getElementsByTagName("channel")
                if channels:
                    self._readInfo(channels[0], "description")
            # Atom (NS: http://www.w3.org/2005/Atom)
            elif root.localName == "feed":
                self.type = "atom"
                self._readInfo(dom, "subtitle")
            else:
                self.type = "unknown"
                raise NotImplementedError('Unsupported Feed: %s' % root.localName)

            self.title = strip(self.title).encode("UTF-8")
            self.description = strip_readable(self.description).encode("UTF-8")

            # Re-run function to parse dom
            return self.gotDom(dom)