add "preview" type update_notification to notifications too instead of just displaying it
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from sets import Set
2 from TagStrip import strip, strip_readable
3 from Components.Scanner import ScanFile
4
class BaseFeed:
        """Common state of a feed: identity, update policy and item history."""

        # Upper bound for the number of items kept in self.history
        MAX_HISTORY_ELEMENTS = 100

        def __init__(self, uri, autoupdate, title = "", description = ""):
                """Store the feed settings and start with an empty history.

                uri doubles as the identifier of the feed; when no title is
                given the UTF-8 encoded uri is used as title instead.
                """
                # Identity and update policy
                self.uri = uri
                self.autoupdate = autoupdate

                # Descriptive texts, title falls back to the uri
                if title:
                        self.title = title
                else:
                        self.title = uri.encode("UTF-8")
                self.description = description

                # Nothing fetched yet
                self.last_update = None
                self.last_ids = set()
                self.history = []
22
class AtomFeed:
        """Parses an Atom-Feed into expected format.

        The methods read and update the attributes last_update, last_ids,
        history and MAX_HISTORY_ELEMENTS of the instance (BaseFeed sets
        these up).
        """

        def gotDom(self, dom):
                """Return the entries of dom which have not been seen before.

                An empty list is returned without parsing when the first
                <updated> element holds the same value as on the previous
                call. If that element cannot be read the entries are parsed
                unconditionally.
                """
                try:
                        # Try to read when feed was last updated, if time equals return empty list. else fetch new items
                        updated = dom.getElementsByTagName("updated")[0].childNodes[0].data
                        if self.last_update == updated:
                                return [ ]
                        self.last_update = updated
                except Exception:
                        # Best effort: a missing timestamp must not prevent parsing
                        pass
                return AtomFeed.parse(self, dom.getElementsByTagName("entry"))

        def parse(self, items):
                """Add unseen entries to the history and return them.

                Every new entry is stored as a tuple (title, link, summary,
                enclosures): the three texts are encoded as UTF-8, enclosures
                is a list of ScanFile objects. Entries without title or id and
                entries whose id is already part of last_ids are skipped.
                New entries are inserted at the front of history (keeping
                their order) and history is cut to MAX_HISTORY_ELEMENTS.
                """
                idx = 0
                for item in items:
                        enclosure = []
                        link = ""
                        # Last link with a relation other than alternate/enclosure
                        other_link = ""

                        # Try to read title, continue if none found
                        # (any failure of the external strip helper counts as "none found")
                        try:
                                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
                        except Exception:
                                continue

                        # Try to read id, continue if none found (invalid feed, should be handled differently) or to be excluded
                        try:
                                entry_id = item.getElementsByTagName("id")[0].childNodes[0].data
                        except Exception:
                                continue
                        if entry_id in self.last_ids:
                                continue

                        # Read out enclosures and link
                        for current in item.getElementsByTagName("link"):
                                rel = current.getAttribute("rel")
                                # Enclosure
                                if rel == "enclosure":
                                        href = current.getAttribute("href").encode("UTF-8")
                                        mimetype = current.getAttribute("type").encode("UTF-8")

                                        # Size in MiB, unknown if length is missing or not a number
                                        size = None
                                        if current.hasAttribute("length"):
                                                try:
                                                        size = int(current.getAttribute("length")) // 1048576
                                                except ValueError:
                                                        size = None

                                        enclosure.append(ScanFile(href, mimetype = mimetype, size = size, autodetect = False))
                                # A missing rel means "alternate": this is the link to the item
                                elif rel in ("", "alternate"):
                                        link = current.getAttribute("href")
                                # Other relations (self, replies, ...) are only a fallback
                                else:
                                        other_link = current.getAttribute("href")

                        if not link:
                                link = other_link

                        # Try to read summary, empty if none
                        try:
                                summary = strip_readable(item.getElementsByTagName("summary")[0].childNodes[0].data)
                        except Exception:
                                summary = ""

                        # Update Lists
                        self.history.insert(idx, (
                                        title.encode("UTF-8"),
                                        link.encode("UTF-8"),
                                        summary.encode("UTF-8"),
                                        enclosure
                        ))
                        self.last_ids.add(entry_id)

                        idx += 1

                # Eventually cut history
                del self.history[self.MAX_HISTORY_ELEMENTS:]

                return self.history[:idx]
93
class RSSFeed:
        """Parses an RSS-Feed into expected format.

        The methods read and update the attributes last_update, last_ids,
        history and MAX_HISTORY_ELEMENTS of the instance (BaseFeed sets
        these up).
        """

        def gotDom(self, dom):
                """Return the items of dom which have not been seen before.

                An empty list is returned without parsing when the first
                <lastBuildDate> element holds the same value as on the
                previous call. If that element cannot be read the items are
                parsed unconditionally.
                """
                # Try to read when feed was last updated, if time equals return empty list. else fetch new items
                try:
                        updated = dom.getElementsByTagName("lastBuildDate")[0].childNodes[0].data
                        if self.last_update == updated:
                                return [ ]
                        self.last_update = updated
                except Exception:
                        # Best effort: a missing timestamp must not prevent parsing
                        pass
                return RSSFeed.parse(self, dom.getElementsByTagName("item"))

        def parse(self, items):
                """Add unseen items to the history and return them.

                Every new item is stored as a tuple (title, link, summary,
                enclosures): the three texts are encoded as UTF-8, enclosures
                is a list of ScanFile objects. Items without title and items
                whose guid (or link, if there is no guid) is already part of
                last_ids are skipped. New items are inserted at the front of
                history (keeping their order) and history is cut to
                MAX_HISTORY_ELEMENTS.
                """
                idx = 0
                for item in items:
                        enclosure = []

                        # Try to read title, continue if none found
                        # (any failure of the external strip helper counts as "none found")
                        try:
                                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
                        except Exception:
                                continue

                        # Try to read link, empty if none
                        try:
                                link = item.getElementsByTagName("link")[0].childNodes[0].data
                        except Exception:
                                link = ""

                        # Try to read guid, link if none (RSS 1.0 or invalid RSS 2.0)
                        try:
                                guid = item.getElementsByTagName("guid")[0].childNodes[0].data
                        except Exception:
                                guid = link

                        # Continue if item is to be excluded
                        if guid in self.last_ids:
                                continue

                        # Try to read summary (description element), empty if none
                        try:
                                summary = strip_readable(item.getElementsByTagName("description")[0].childNodes[0].data)
                        except Exception:
                                summary = ""

                        # Read out enclosures
                        for current in item.getElementsByTagName("enclosure"):
                                href = current.getAttribute("url").encode("UTF-8")
                                mimetype = current.getAttribute("type").encode("UTF-8")

                                # Size in MiB, unknown if length is missing or not a number
                                size = None
                                if current.hasAttribute("length"):
                                        try:
                                                size = int(current.getAttribute("length")) // 1048576
                                        except ValueError:
                                                size = None

                                enclosure.append(ScanFile(href, mimetype = mimetype, size = size, autodetect = False))

                        # Update Lists
                        self.history.insert(idx, (
                                        title.encode("UTF-8"),
                                        link.encode("UTF-8"),
                                        summary.encode("UTF-8"),
                                        enclosure
                        ))
                        self.last_ids.add(guid)

                        idx += 1

                # Eventually cut history
                del self.history[self.MAX_HISTORY_ELEMENTS:]

                return self.history[:idx]
166
class UniversalFeed(BaseFeed, RSSFeed, AtomFeed):
        """Universal Feed which on first run determines its type and calls the correct parsing-functions"""

        def __init__(self, uri, autoupdate):
                """Create a feed whose type is not yet known (self.type is None)."""
                BaseFeed.__init__(self, uri, autoupdate)
                self.type = None

        def gotDom(self, dom):
                """Parse dom with the parser matching the type of this feed.

                On the first call the type is detected from the root element
                and title and description of the feed are read, then the
                matching parser is run.

                Raises NotImplementedError when the type can not be
                determined. The feed is then marked as "unknown" and further
                calls fall through all branches and return None.
                """
                if self.type == "rss":
                        return RSSFeed.gotDom(self, dom)
                elif self.type == "atom":
                        return AtomFeed.gotDom(self, dom)
                elif self.type is None:
                        root = dom.documentElement

                        # RSS 2.0 and RSS 1.0 (NS: http://www.w3.org/1999/02/22-rdf-syntax-ns#)
                        if root.getAttribute("version") in ("2.0", "2.00", "0.94", "0.93", "0.92", "0.91") \
                                or root.localName == "RDF":
                                self.type = "rss"
                                try:
                                        channel = dom.getElementsByTagName("channel")[0]
                                        self.title = channel.getElementsByTagName("title")[0].childNodes[0].data
                                        self.description = channel.getElementsByTagName("description")[0].childNodes[0].data
                                except Exception:
                                        # Best effort: keep what is already set if the feed lacks these elements
                                        pass
                        # Atom (NS: http://www.w3.org/2005/Atom)
                        elif root.localName == "feed":
                                self.type = "atom"
                                try:
                                        self.title = dom.getElementsByTagName("title")[0].childNodes[0].data
                                        self.description = dom.getElementsByTagName("subtitle")[0].childNodes[0].data
                                except Exception:
                                        # Best effort: keep what is already set if the feed lacks these elements
                                        pass
                        else:
                                self.type = "unknown"
                                # Call form of raise, valid on Python 2 and Python 3
                                raise NotImplementedError('Unsupported Feed: %s' % root.localName)
                        self.title = strip(self.title).encode("UTF-8")
                        self.description = strip_readable(self.description).encode("UTF-8")

                        # Re-run function to parse dom
                        return self.gotDom(dom)