Add poller to RSSBaseView,
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from sets import Set
2 from TagStrip import strip, strip_readable
3
class BaseFeed:
    """Common state shared by every feed type.

    Only sets up the attributes the concrete feed parsers work on;
    it does not fetch or parse anything itself.
    """

    # Upper bound intended for the number of items kept in ``history``
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, autoupdate):
        """Remember where the feed lives and start with empty state.

        Arguments:
        uri -- address of the feed, doubles as its identifier
        autoupdate -- stored as given on ``self.autoupdate``
        """
        # Identity and configuration as handed in by the caller
        self.uri, self.autoupdate = uri, autoupdate

        # Nothing has been parsed so far
        self.history = []
        self.last_ids = set()
        self.last_update = None

        # Until a real title is known the (UTF-8 encoded) URI is shown instead
        self.description = ""
        self.title = uri.encode("UTF-8")
21
class AtomFeed(BaseFeed):
    """Parses an Atom-Feed into expected format.

    Every item is a tuple ``(title, link, summary, enclosures)``. The three
    strings are UTF-8 encoded, ``enclosures`` is a list of ``(href, type)``
    tuples (also UTF-8 encoded).
    """

    def gotDom(self, dom):
        """Return the entries of ``dom`` which were not seen before.

        Arguments:
        dom -- parsed feed; only ``getElementsByTagName`` is used on it

        Returns a list of item tuples, or an empty list if the first
        "updated" element of the document equals the one remembered from
        the previous call.
        """
        try:
            updated = dom.getElementsByTagName("updated")[0].childNodes[0].data
        except Exception:
            # No usable timestamp, fall back to the per-entry id check
            pass
        else:
            if self.last_update == updated:
                return []
            self.last_update = updated

        # Call through the class on purpose: a subclass combining several
        # parsers (see UniversalFeed) may resolve self.parse to another one.
        return AtomFeed.parse(self, dom.getElementsByTagName("entry"))

    def parse(self, items):
        """Convert "entry" elements into item tuples.

        Entries without title or id, and entries whose id is already in
        ``self.last_ids``, are skipped. New items are put in front of
        ``self.history`` which is then cut to ``MAX_HISTORY_ELEMENTS``.

        Arguments:
        items -- iterable of "entry" DOM elements

        Returns the list of newly found item tuples.
        """
        new_items = []
        for item in items:
            # Try to read title, continue if none found
            try:
                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
            except Exception:
                continue

            # Try to read id, continue if none found (invalid feed, should
            # be handled differently)
            try:
                entry_id = item.getElementsByTagName("id")[0].childNodes[0].data
            except Exception:
                continue

            # Item is already known
            if entry_id in self.last_ids:
                continue

            # Read out enclosures and link
            enclosure = []
            link = ""
            for current in item.getElementsByTagName("link"):
                if current.getAttribute("rel") == "enclosure":
                    href = current.getAttribute("href").encode("UTF-8")
                    mime_type = current.getAttribute("type").encode("UTF-8")
                    enclosure.append((href, mime_type))
                else:
                    # No Enclosure, assume its a link to the item
                    # (the last such element wins)
                    link = current.getAttribute("href")

            # Try to read summary, empty if none
            try:
                summary = strip_readable(item.getElementsByTagName("summary")[0].childNodes[0].data)
            except Exception:
                summary = ""

            # Update Lists
            new_items.append((
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosure
            ))
            self.last_ids.add(entry_id)

        # Put new Items in front of the known ones and cut the result.
        # The slice has to be assigned back, on its own it only builds a
        # copy which is thrown away immediately.
        self.history = (new_items + self.history)[:self.MAX_HISTORY_ELEMENTS]

        return new_items
86
class RSSFeed(BaseFeed):
    """Parses an RSS-Feed into expected format.

    Every item is a tuple ``(title, link, summary, enclosures)``. The three
    strings are UTF-8 encoded, ``enclosures`` is a list of ``(url, type)``
    tuples (also UTF-8 encoded).
    """

    def gotDom(self, dom):
        """Return the items of ``dom`` which were not seen before.

        Arguments:
        dom -- parsed feed; only ``getElementsByTagName`` is used on it

        Returns a list of item tuples, or an empty list if the first
        "lastBuildDate" element of the document equals the one remembered
        from the previous call.
        """
        try:
            updated = dom.getElementsByTagName("lastBuildDate")[0].childNodes[0].data
        except Exception:
            # No usable timestamp, fall back to the per-item guid check
            pass
        else:
            if self.last_update == updated:
                return []
            self.last_update = updated

        # Call through the class on purpose: a subclass combining several
        # parsers (see UniversalFeed) may resolve self.parse to another one.
        return RSSFeed.parse(self, dom.getElementsByTagName("item"))

    def parse(self, items):
        """Convert "item" elements into item tuples.

        Items without title and items whose guid (or link, if there is no
        guid) is already in ``self.last_ids`` are skipped. New items are put
        in front of ``self.history`` which is then cut to
        ``MAX_HISTORY_ELEMENTS``.

        Arguments:
        items -- iterable of "item" DOM elements

        Returns the list of newly found item tuples.
        """
        new_items = []
        for item in items:
            # Try to read title, continue if none found
            try:
                title = strip(item.getElementsByTagName("title")[0].childNodes[0].data)
            except Exception:
                continue

            # Try to read link, empty if none
            try:
                link = item.getElementsByTagName("link")[0].childNodes[0].data
            except Exception:
                link = ""

            # Try to read guid, link if none (RSS 1.0 or invalid RSS 2.0)
            try:
                guid = item.getElementsByTagName("guid")[0].childNodes[0].data
            except Exception:
                guid = link

            # Continue if item is to be excluded
            if guid in self.last_ids:
                continue

            # Try to read summary (description element), empty if none
            try:
                summary = strip_readable(item.getElementsByTagName("description")[0].childNodes[0].data)
            except Exception:
                summary = ""

            # Read out enclosures
            enclosure = []
            for current in item.getElementsByTagName("enclosure"):
                href = current.getAttribute("url").encode("UTF-8")
                mime_type = current.getAttribute("type").encode("UTF-8")
                enclosure.append((href, mime_type))

            # Update Lists
            new_items.append((
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosure
            ))
            self.last_ids.add(guid)

        # Put new Items in front of the known ones and cut the result.
        # The slice has to be assigned back, on its own it only builds a
        # copy which is thrown away immediately.
        self.history = (new_items + self.history)[:self.MAX_HISTORY_ELEMENTS]

        return new_items
154
# BaseFeed is not listed as a direct base: it is already inherited through
# both parsers, and naming it first makes the bases impossible to linearise
# once the classes are new-style (TypeError at class creation).
class UniversalFeed(RSSFeed, AtomFeed):
    """Universal Feed which on first run determines its type and calls the correct parsing-functions"""

    # Values of the root element's "version" attribute treated as RSS
    RSS_VERSIONS = ("2.0", "0.94", "0.93", "0.92", "0.91")

    def __init__(self, uri, autoupdate):
        """Set up the common feed state, the type is detected later.

        Arguments:
        uri -- address of the feed, doubles as its identifier
        autoupdate -- passed through to BaseFeed
        """
        BaseFeed.__init__(self, uri, autoupdate)
        # None until the first document was inspected, then "rss" or "atom"
        self.type = None

    def gotDom(self, dom):
        """Detect the feed type if still unknown, then parse ``dom``.

        On the first call title and description of the feed are read from
        the document as well (best effort, the previous values are kept if
        the elements are missing).

        Arguments:
        dom -- parsed feed; ``documentElement`` and
               ``getElementsByTagName`` are used on it

        Returns the list of new items as produced by the matching parser.
        Raises NotImplementedError if the root element is neither an RSS
        nor an Atom one.
        """
        if self.type is None:
            root = dom.documentElement
            # RSS 2.0 (and older, by version attribute) or
            # RSS 1.0 (NS: http://www.w3.org/1999/02/22-rdf-syntax-ns#)
            if root.getAttribute("version") in self.RSS_VERSIONS \
                    or root.localName == "RDF":
                self.type = "rss"
                try:
                    channel = dom.getElementsByTagName("channel")[0]
                    self.title = channel.getElementsByTagName("title")[0].childNodes[0].data
                    self.description = channel.getElementsByTagName("description")[0].childNodes[0].data
                except Exception:
                    pass
            # Atom (NS: http://www.w3.org/2005/Atom)
            elif root.localName == "feed":
                self.type = "atom"
                try:
                    self.title = dom.getElementsByTagName("title")[0].childNodes[0].data
                    self.description = dom.getElementsByTagName("subtitle")[0].childNodes[0].data
                except Exception:
                    pass
            else:
                raise NotImplementedError('Unsupported Feed: %s' % root.localName)

            self.title = strip(self.title).encode("UTF-8")
            self.description = strip_readable(self.description).encode("UTF-8")

        # Dispatch through the classes: both parents define gotDom/parse,
        # so plain method lookup would always end up in the first one.
        if self.type == "rss":
            print("[SimpleRSS] type is rss")
            return RSSFeed.gotDom(self, dom)
        elif self.type == "atom":
            print("[SimpleRSS] type is atom")
            return AtomFeed.gotDom(self, dom)