initial checkin of MerlinSkinThemes
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from Plugins.SystemPlugins.Toolkit.TagStrip import strip, strip_readable
2 from Components.Scanner import ScanFile
3
# XML namespace prefixes in ElementTree's "{uri}" notation. They are
# compared against the root tag of a feed and prepended to child tag names
# when looking up elements.
NS_RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
NS_RSS_09 = "{http://my.netscape.com/rdf/simple/0.9/}"
NS_RSS_10 = "{http://purl.org/rss/1.0/}"
7
8 # based on http://effbot.org/zone/element-rss-wrapper.htm
class ElementWrapper:
    """Expose the child elements of an XML element as attributes.

    Reading ``wrapper.foo`` returns the text of the first ``foo`` child of
    the wrapped element (the tag is prefixed with the namespace given at
    construction), or ``None`` when there is no such child.
    """

    def __init__(self, element, ns=""):
        """Wrap *element*; *ns* is the "{uri}" prefix put in front of tags."""
        self._element = element
        self._ns = ns

    def __getattr__(self, tag):
        """Return the text of child *tag*, or None if it does not exist.

        Raises AttributeError for any name starting with an underscore.
        """
        # __getattr__ only runs after normal attribute lookup has failed.
        # Underscore-prefixed names are internal state or protocol hooks,
        # never feed tags.  Refusing them here also keeps an instance whose
        # __init__ has not run from recursing endlessly on self._element.
        if tag.startswith('_'):
            raise AttributeError(tag)
        return self._element.findtext(self._ns + tag)
18
class RSSEntryWrapper(ElementWrapper):
    """Attribute view of one RSS item element.

    Besides the plain child-tag lookup inherited from ElementWrapper, these
    virtual attributes are provided:

    enclosures -- list of ScanFile objects, one per ``enclosure`` child
    id         -- text of the ``guid`` child, else title and link joined
    updated    -- alias for the ``lastBuildDate`` child
    summary    -- alias for the ``description`` child
    """

    def __getattr__(self, tag):
        if tag == "enclosures":
            myl = []
            for elem in self._element.findall(self._ns + 'enclosure'):
                length = elem.get("length")
                if length:
                    try:
                        # 1048576 == 2**20; presumably converts bytes to MiB
                        length = int(length) / 1048576
                    except ValueError:
                        # A non-numeric length must not abort the whole
                        # feed; report the size as unknown, exactly like a
                        # missing attribute does.
                        length = None
                myl.append(ScanFile(
                    elem.get("url"),
                    mimetype=elem.get("type"),
                    size=length,
                    autodetect=False
                ))
            return myl
        elif tag == "id":
            guid = self._element.findtext(self._ns + 'guid')
            if guid is not None:
                return guid
            # Build the fallback only when there is no guid, and tolerate a
            # missing title or link (both are None in that case) instead of
            # failing on None + str.
            return (self.title or "") + (self.link or "")
        elif tag == "updated":
            tag = "lastBuildDate"
        elif tag == "summary":
            tag = "description"
        return ElementWrapper.__getattr__(self, tag)
41
class PEAEntryWrapper(ElementWrapper):
    """Attribute view of one feed entry whose links are ``link`` elements.

    Besides the plain child-tag lookup inherited from ElementWrapper, these
    virtual attributes are provided:

    link       -- ``href`` of the first ``link`` child whose ``rel`` is not
                  "enclosure", or '' if there is none
    enclosures -- list of ScanFile objects, one per ``link`` child whose
                  ``rel`` is "enclosure"
    summary    -- text of ``summary``; if that is empty, the text of a
                  ``content`` child with type "html"
    """

    def __getattr__(self, tag):
        if tag == "link":
            for elem in self._element.findall(self._ns + tag):
                if elem.get("rel") != "enclosure":
                    return elem.get("href")
            return ''
        elif tag == "enclosures":
            myl = []
            for elem in self._element.findall(self._ns + 'link'):
                if elem.get("rel") == "enclosure":
                    length = elem.get("length")
                    if length:
                        try:
                            # 1048576 == 2**20; presumably bytes to MiB
                            length = int(length) / 1048576
                        except ValueError:
                            # A non-numeric length must not abort the whole
                            # feed; report the size as unknown, exactly
                            # like a missing attribute does.
                            length = None
                    myl.append(ScanFile(
                        elem.get("href"),
                        mimetype=elem.get("type"),
                        size=length,
                        autodetect=False
                    ))
            return myl
        elif tag == "summary":
            text = self._element.findtext(self._ns + 'summary')
            if not text:
                # NOTE: if we don't have a summary we use the full content instead
                elem = self._element.find(self._ns + 'content')
                if elem is not None and elem.get('type') == "html":
                    text = elem.text
            return text

        return ElementWrapper.__getattr__(self, tag)
73
class RSSWrapper(ElementWrapper):
    """Wrap a channel element together with the list of its item elements.

    Channel tags are read as attributes (see ElementWrapper); the items are
    reachable through len(), indexing and iteration, each one wrapped in an
    RSSEntryWrapper.  The iteration cursor is stored on the wrapper itself,
    so the wrapper is its own iterator.
    """

    def __init__(self, channel, items, ns=""):
        ElementWrapper.__init__(self, channel, ns)
        self._items = items

    def __len__(self):
        return len(self._items)

    def __getitem__(self, index):
        element = self._items[index]
        return RSSEntryWrapper(element, self._ns)

    def __iter__(self):
        # Reset the cursor and remember the last valid index
        self.idx, self.len = 0, len(self) - 1
        return self

    def next(self):
        current = self.idx
        if current <= self.len:
            self.idx = current + 1
            return self[current]
        raise StopIteration

    def __next__(self):
        # Python 3 spelling of the iterator protocol
        return self.next()
99
class RSS1Wrapper(RSSWrapper):
    """Wrapper for feeds whose channel and items are children of the root.

    Both the ``channel`` element and the ``item`` elements are looked up
    directly below *feed*, using the namespace prefix *ns*.
    """

    def __init__(self, feed, ns):
        channel = feed.find(ns + 'channel')
        items = feed.findall(ns + 'item')
        RSSWrapper.__init__(self, channel, items, ns)

    def __getattr__(self, tag):
        # XXX: 'logo' is afaik not officially part of older rss, but mapping
        # it to 'image' can't hurt
        lookup = 'image' if tag == 'logo' else tag
        return ElementWrapper.__getattr__(self, lookup)
111
class RSS2Wrapper(RSSWrapper):
    """Wrapper for feeds with an un-namespaced ``rss`` root element.

    The ``channel`` element is looked up below *feed* and the ``item``
    elements below that channel.  The *ns* argument is accepted so all
    wrappers share one constructor signature, but it is not used.

    Construction fails with AttributeError if *feed* has no ``channel``
    child, because ``find`` then returns None.
    """

    def __init__(self, feed, ns):
        channel = feed.find("channel")
        RSSWrapper.__init__(
            self, channel, channel.findall("item")
        )

    def __getattr__(self, tag):
        if tag == 'logo':
            # In RSS 2.0 <image> is a container and the address sits in its
            # <url> child; the text of <image> itself is only whitespace.
            url = ElementWrapper.__getattr__(self, 'image/url')
            if url and url.strip():
                return url.strip()
            # Fall back to the text of <image> for feeds without <url>
            tag = 'image'
        return ElementWrapper.__getattr__(self, tag)
123
class PEAWrapper(RSSWrapper):
    """Wrapper for feeds whose root element directly contains ``entry`` items.

    The root element itself serves as the channel.  The namespace is taken
    from the root tag; the *ns* argument is accepted so all wrappers share
    one constructor signature, but its value is ignored.
    """

    def __init__(self, feed, ns):
        # A namespaced tag looks like "{uri}feed".  A root without any
        # namespace has no "}" at all and must yield an empty prefix
        # instead of raising ValueError.
        root_tag = feed.tag
        end = root_tag.find("}")
        ns = root_tag[:end + 1] if end != -1 else ""
        RSSWrapper.__init__(
            self, feed, feed.findall(ns + 'entry'), ns
        )

    def __getitem__(self, index):
        return PEAEntryWrapper(self._items[index], self._ns)

    def __getattr__(self, tag):
        # This format names the feed description "subtitle"
        if tag == "description":
            tag = "subtitle"
        return ElementWrapper.__getattr__(self, tag)
138
class BaseFeed:
    """Common state shared by all feeds: uri, title, description, logo, history."""

    # Upper bound for the number of entries kept in ``history``
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, title="", description=""):
        # The URI doubles as the identifier of the feed
        self.uri = uri

        # Without an explicit title the UTF-8 encoded URI is shown instead
        if title:
            self.title = title
        else:
            self.title = uri.encode("UTF-8")

        self.description = description
        self.logoUrl = ''
        self.history = []

    def __str__(self):
        template = '<%s, "%s", "%s", %d items>'
        item_count = len(self.history)
        return template % (self.__class__, self.title, self.description, item_count)
155
class UniversalFeed(BaseFeed):
    """Feed which can handle rdf, rss and atom feeds utilizing abstraction wrappers."""

    def __init__(self, uri, autoupdate, sync=False):
        BaseFeed.__init__(self, uri)

        # Set Autoupdate
        self.autoupdate = autoupdate

        # Is this a synced feed?
        self.sync = sync

        # Initialize
        self.last_update = None  # "updated" value of the last processed feed
        self.last_ids = set()    # ids of all entries already in the history
        self.wrapper = None      # wrapper class chosen on the first gotFeed()
        self.ns = ""             # namespace prefix handed to the wrapper

    def gotWrapper(self, wrapper):
        """Merge the entries of *wrapper* into the history.

        Entries without a title, without an id, or with an id that was seen
        before are skipped.  New entries are placed at the front of
        ``self.history`` in feed order as tuples of
        (title, link, summary, enclosures), and the history is then cut to
        MAX_HISTORY_ELEMENTS.

        Returns the list of newly added history entries; it is empty when
        the feed reports the same "updated" value as on the previous call.
        """
        updated = wrapper.updated
        if updated and self.last_update == updated:
            return []

        idx = 0
        ids = self.last_ids
        for item in wrapper:
            # Try to read title, continue if none found
            title = strip(item.title or "")
            if not title:
                continue

            # Try to read id, continue if none found (invalid feed or
            # internal error) or to be excluded
            item_id = item.id
            if not item_id or item_id in ids:
                continue

            # Link is optional; a missing one is None and cannot be encoded
            link = item.link or ""

            # Try to read summary, empty if none
            summary = strip_readable(item.summary or "")

            # Update Lists
            self.history.insert(idx, (
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                item.enclosures
            ))
            ids.add(item_id)

            idx += 1

        # Eventually cut history
        del self.history[self.MAX_HISTORY_ELEMENTS:]

        # Remember what was processed; without this the early return at the
        # top can never trigger.
        self.last_update = updated

        return self.history[:idx]

    def gotFeed(self, feed):
        """Process the parsed root element *feed* and return its new entries.

        On the first call the feed type is detected from the root tag and
        title, description and logo are read from the feed.  Later calls
        reuse the detected wrapper class and namespace.

        Raises NotImplementedError if the root tag is not recognised.
        """
        if self.wrapper is not None:
            wrapper = self.wrapper(feed, self.ns)
        else:
            ns = ""
            if feed.tag == "rss":
                wrapper_class = RSS2Wrapper
            elif feed.tag.startswith(NS_RDF):
                # rdf:RDF is only the container element: channel and items
                # are in the RSS 1.0 or 0.9 namespace, so probe for those
                # and keep the RDF namespace only as a last resort.
                ns = NS_RDF
                for candidate in (NS_RSS_10, NS_RSS_09):
                    if feed.find(candidate + 'channel') is not None:
                        ns = candidate
                        break
                wrapper_class = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_09):
                ns = NS_RSS_09
                wrapper_class = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_10):
                ns = NS_RSS_10
                wrapper_class = RSS1Wrapper
            elif feed.tag.endswith("feed"):
                wrapper_class = PEAWrapper
            else:
                raise NotImplementedError('Unsupported Feed: %s' % feed.tag)

            wrapper = wrapper_class(feed, ns)

            self.title = strip(wrapper.title or "").encode("UTF-8")
            self.description = strip_readable(wrapper.description or "").encode("UTF-8")
            self.logoUrl = wrapper.logo

            # Cache the detection only after everything above succeeded, so
            # a broken first download is fully retried next time.
            self.wrapper = wrapper_class
            self.ns = ns

        return self.gotWrapper(wrapper)
240