Dutch translation by Benny
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from TagStrip import strip, strip_readable
2 from Components.Scanner import ScanFile
3
4 NS_RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
5 NS_RSS_09 = "{http://my.netscape.com/rdf/simple/0.9/}"
6 NS_RSS_10 = "{http://purl.org/rss/1.0/}"
7
8 # based on http://effbot.org/zone/element-rss-wrapper.htm
class ElementWrapper:
    """Expose the child elements of an XML element as attributes.

    Reading ``wrapper.foo`` returns whatever ``element.findtext(ns + "foo")``
    returns, i.e. the text of the first matching child or None if there is
    no such child (ElementTree semantics).

    element -- the wrapped element (must offer ``findtext``)
    ns      -- namespace prefix in ``{uri}`` form that is prepended to every
               looked-up tag; empty for un-namespaced documents
    """

    def __init__(self, element, ns = ""):
        self._element = element
        self._ns = ns

    def __getattr__(self, tag):
        # __getattr__ is only consulted after normal attribute lookup failed.
        # Special names must raise so protocol probes (copy, pickle, ...) see
        # them as absent instead of receiving None.
        # The two internal slots must raise as well: if they are not set yet
        # (instance created without __init__), reading self._element below
        # would re-enter this method and recurse without end.
        if tag.startswith('__') or tag in ('_element', '_ns'):
            raise AttributeError(tag)
        return self._element.findtext(self._ns + tag)
18
class RSSEntryWrapper(ElementWrapper):
    """Wrapper around a single feed ``item`` element.

    In addition to the plain child-text lookup of ElementWrapper it offers:
      enclosures -- list of ScanFile objects, one per ``enclosure`` child
      id         -- text of ``guid``; title + link if there is no ``guid``
      updated    -- alias for ``lastBuildDate``
      summary    -- alias for ``description``
    """

    def __getattr__(self, tag):
        if tag == "enclosures":
            myl = []
            for elem in self._element.findall(self._ns + 'enclosure'):
                length = elem.get("length")
                if length:
                    try:
                        # Scale down by 2**20 (presumably bytes -> MiB).
                        # Floor division keeps the integer result on both
                        # Python 2 and Python 3.
                        length = int(length) // 1048576
                    except ValueError:
                        # A non-numeric length must not abort the whole
                        # feed; treat the size as unknown instead.
                        length = None
                myl.append(ScanFile(
                    elem.get("url"),
                    mimetype = elem.get("type"),
                    size = length,
                    autodetect = False)
                )
            return myl
        elif tag == "id":
            guid = self._element.findtext(self._ns + 'guid')
            if guid is not None:
                return guid
            # Build the fallback only when it is needed and tolerate missing
            # parts: concatenating eagerly raised TypeError for items that
            # carry a guid but lack a title or link.
            return (self.title or "") + (self.link or "")
        elif tag == "updated":
            tag = "lastBuildDate"
        elif tag == "summary":
            tag = "description"
        return ElementWrapper.__getattr__(self, tag)
41
class PEAEntryWrapper(ElementWrapper):
    """Wrapper around a single ``entry`` whose links are ``link`` elements.

    Links are told apart by their ``rel`` attribute (Atom-style):
      link       -- ``href`` of the first ``link`` child that is not an
                    enclosure, or '' if there is none
      enclosures -- list of ScanFile objects, one per ``link`` child with
                    rel="enclosure"
    Every other attribute is the text of the child of that name.
    """

    def __getattr__(self, tag):
        if tag == "link":
            for elem in self._element.findall(self._ns + tag):
                if elem.get("rel") != "enclosure":
                    return elem.get("href")
            return ''
        elif tag == "enclosures":
            myl = []
            for elem in self._element.findall(self._ns + 'link'):
                if elem.get("rel") == "enclosure":
                    length = elem.get("length")
                    if length:
                        try:
                            # Scale down by 2**20 (presumably bytes -> MiB).
                            # Floor division keeps the integer result on
                            # both Python 2 and Python 3.
                            length = int(length) // 1048576
                        except ValueError:
                            # A non-numeric length must not abort the whole
                            # feed; treat the size as unknown instead.
                            length = None
                    myl.append(ScanFile(
                        elem.get("href"),
                        mimetype = elem.get("type"),
                        size = length,
                        autodetect = False
                    ))
            return myl
        return ElementWrapper.__getattr__(self, tag)
64
class RSSWrapper(ElementWrapper):
    """Feed-level wrapper: channel attributes plus a sequence of entries.

    Attribute access reads children of ``channel`` (see ElementWrapper);
    indexing, len() and iteration work on ``items``, each entry being
    wrapped on access.

    channel -- element holding the feed-level information
    items   -- sequence of entry elements
    ns      -- namespace prefix used for all tag lookups
    """

    def __init__(self, channel, items, ns = ""):
        self._items = items
        ElementWrapper.__init__(self, channel, ns)

    def __iter__(self):
        # Reset the legacy cursor for callers that still drive next() on the
        # wrapper itself after calling __iter__().
        self.idx = 0
        self.len = len(self)-1
        # Hand out an independent iterator so that nested or overlapping
        # loops over the same wrapper no longer share one cursor. Going
        # through self[index] keeps subclass __getitem__ overrides in effect.
        return (self[index] for index in range(len(self)))

    def next(self):
        """Legacy cursor-based step; only valid after __iter__() was called."""
        idx = self.idx
        if idx > self.len:
            raise StopIteration
        self.idx = idx+1
        return self[idx]

    def __len__(self):
        return len(self._items)

    def __getitem__(self, index):
        return RSSEntryWrapper(self._items[index], self._ns)
87
class RSS1Wrapper(RSSWrapper):
    """Wrapper for feeds whose ``channel`` and ``item`` elements are both
    direct, namespaced children of the root element."""

    def __init__(self, feed, ns):
        # Channel and items are siblings below the root here
        channel = feed.find(ns + 'channel')
        entries = feed.findall(ns + 'item')
        RSSWrapper.__init__(self, channel, entries, ns)
94
class RSS2Wrapper(RSSWrapper):
    """Wrapper for un-namespaced feeds whose ``item`` elements are nested
    inside the ``channel`` element."""

    def __init__(self, feed, ns):
        # ns is accepted for a uniform constructor signature but not used;
        # tags are looked up without a namespace prefix
        channel = feed.find("channel")
        entries = channel.findall("item")
        RSSWrapper.__init__(self, channel, entries)
101
class PEAWrapper(RSSWrapper):
    """Wrapper for feeds whose root element directly holds ``entry`` children.

    The namespace is taken from the root tag itself, so the ``ns`` argument
    is only accepted for a uniform constructor signature. Entries are wrapped
    as PEAEntryWrapper and ``description`` is served from ``subtitle``.
    """

    def __init__(self, feed, ns):
        # Everything up to and including "}" is the namespace prefix. find()
        # yields -1 for a tag without namespace, which makes the slice empty
        # instead of raising ValueError like index() did.
        ns = feed.tag[:feed.tag.find("}")+1]
        RSSWrapper.__init__(
            self, feed, feed.findall(ns + 'entry'), ns
        )

    def __getitem__(self, index):
        return PEAEntryWrapper(self._items[index], self._ns)

    def __getattr__(self, tag):
        if tag == "description":
            tag = "subtitle"
        return ElementWrapper.__getattr__(self, tag)
116
class BaseFeed:
    """Common base of all feeds: holds URI, title, description and history."""
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, title = "", description = ""):
        # The URI doubles as the identifier of the feed
        self.uri = uri

        # Without an explicit title the UTF-8 encoded URI is shown instead
        if title:
            self.title = title
        else:
            self.title = uri.encode("UTF-8")
        self.description = description

        # Entries collected so far; starts out empty
        self.history = []

    def __str__(self):
        fields = (
            self.__class__,
            self.title,
            self.description,
            len(self.history),
        )
        return "<%s, \"%s\", \"%s\", %d items>" % fields
132
class UniversalFeed(BaseFeed):
    """Feed which can handle rdf, rss and atom feeds utilizing abstraction wrappers."""
    def __init__(self, uri, autoupdate):
        BaseFeed.__init__(self, uri)

        # Set Autoupdate
        self.autoupdate = autoupdate

        # Initialize
        # NOTE(review): last_update is only compared in gotWrapper and never
        # assigned in this class; presumably maintained by a caller - confirm.
        self.last_update = None
        self.last_ids = set()
        self.wrapper = None
        self.ns = ""

    def gotWrapper(self, wrapper):
        """Merge the entries of a wrapped feed into the history.

        Entries without title or id, and entries whose id was seen before,
        are skipped. New entries are placed at the front of self.history in
        feed order as (title, link, summary, enclosures) tuples with the three
        texts UTF-8 encoded; the history is then cut to MAX_HISTORY_ELEMENTS.

        Returns the list of entries added by this call (empty if the feed
        reports the same ``updated`` value as self.last_update).
        """
        updated = wrapper.updated
        if updated and self.last_update == updated:
            return []

        idx = 0
        ids = self.last_ids
        for item in wrapper:
            # Try to read title, continue if none found
            title = strip(item.title)
            if not title:
                continue

            # Try to read id, continue if none found (invalid feed or internal error) or to be excluded
            item_id = item.id
            if not item_id or item_id in ids:
                continue

            # Link; may be missing from an entry, so fall back to an empty
            # string to keep the encode below from failing on None
            link = item.link or ""

            # Try to read summary, empty if none
            summary = strip_readable(item.summary or "")

            # Update Lists
            self.history.insert(idx, (
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                item.enclosures
            ))
            ids.add(item_id)

            idx += 1

        # Eventually cut history
        del self.history[self.MAX_HISTORY_ELEMENTS:]

        return self.history[:idx]

    def gotFeed(self, feed):
        """Wrap a parsed feed root element and merge its entries.

        On the first call the feed flavour is detected from the root tag; the
        matching wrapper class and namespace are remembered for later calls
        and title and description are taken over from the feed.

        Returns the list of newly added entries (see gotWrapper).
        Raises NotImplementedError if the root tag is not recognised.
        """
        if self.wrapper is not None:
            wrapper = self.wrapper(feed, self.ns)
        else:
            if feed.tag == "rss":
                self.wrapper = RSS2Wrapper
            elif feed.tag.startswith(NS_RDF):
                self.ns = NS_RDF
                self.wrapper = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_09):
                self.ns = NS_RSS_09
                self.wrapper = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_10):
                self.ns = NS_RSS_10
                self.wrapper = RSS1Wrapper
            elif feed.tag.endswith("feed"):
                self.wrapper = PEAWrapper
            else:
                # Call form of raise is valid on Python 2 and Python 3 alike
                raise NotImplementedError('Unsupported Feed: %s' % feed.tag)

            wrapper = self.wrapper(feed, self.ns)

            self.title = strip(wrapper.title).encode("UTF-8")
            self.description = strip_readable(wrapper.description or "").encode("UTF-8")

        return self.gotWrapper(wrapper)
213