use built-in set type instead of the one from sets module
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from TagStrip import strip, strip_readable
2 from Components.Scanner import ScanFile
3
# XML namespace prefixes written in "{uri}" form. The code below prepends
# them to tag names and compares them against the start of the root tag.
4 NS_RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"  # RDF syntax namespace
5 NS_RSS_09 = "{http://my.netscape.com/rdf/simple/0.9/}"  # RSS 0.9 namespace
6 NS_RSS_10 = "{http://purl.org/rss/1.0/}"  # RSS 1.0 namespace
7
8 # based on http://effbot.org/zone/element-rss-wrapper.htm
class ElementWrapper:
    """Expose the child elements of an XML element as attributes.

    Reading ``wrapper.foo`` returns ``element.findtext(ns + "foo")``, i.e.
    the text of the first matching child as reported by the wrapped element
    (with ElementTree elements that is None when no such child exists).
    """

    def __init__(self, element, ns=""):
        # ns is the "{uri}" prefix prepended to every tag that is looked up.
        self._ns = ns
        self._element = element

    def __getattr__(self, tag):
        # Only reached for names that normal attribute lookup did not find.
        # Double-underscore names are refused instead of being forwarded to
        # the element as if they were tag names.
        if tag[:2] == "__":
            raise AttributeError(tag)
        qualified = self._ns + tag
        return self._element.findtext(qualified)
18
class RSSEntryWrapper(ElementWrapper):
    """Wrap a single RSS item and expose it under feed-neutral names.

    Besides plain child-tag lookup (inherited from ElementWrapper) the
    following virtual attributes are provided:

    enclosures -- list of dicts with the keys "href", "type" and "length",
                  one per ``enclosure`` child
    id         -- text of the ``guid`` child, or title + link when the item
                  carries no guid
    updated    -- alias for the ``lastBuildDate`` child
    summary    -- alias for the ``description`` child
    """

    def __getattr__(self, tag):
        if tag == "enclosures":
            enclosures = []
            for elem in self._element.findall(self._ns + "enclosure"):
                length = elem.get("length")
                if length:
                    try:
                        # Scale by 2**20 (presumably bytes -> MiB). Floor
                        # division keeps the result a whole number on
                        # Python 2 and Python 3 alike.
                        length = int(length) // 1048576
                    except ValueError:
                        # A malformed length must not abort the whole feed.
                        length = None
                enclosures.append({
                    "href": elem.get("url"),
                    "type": elem.get("type"),
                    "length": length,
                })
            return enclosures

        if tag == "id":
            guid = self._element.findtext(self._ns + "guid")
            if guid:
                return guid
            # No guid: build a surrogate id. Either part may be missing, so
            # treat None as an empty string instead of failing on it.
            return (self.title or "") + (self.link or "")

        if tag == "updated":
            tag = "lastBuildDate"
        elif tag == "summary":
            tag = "description"
        return ElementWrapper.__getattr__(self, tag)
43
class PEAEntryWrapper(ElementWrapper):
    """Wrap a single entry of an Atom-style feed.

    Such entries keep both their web link and their attachments in ``link``
    children, told apart by the ``rel`` attribute:

    link       -- href of the first ``link`` child that is not an enclosure
                  and actually has an href; "" when there is none
    enclosures -- list of dicts with the keys "href", "type" and "length",
                  one per ``link`` child with rel="enclosure"

    Every other attribute is a plain child-tag lookup.
    """

    def __getattr__(self, tag):
        if tag == "link":
            for elem in self._element.findall(self._ns + tag):
                if elem.get("rel") != "enclosure":
                    href = elem.get("href")
                    # Skip links without href so callers always get a string.
                    if href:
                        return href
            return ""

        if tag == "enclosures":
            enclosures = []
            for elem in self._element.findall(self._ns + "link"):
                if elem.get("rel") != "enclosure":
                    continue
                length = elem.get("length")
                if length:
                    try:
                        # Scale by 2**20 (presumably bytes -> MiB). Floor
                        # division keeps the result a whole number on
                        # Python 2 and Python 3 alike.
                        length = int(length) // 1048576
                    except ValueError:
                        # A malformed length must not abort the whole feed.
                        length = None
                enclosures.append({
                    "href": elem.get("href"),
                    "type": elem.get("type"),
                    "length": length,
                })
            return enclosures

        return ElementWrapper.__getattr__(self, tag)
65
class RSSWrapper(ElementWrapper):
    """Sequence-like view of a feed.

    Attribute access reads children of the channel element, while len(),
    indexing and iteration operate on the list of item elements.
    """

    def __init__(self, channel, items, ns=""):
        ElementWrapper.__init__(self, channel, ns)
        self._items = items

    def __iter__(self):
        # Go through self[...] rather than wrapping self._items directly so
        # that subclasses overriding __getitem__ decide the entry type.
        entries = []
        for position in range(len(self)):
            entries.append(self[position])
        return iter(entries)

    def __len__(self):
        items = self._items
        return len(items)

    def __getitem__(self, index):
        element = self._items[index]
        return RSSEntryWrapper(element, self._ns)
79
class RSS1Wrapper(RSSWrapper):
    """Wrapper for feeds whose channel and items are direct, namespaced
    children of the root element."""

    def __init__(self, feed, ns):
        channel = feed.find(ns + "channel")
        items = feed.findall(ns + "item")
        RSSWrapper.__init__(self, channel, items, ns)
86
class RSS2Wrapper(RSSWrapper):
    """Wrapper for feeds whose items live inside an un-namespaced channel.

    The ns argument is accepted for signature compatibility with the other
    wrappers but is not used; all lookups happen without a namespace.
    """

    def __init__(self, feed, ns):
        channel = feed.find("channel")
        items = channel.findall("item")
        RSSWrapper.__init__(self, channel, items)
93
class PEAWrapper(RSSWrapper):
    """Wrapper for Atom-style feeds: the root element acts as the channel
    and its ``entry`` children are the items.

    The ns argument is ignored; the namespace is taken from the root tag.
    """

    def __init__(self, feed, ns):
        # The namespace is everything up to and including the closing brace
        # of the root tag. find() returns -1 for a tag without a namespace,
        # which makes the slice empty instead of raising like index() did.
        brace = feed.tag.find("}")
        ns = feed.tag[:brace + 1]
        RSSWrapper.__init__(
            self, feed, feed.findall(ns + "entry"), ns
        )

    def __getitem__(self, index):
        return PEAEntryWrapper(self._items[index], self._ns)

    def __getattr__(self, tag):
        # This feed type stores its description in the "subtitle" child.
        if tag == "description":
            tag = "subtitle"
        return ElementWrapper.__getattr__(self, tag)
108
class BaseFeed:
    """Common state shared by all feeds: uri, title, description, history."""

    # Upper bound for the number of history entries a feed keeps.
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, title="", description=""):
        # The URI doubles as the identifier of the feed.
        self.uri = uri

        # Fall back to the UTF-8 encoded URI when no title was given.
        if title:
            self.title = title
        else:
            self.title = uri.encode("UTF-8")
        self.description = description
        self.history = []

    def __str__(self):
        fields = (
            self.__class__,
            self.title,
            self.description,
            len(self.history),
        )
        return "<%s, \"%s\", \"%s\", %d items>" % fields
124
class UniversalFeed(BaseFeed):
    """Feed which can handle rdf, rss and atom feeds utilizing abstraction wrappers."""

    def __init__(self, uri, autoupdate):
        BaseFeed.__init__(self, uri)

        # Set Autoupdate
        self.autoupdate = autoupdate

        # Initialize
        self.last_update = None
        self.last_ids = set()
        # Wrapper class and namespace, chosen on the first gotFeed() call
        # and reused afterwards.
        self.wrapper = None
        self.ns = ""

    def gotWrapper(self, wrapper):
        """Merge the entries of a wrapped feed into the history.

        Entries without a title or id, and entries whose id was seen
        before, are skipped. New entries are put at the front of
        self.history, in feed order, as tuples of
        (title, link, summary, enclosures) with the three strings UTF-8
        encoded. The history is then cut to MAX_HISTORY_ELEMENTS.

        Returns the list of entries added by this call.
        """
        # NOTE(review): nothing in this class assigns last_update after
        # construction, so this shortcut only fires if a caller sets it.
        updated = wrapper.updated
        if updated and self.last_update == updated:
            return []

        idx = 0
        for item in wrapper:
            # Try to read title, continue if none found. A missing child is
            # reported as None; hand the strip helper a string in that case.
            title = strip(item.title or "")
            if not title:
                continue

            # Try to read id, continue if none found (invalid feed or
            # internal error) or to be excluded
            item_id = item.id
            if not item_id or item_id in self.last_ids:
                continue

            # The link is optional; never try to encode None below.
            link = item.link or ""

            # Read out enclosures
            enclosures = [
                ScanFile(
                    enclosure["href"],
                    mimetype = enclosure["type"],
                    size = enclosure["length"],
                    autodetect = False
                )
                for enclosure in item.enclosures
            ]

            # Try to read summary, empty if none
            summary = strip_readable(item.summary or "")

            # Update Lists
            self.history.insert(idx, (
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosures
            ))
            self.last_ids.add(item_id)

            idx += 1

        # Eventually cut history
        del self.history[self.MAX_HISTORY_ELEMENTS:]

        return self.history[:idx]

    def _findChannelNamespace(self, feed):
        """Return the namespace prefix under which feed has a channel child.

        Used for documents with an RDF root element, where the channel is
        usually not in the RDF namespace itself. Falls back to NS_RDF when
        no candidate matches.
        """
        for ns in (NS_RDF, NS_RSS_10, NS_RSS_09):
            # Compare against None: an element without children is falsy.
            if feed.find(ns + "channel") is not None:
                return ns
        return NS_RDF

    def gotFeed(self, feed):
        """Process a parsed feed document and return its new entries.

        On the first call the feed type is detected from the root tag, the
        matching wrapper class and namespace are remembered, and title and
        description are taken from the feed.

        Raises NotImplementedError for an unrecognised root tag.
        """
        if self.wrapper is not None:
            wrapper = self.wrapper(feed, self.ns)
        else:
            if feed.tag == "rss":
                self.wrapper = RSS2Wrapper
            elif feed.tag.startswith(NS_RDF):
                # The root is in the RDF namespace, but channel and items
                # normally are not; look up where they actually live.
                self.ns = self._findChannelNamespace(feed)
                self.wrapper = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_09):
                self.ns = NS_RSS_09
                self.wrapper = RSS1Wrapper
            elif feed.tag.startswith(NS_RSS_10):
                self.ns = NS_RSS_10
                self.wrapper = RSS1Wrapper
            elif feed.tag.endswith("feed"):
                self.wrapper = PEAWrapper
            else:
                # Call syntax is valid on both Python 2 and Python 3.
                raise NotImplementedError('Unsupported Feed: %s' % feed.tag)

            wrapper = self.wrapper(feed, self.ns)

            self.title = strip(wrapper.title or "").encode("UTF-8")
            self.description = strip_readable(wrapper.description or "").encode("UTF-8")

        return self.gotWrapper(wrapper)
211