use new async picture ePicLoad facilities and save temporary downloads to /tmp rather...
[enigma2-plugins.git] / simplerss / src / RSSFeed.py
1 from sets import Set
2 from TagStrip import strip, strip_readable
3 from Components.Scanner import ScanFile
4
5 NS_RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
6 NS_RSS_09 = "{http://my.netscape.com/rdf/simple/0.9/}"
7 NS_RSS_10 = "{http://purl.org/rss/1.0/}"
8
9 # based on http://effbot.org/zone/element-rss-wrapper.htm
class ElementWrapper:
    """Expose the child elements of an XML element as attributes.

    Reading ``wrapper.foo`` returns the text of the first ``foo`` child of
    the wrapped element (the tag is prefixed with the namespace given at
    construction time), or ``None`` when there is no such child.
    """

    def __init__(self, element, ns = ""):
        # element: the XML element whose children are exposed
        # ns: namespace prefix in "{uri}" form, prepended to every tag lookup
        self._element = element
        self._ns = ns

    def __getattr__(self, tag):
        # __getattr__ only runs when normal attribute lookup failed.
        # Special names must fail the usual way so that protocols probing
        # for them (iteration, copying, pickling, ...) are not handed the
        # result of an XML lookup.  The two backing attributes must fail
        # as well: if __init__ has not run yet (copy/unpickle/__new__),
        # reading self._element below would re-enter this method forever.
        if tag.startswith("__") or tag in ("_element", "_ns"):
            raise AttributeError(tag)
        return self._element.findtext(self._ns + tag)
19
class RSSEntryWrapper(ElementWrapper):
    """Wrap a single RSS ``item`` element.

    On top of the plain child-text lookup of ElementWrapper this provides:

    * ``enclosures`` - list of dicts with the keys "href", "type" and
      "length" built from the ``enclosure`` children
    * ``id`` - text of ``guid``, falling back to title + link
    * ``updated`` - alias for the ``lastBuildDate`` child
    * ``summary`` - alias for the ``description`` child
    """

    def __getattr__(self, tag):
        if tag == "enclosures":
            enclosures = []
            for elem in self._element.findall(self._ns + "enclosure"):
                length = elem.get("length")
                if length:
                    try:
                        # Attribute holds bytes; keep whole MiB only.
                        # "//" keeps the flooring result the Python 2 "/"
                        # produced, independent of the interpreter version.
                        length = int(length) // 1048576
                    except ValueError:
                        # Non-numeric attribute in the feed: treat the
                        # size as unknown (same value as a missing
                        # attribute) instead of aborting the whole feed.
                        length = None
                enclosures.append({
                    "href": elem.get("url"),
                    "type": elem.get("type"),
                    "length": length
                })
            return enclosures
        if tag == "id":
            possibleId = self._element.findtext(self._ns + "guid")
            if not possibleId:
                # No usable guid: synthesize one.  Either child may be
                # missing (lookup yields None), which must not raise here;
                # an empty result tells the caller there is no id.
                possibleId = ''.join([self.title or '', self.link or ''])
            return possibleId
        if tag == "updated":
            tag = "lastBuildDate"
        elif tag == "summary":
            tag = "description"
        return ElementWrapper.__getattr__(self, tag)
44
class PEAEntryWrapper(ElementWrapper):
    """Wrap a single ``entry`` element of a feed handled by PEAWrapper.

    Links and enclosures are both stored in ``link`` children and are told
    apart by their ``rel`` attribute:

    * ``link`` - "href" of the first ``link`` child that is not an
      enclosure, or "" if there is none
    * ``enclosures`` - list of dicts with the keys "href", "type" and
      "length" built from the ``link`` children with rel="enclosure"

    Every other attribute is the text of the child with that name.
    """

    def __getattr__(self, tag):
        if tag == "link":
            for elem in self._element.findall(self._ns + tag):
                if elem.get("rel") != "enclosure":
                    href = elem.get("href")
                    # A link without href is useless to the caller; keep
                    # looking so a string is always returned.
                    if href:
                        return href
            return ""
        if tag == "enclosures":
            enclosures = []
            for elem in self._element.findall(self._ns + "link"):
                if elem.get("rel") != "enclosure":
                    continue
                length = elem.get("length")
                if length:
                    try:
                        # Attribute holds bytes; keep whole MiB only.
                        # "//" keeps the flooring result the Python 2 "/"
                        # produced, independent of the interpreter version.
                        length = int(length) // 1048576
                    except ValueError:
                        # Non-numeric attribute in the feed: treat the
                        # size as unknown (same value as a missing
                        # attribute) instead of aborting the whole feed.
                        length = None
                enclosures.append({
                    "href": elem.get("href"),
                    "type": elem.get("type"),
                    "length": length
                })
            return enclosures
        return ElementWrapper.__getattr__(self, tag)
66
class RSSWrapper(ElementWrapper):
    """Sequence-like view of a feed.

    Attribute access reads children of the channel element, while
    indexing, len() and iteration operate on the item elements.
    """

    def __init__(self, channel, items, ns = ""):
        # channel: element carrying the feed-level fields
        # items: sequence of the entry elements
        # ns: namespace prefix handed on to every entry wrapper
        ElementWrapper.__init__(self, channel, ns)
        self._items = items

    def __len__(self):
        return len(self._items)

    def __getitem__(self, index):
        return RSSEntryWrapper(self._items[index], self._ns)

    def __iter__(self):
        # Go through self[...] so subclasses overriding __getitem__ decide
        # which entry wrapper is used; all wrappers are created up front.
        wrapped = []
        for position in range(len(self)):
            wrapped.append(self[position])
        return iter(wrapped)
80
class RSS1Wrapper(RSSWrapper):
    """Feed layout where ``channel`` and ``item`` are both direct,
    namespace-qualified children of the root element."""

    def __init__(self, feed, ns):
        # feed: root element of the parsed document
        # ns: namespace prefix in "{uri}" form used for channel and items
        channel = feed.find(ns + "channel")
        items = feed.findall(ns + "item")
        RSSWrapper.__init__(self, channel, items, ns)
87
class RSS2Wrapper(RSSWrapper):
    """Feed layout where the un-namespaced ``channel`` child of the root
    contains the ``item`` elements."""

    def __init__(self, feed, ns):
        # ns is accepted so all wrappers share one constructor signature;
        # it is not used, lookups here are always un-namespaced.
        channel = feed.find("channel")
        items = channel.findall("item")
        RSSWrapper.__init__(self, channel, items)
94
class PEAWrapper(RSSWrapper):
    """Feed layout where the root element itself carries the feed-level
    fields and the entries are its ``entry`` children.

    The namespace is taken from the root tag, the ``ns`` argument is
    ignored.  ``description`` is an alias for the ``subtitle`` child.
    """

    def __init__(self, feed, ns):
        # Root tag looks like "{uri}feed"; keep everything up to and
        # including the closing brace.  find() yields -1 when the root has
        # no namespace, which makes the slice empty instead of raising
        # like index() did.
        ns = feed.tag[:feed.tag.find("}") + 1]
        RSSWrapper.__init__(
            self, feed, feed.findall(ns + "entry"), ns
        )

    def __getitem__(self, index):
        return PEAEntryWrapper(self._items[index], self._ns)

    def __getattr__(self, tag):
        if tag == "description":
            tag = "subtitle"
        return ElementWrapper.__getattr__(self, tag)
109
class BaseFeed:
    """Common state shared by every feed: uri, title, description and
    the list of known entries."""

    # Upper bound for the number of entries kept in history
    MAX_HISTORY_ELEMENTS = 100

    def __init__(self, uri, title = "", description = ""):
        # The URI doubles as the identifier of the feed
        self.uri = uri

        # Without an explicit title the UTF-8 encoded URI is shown instead
        if title:
            self.title = title
        else:
            self.title = uri.encode("UTF-8")
        self.description = description

        # Every instance gets a list of its own
        self.history = []

    def __str__(self):
        fields = (
            self.__class__,
            self.title,
            self.description,
            len(self.history),
        )
        return "<%s, \"%s\", \"%s\", %d items>" % fields
125
class UniversalFeed(BaseFeed):
    """Feed which can handle rdf, rss and atom feeds utilizing abstraction wrappers."""

    def __init__(self, uri, autoupdate):
        BaseFeed.__init__(self, uri)

        # Set Autoupdate
        self.autoupdate = autoupdate

        # Initialize
        self.last_update = None   # feed-level "updated" value seen last
        self.last_ids = set()     # ids of all entries already in history
        self.wrapper = None       # wrapper class, chosen on the first gotFeed
        self.ns = ""              # namespace prefix passed to the wrapper

    def gotWrapper(self, wrapper):
        """Merge the entries of wrapper into the history.

        Entries without title or id and entries whose id is already known
        are skipped.  New entries are put at the front of the history in
        feed order; each one is a tuple of UTF-8 encoded title, link and
        summary plus a list of ScanFile objects for its enclosures.

        Returns the list of entries added by this call (cut to
        MAX_HISTORY_ELEMENTS); empty if the feed reports the same
        "updated" value as on the previous call.
        """
        updated = wrapper.updated
        if updated and self.last_update == updated:
            return []

        idx = 0
        for item in wrapper:
            # Try to read title, continue if none found.  A missing child
            # is reported as None by the wrappers, hence the "or".
            title = strip(item.title or "")
            if not title:
                continue

            # Try to read id, continue if none found (invalid feed or internal error) or to be excluded
            item_id = item.id
            if not item_id or item_id in self.last_ids:
                continue

            # Link is optional, fall back to an empty string
            link = item.link or ""

            # Read out enclosures
            enclosures = [
                ScanFile(
                    enclosure["href"],
                    mimetype = enclosure["type"],
                    size = enclosure["length"],
                    autodetect = False
                )
                for enclosure in item.enclosures
            ]

            # Try to read summary, empty if none
            summary = strip_readable(item.summary or "")

            # Update Lists
            self.history.insert(idx, (
                title.encode("UTF-8"),
                link.encode("UTF-8"),
                summary.encode("UTF-8"),
                enclosures
            ))
            self.last_ids.add(item_id)

            idx += 1

        # Eventually cut history
        del self.history[self.MAX_HISTORY_ELEMENTS:]

        # Remember what we processed; only done once the loop went through
        # so a failed run is retried.  Without this the check at the top
        # could never match.
        self.last_update = updated

        return self.history[:idx]

    def _detectFormat(self, feed):
        """Return (wrapper class, namespace prefix) for the root element feed.

        Raises NotImplementedError if the root tag is not recognized.
        """
        tag = feed.tag
        if tag == "rss":
            return RSS2Wrapper, ""
        if tag.startswith(NS_RDF):
            # The root is rdf:RDF, but channel and items live in the
            # RSS 1.0 / 0.9 namespace, so probe for the one actually used.
            for candidate in (NS_RSS_10, NS_RSS_09):
                if feed.find(candidate + "channel") is not None:
                    return RSS1Wrapper, candidate
            # Nothing found: keep the former behaviour
            return RSS1Wrapper, NS_RDF
        if tag.startswith(NS_RSS_09):
            return RSS1Wrapper, NS_RSS_09
        if tag.startswith(NS_RSS_10):
            return RSS1Wrapper, NS_RSS_10
        if tag.endswith("feed"):
            # PEAWrapper derives the namespace from the root tag itself
            return PEAWrapper, ""
        raise NotImplementedError('Unsupported Feed: %s' % tag)

    def gotFeed(self, feed):
        """Process the root element of a freshly parsed feed document.

        On the first call the feed format is detected and title and
        description are taken from the feed; later calls reuse the
        detected format.  Returns the list of new entries (see gotWrapper).

        Raises NotImplementedError for unsupported feed formats.
        """
        if self.wrapper is not None:
            wrapper = self.wrapper(feed, self.ns)
        else:
            wrapper_class, ns = self._detectFormat(feed)
            wrapper = wrapper_class(feed, ns)

            # Commit only after the wrapper could be built, otherwise a
            # failed first attempt would skip reading title/description
            # on the next one.
            self.wrapper = wrapper_class
            self.ns = ns

            self.title = strip(wrapper.title or "").encode("UTF-8")
            self.description = strip_readable(wrapper.description or "").encode("UTF-8")

        return self.gotWrapper(wrapper)
212