plugin.py split up,\nfeed overview and single feed view in different screens,\nyellow...
[enigma2-plugins.git] / simplerss / src / Feed.py
1 from sets import Set
2
class Feed:
    """State and parser for a single RSS (0.9x / 1.0 / 2.0) or Atom feed.

    A parsed DOM is handed to gotDom(), which detects the feed flavour on
    first use, extracts the entries that have not been seen before and keeps
    a bounded history of them.

    Every entry is a tuple ``(title, link, summary, enclosures)`` whose text
    members are UTF-8 encoded and stripped, and where ``enclosures`` is a
    list of ``(url, mime_type)`` tuples (UTF-8 encoded as well).

    Attributes:
        uri: Feed address as passed to the constructor.
        autoupdate: Auto-update flag as passed to the constructor.
        type: None until the first gotDom() call, then "rss" or "atom".
        title: UTF-8 feed title; the encoded URI until a title was read.
        description: UTF-8 feed description / subtitle, "" if unknown.
        last_update: Last seen feed-level timestamp text, or None.
        last_ids: Set of ids of all entries reported so far.
        history: Newest-first list of entries, at most
            MAX_HISTORY_ELEMENTS long.
    """

    # Upper bound for the number of entries kept in ``history``.
    MAX_HISTORY_ELEMENTS = 100

    # Root element ``version`` values that identify an RSS 0.9x / 2.0 feed.
    _RSS_VERSIONS = ("2.0", "0.94", "0.93", "0.92", "0.91")

    def __init__(self, uri, autoupdate):
        """Create an empty feed for ``uri``; nothing is fetched or parsed."""
        self.uri = uri
        self.autoupdate = autoupdate
        self.type = None
        self.title = uri.encode("UTF-8")
        self.description = ""
        self.last_update = None
        self.last_ids = set()
        self.history = []

    def gotDom(self, dom):
        """Process a freshly parsed feed document.

        On the first call the feed type is detected from the root element
        and the feed title / description are read (best effort).

        Args:
            dom: DOM document of the feed (xml.dom style API).

        Returns:
            List of entries that were not known before (may be empty).

        Raises:
            NotImplementedError: The root element is neither RSS, RDF nor
                an Atom ``feed``.
        """
        if self.type is None:
            root = dom.documentElement
            # RSS 0.9x / 2.0 is recognised by its version attribute,
            # RSS 1.0 by its RDF root element
            # (NS: http://www.w3.org/1999/02/22-rdf-syntax-ns#).
            if root.getAttribute("version") in self._RSS_VERSIONS or root.localName == "RDF":
                self.type = "rss"
                channels = dom.getElementsByTagName("channel")
                if channels:
                    self._readMeta(channels[0], "title", "description")
            # Atom (NS: http://www.w3.org/2005/Atom)
            elif root.localName == "feed":
                self.type = "atom"
                self._readMeta(dom, "title", "subtitle")
            else:
                raise NotImplementedError('Unsupported Feed: %s' % root.localName)

        if self.type == "rss":
            print("[SimpleRSS] type is rss")
            return self.gotRSSDom(dom)
        elif self.type == "atom":
            print("[SimpleRSS] type is atom")
            return self.gotAtomDom(dom)

    def gotRSSDom(self, dom):
        """Return the new items of an RSS document.

        If the document carries a ``lastBuildDate`` equal to the one seen
        last time, nothing is parsed and an empty list is returned.
        """
        return self._parseIfChanged(dom, "lastBuildDate", "item", self.parseRSS)

    def parseRSS(self, items):
        """Turn RSS ``item`` elements into entries and record them.

        Items without a title and items whose id is already known are
        skipped. The id is the guid, else the link (RSS 1.0 or invalid
        RSS 2.0), else the title, so that items carrying neither guid nor
        link are still told apart from each other.

        Args:
            items: Iterable of ``item`` DOM elements.

        Returns:
            List of the entries that were added by this call.
        """
        new_items = []
        for item in items:
            title = self._firstText(item, "title")
            if title is None:
                continue

            link = self._firstText(item, "link", "")
            guid = self._firstText(item, "guid") or link or title

            # Skip everything that has been reported already
            if guid in self.last_ids:
                continue

            summary = self._firstText(item, "description", "")
            enclosure = [
                (current.getAttribute("url").encode("UTF-8"),
                 current.getAttribute("type").encode("UTF-8"))
                for current in item.getElementsByTagName("enclosure")
            ]

            new_items.append(self._makeEntry(title, link, summary, enclosure))
            self.last_ids.add(guid)

        self._remember(new_items)
        return new_items

    def gotAtomDom(self, dom):
        """Return the new entries of an Atom document.

        If the first ``updated`` element of the document equals the one seen
        last time, nothing is parsed and an empty list is returned.
        """
        return self._parseIfChanged(dom, "updated", "entry", self.parseAtom)

    def parseAtom(self, items):
        """Turn Atom ``entry`` elements into entries and record them.

        Entries without a title or without an id (invalid feed) and entries
        whose id is already known are skipped.

        Args:
            items: Iterable of ``entry`` DOM elements.

        Returns:
            List of the entries that were added by this call.
        """
        new_items = []
        for item in items:
            title = self._firstText(item, "title")
            if title is None:
                continue

            entry_id = self._firstText(item, "id")
            if entry_id is None or entry_id in self.last_ids:
                continue

            enclosure = []
            alternate = None
            fallback = ""
            for current in item.getElementsByTagName("link"):
                rel = current.getAttribute("rel")
                if rel == "enclosure":
                    enclosure.append((current.getAttribute("href").encode("UTF-8"),
                                      current.getAttribute("type").encode("UTF-8")))
                elif rel in ("", "alternate"):
                    # A missing rel means "alternate": the entry itself
                    alternate = current.getAttribute("href")
                else:
                    # replies, self, edit, ... only used if nothing better exists
                    fallback = current.getAttribute("href")
            if alternate is not None:
                link = alternate
            else:
                link = fallback

            summary = self._firstText(item, "summary", "")

            new_items.append(self._makeEntry(title, link, summary, enclosure))
            self.last_ids.add(entry_id)

        self._remember(new_items)
        return new_items

    def _firstText(self, node, tag_name, default=None):
        """Return the data of the first child of the first ``tag_name``
        descendant of ``node``, or ``default`` if there is no such element,
        it is empty or its first child carries no character data."""
        try:
            return node.getElementsByTagName(tag_name)[0].childNodes[0].data
        except (IndexError, AttributeError):
            return default

    def _readMeta(self, node, title_tag, description_tag):
        """Read feed title and description below ``node``; a value that is
        missing leaves the corresponding attribute untouched."""
        title = self._firstText(node, title_tag)
        if title is not None:
            self.title = title.encode("UTF-8")
        description = self._firstText(node, description_tag)
        if description is not None:
            self.description = description.encode("UTF-8")

    def _parseIfChanged(self, dom, stamp_tag, item_tag, parser):
        """Run ``parser`` on all ``item_tag`` elements unless the feed-level
        timestamp ``stamp_tag`` is present and unchanged since last time."""
        updated = self._firstText(dom, stamp_tag)
        if updated is not None and updated == self.last_update:
            return []

        new_items = parser(dom.getElementsByTagName(item_tag))
        # Remember the timestamp only after parsing went through, so a
        # failed parse does not hide the items on the next attempt.
        if updated is not None:
            self.last_update = updated
        return new_items

    def _makeEntry(self, title, link, summary, enclosure):
        """Build the entry tuple with UTF-8 encoded, stripped text members."""
        return (title.encode("UTF-8").strip(), link.encode("UTF-8").strip(),
                summary.encode("UTF-8").strip(), enclosure)

    def _remember(self, new_items):
        """Put ``new_items`` in front of the history and cut the history
        down to MAX_HISTORY_ELEMENTS entries."""
        self.history = (new_items + self.history)[:self.MAX_HISTORY_ELEMENTS]