SeriesPlugin: 1.5.8 Fixed encoding, popups, channel list integration
[enigma2-plugins.git] / seriesplugin / src / Identifiers / WunschlisteFeed.py
1 # -*- coding: utf-8 -*-
2 # by betonme @2012
3
4 # Imports
5 from Components.config import config
6
7 from Tools.BoundFunction import boundFunction
8
9 from urllib import urlencode
10
11 from HTMLParser import HTMLParser
12
13 from datetime import datetime
14
15 import re
16 from sys import maxint
17
18 # Internal
19 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
20 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
21 from Plugins.Extensions.SeriesPlugin.Logger import splog
22
23 from iso8601 import parse_date
24
25 import codecs
# Stateless UTF-8 encoder function; calling it returns an (encoded, length)
# tuple, of which str_to_utf8() keeps only the first element.
utf8_encoder = codecs.getencoder("utf-8")
27
28
29 # Constants
# Series search endpoint; getSeries() appends the url-encoded show name as
# query parameter "q".
SERIESLISTURL     = "http://www.wunschliste.de/ajax/search_dropdown.pl?"
# Atom feed endpoint; getNextPage() appends the series id as query
# parameter "s".
EPISODEIDURLATOM  = "http://www.wunschliste.de/xml/atom.pl?"
# RSS counterpart of the feed above, disabled.
#EPISODEIDURLRSS  = "http://www.wunschliste.de/xml/rss.pl?"
33
34
35 # Series: EpisodeTitle (Season.Episode) - Weekday Date, Time / Channel (Country)
36 # Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie (Pay-TV)
37 # Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
38 # Two and a Half Men: Der Mittwochs-Mann (1) (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
39 # Der Troedeltrupp - Das Geld liegt im Keller: Folge 109 (109) - Do 03.05., 16.15:00 Uhr / RTL II
40 # Galileo: U.a.: Die schaerfste Chili der Welt - Fr 04.05., 19.05:00 Uhr / ProSieben
41 # Galileo: Magazin mit Aiman Abdallah, BRD 2012 - Mi 09.05., 06.10:00 Uhr / ProSieben
42 # Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
# The four patterns below are applied one after another to a feed title;
# after each match the caller cuts the title at match.start(), so every
# pattern only ever sees what is left in front of the previous match.
# Raw strings keep the backslashes literal instead of relying on Python
# passing unknown escape sequences through unchanged.

# Channel: text after the last "/" up to "(" or the end of the line
CompiledRegexpAtomChannel = re.compile(r'\/(?!.*\/) ([^\(]+)')
# Date: text after the last "-" (once the channel has been cut off)
CompiledRegexpAtomDate = re.compile(r'-(?!.*-) (.+)')
# Optional episode number: content of the last "(...)" followed by a space
CompiledRegexpAtomEpisode = re.compile(r'\((?!.*\()(.+)\) ')
# "Series: Title": capture what follows the last ": "
CompiledRegexpAtomTitle = re.compile(r'.+: (.+)')
51
52 # (Season.Episode) - EpisodeTitle
53 # (21.84) Folge 4985
54 # (105) Folge 105
55 # (4.11/4.11) Mama ist die Beste/Rund um die Uhr
56 # Galileo: Die schaerfste Chili der Welt
57 # Galileo: Jumbo auf Achse: Muelltonnenkoch
58 # Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
59 #CompiledRegexpPrintTitle = re.compile( '(\(.*\) )?(.+)')
60
# Splits an episode number such as "21.84", "4x11" or "109":
# group(2) is the optional season, group(3) the episode.
# Raw string so that "\d" and "\." reach the regex engine unchanged.
CompiledRegexpEpisode = re.compile(r'((\d+)[\.x])?(\d+)')
62
63
def str_to_utf8(s):
        """Return s, a byte string with unicode escapes, as UTF-8 bytes.

        The value is logged before the conversion, after decoding and after
        encoding, so encoding problems can be traced in the plugin log.
        """
        log_prefix = "WL: str_to_utf8: s: "
        splog(log_prefix, repr(s))

        # Resolve escape sequences into a unicode object
        decoded = s.decode('unicode-escape')
        splog(log_prefix, repr(decoded))

        # The codec's encoder is used for the special characters; it returns
        # an (output, length) pair and only the output is needed here.
        encoded, _consumed = utf8_encoder(decoded)
        splog(log_prefix, repr(encoded))
        return encoded
73
74
class WLAtomParser(HTMLParser):
        """Collect (title, updated) text pairs from the <entry> elements of a feed.

        After feed() has been called, self.list holds one tuple per closed
        <entry>, in document order. Text of <title> / <updated> elements that
        appear outside an entry (for example the feed's own title) is dropped.
        """

        def __init__(self):
                HTMLParser.__init__(self)
                # True while the parser is inside a <title> / <updated> element
                self.title = False
                self.updated = False
                # Text collected for the entry that is currently being read
                self.titlestr = ''
                self.updatedstr = ''
                # Finished entries as (title, updated) tuples
                self.list = []

        def handle_starttag(self, tag, attributes):
                if tag == 'title':
                        self.title = True
                elif tag == 'updated':
                        self.updated = True
                elif tag == 'entry':
                        # Start every entry with empty buffers. Without this the
                        # feed-level <title>/<updated> that precede the first
                        # <entry> would be glued onto the first entry.
                        self.titlestr = ''
                        self.updatedstr = ''

        def handle_endtag(self, tag):
                if tag == 'title':
                        self.title = False
                elif tag == 'updated':
                        self.updated = False
                elif tag == 'entry':
                        # Entry complete: store it and reset the buffers
                        self.list.append( (self.titlestr, self.updatedstr) )
                        self.titlestr = ''
                        self.updatedstr = ''

        def handle_data(self, data):
                if self.title:
                        # The parser may deliver one text node in several pieces
                        self.titlestr += data
                elif self.updated:
                        self.updatedstr = data
105
106
class WunschlisteFeed(IdentifierBase):
        """Episode identifier that queries the wunschliste.de search and Atom feed.

        getEpisode() is the entry point. It searches for series matching a show
        name (getSeries / parseSeries) and then scans each series' Atom feed for
        the entry whose timestamp is closest to the requested begin time
        (getNextPage / parseNextPage).

        getPage(), doCache(), filterKnownIds() and getAlternativeSeries() are
        not defined in this class; presumably they come from IdentifierBase.
        """

        def __init__(self):
                IdentifierBase.__init__(self)

        @classmethod
        def knowsToday(cls):
                """Capability flag, always False for this identifier.

                NOTE(review): presumably means "can resolve broadcasts of the
                current day" - confirm against the code that reads the flag.
                """
                return False

        @classmethod
        def knowsFuture(cls):
                """Capability flag, always True for this identifier.

                NOTE(review): presumably means "can resolve upcoming
                broadcasts" - confirm against the code that reads the flag.
                """
                return True

        def getEpisode(self, name, begin, end=None, service=None, channels=None):
                """Look up season, episode and title for one broadcast.

                name     -- show name used for the series search
                begin    -- begin of the broadcast; it is subtracted from the
                            feed timestamps, so it has to be a naive datetime
                end      -- stored on the instance only
                service  -- passed to compareChannels() together with channels
                channels -- channel data for compareChannels(); a new empty
                            list is used when omitted

                On success the (season, episode, title, series) tuple found by
                getNextPage() is returned. On failure a translated message
                string is returned.
                """
                self.begin = begin
                self.end = end
                self.service = service
                # A new list per call instead of one list object shared by
                # every call that relies on the default
                self.channels = [] if channels is None else channels

                self.knownids = []
                self.returnvalue = None

                # Check preconditions
                if not name:
                        message = _("Skip Wunschliste: No show name specified")
                        splog(message)
                        return message
                if not begin:
                        message = _("Skip Wunschliste: No begin timestamp specified")
                        splog(message)
                        return message

                splog("WunschlisteFeed getEpisode")

                while name:
                        ids = self.getSeries(name)

                        # Candidates are taken from the end of the list
                        while ids:
                                candidate = ids.pop()
                                if not candidate or len(candidate) != 2:
                                        continue
                                series_id, series_name = candidate

                                # Handle encodings
                                self.series = str_to_utf8(series_name)

                                result = self.getNextPage( series_id )
                                if result:
                                        return result

                        # No candidate produced an episode: retry with the next
                        # alternative name; a false value ends the search
                        name = self.getAlternativeSeries(name)

                return ( self.returnvalue or _("No matching series found") )

        def getSeries(self, name):
                """Search for series matching name.

                Returns whatever filterKnownIds() makes of the list of
                (id, name) tuples, or None when there is no usable result.
                """
                # A former variant replaced every character outside
                # [a-zA-Z0-9-*] by a space before querying; the name is now
                # sent unmodified.
                url = SERIESLISTURL + urlencode({ 'q' : name })
                data = self.getPage( url )

                # A string is a raw response that still has to be parsed; the
                # parsed list is handed to doCache() under the same url.
                if data and isinstance(data, basestring):
                        data = self.parseSeries(data)
                        self.doCache(url, data)

                # getPage() can apparently also deliver an already parsed list
                if data and isinstance(data, list):
                        splog("WunschlisteFeed ids", data)
                        return self.filterKnownIds(data)
                return None

        def parseSeries(self, data):
                """Turn the search response text into a list of (id, name) tuples.

                Each line must consist of exactly four "|"-separated fields:
                name, country/year, id and one further field that is ignored.
                Lines of any other shape are logged and skipped. The list is
                returned in reverse line order, so that popping from its end
                yields the lines in their original order.
                """
                serieslist = []
                for line in data.splitlines():
                        values = line.split("|")
                        if len(values) != 4:
                                splog("WunschlisteFeed: ParseError: " + str(line))
                                continue
                        series_name, _countryyear, series_id, _ignored = values
                        splog(series_id, series_name)
                        serieslist.append( (series_id, series_name) )
                serieslist.reverse()
                return serieslist

        def parseNextPage(self, data):
                """Parse Atom feed text into a list of (title, updated) tuples."""
                # Handle malformed markup: the feed can contain bare ampersands
                # (e.g. right after target="_blank"). Escape those so the parser
                # does not take the following text for an entity reference.
                # Ampersands that already start a complete reference
                # (&name; / &#123; / &#x7b;) are left untouched to avoid
                # escaping them twice.
                data = re.sub(r'&(?!#?\w+;)', '&amp;', data)
                parser = WLAtomParser()
                parser.feed(data)
                return parser.list

        def getNextPage(self, id):
                """Search the Atom feed of series id for the broadcast at self.begin.

                Feed entries are (title, updated) pairs. An entry qualifies
                when its timestamp is within the configured max_time_drift
                (minutes) of self.begin and compareChannels() accepts its
                channel. Among the qualifying entries the first one with the
                smallest time difference wins.

                Returns a (season, episode, title, series) tuple of strings,
                or None when no entry qualifies. When an entry is in the time
                window but on a different channel, self.returnvalue is set to
                a hint for the user.
                """
                splog("WunschlisteFeed getNextPage")

                url = EPISODEIDURLATOM + urlencode({ 's' : id })
                data = self.getPage( url )

                # A string is a raw response that still has to be parsed; the
                # parsed list is handed to doCache() under the same url.
                if data and isinstance(data, basestring):
                        data = self.parseNextPage(data)
                        self.doCache(url, data)

                if not data or not isinstance(data, list):
                        return None

                yepisode = None
                ydelta = maxint

                for tds in data:
                        if not tds or len(tds) != 2:
                                continue
                        xtitle, xupdated = tds
                        if xtitle is None or xupdated is None:
                                continue

                        # ISO 8601 timestamp such as "2014-11-10T20:15:00+01:00";
                        # the offset is discarded, not converted
                        xbegin = parse_date(xupdated).replace(tzinfo=None)

                        # Written out by hand because Python 2.6 has no
                        # timedelta.total_seconds()
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        max_drift = int(config.plugins.seriesplugin.max_time_drift.value) * 60
                        splog(self.begin, xbegin, delta, max_drift)

                        if delta > max_drift:
                                # Outside the time window. Once a match exists,
                                # leaving the window ends the search.
                                if yepisode:
                                        break
                                continue

                        channel = CompiledRegexpAtomChannel.search(xtitle)
                        if not channel:
                                continue

                        if not compareChannels(self.channels, channel.group(1), self.service):
                                self.returnvalue = _("Check the channel name")
                                continue

                        if delta >= ydelta:
                                # Not closer than the match already found
                                break

                        # Slice string to remove channel
                        xtitle = xtitle[:channel.start()]
                        date = CompiledRegexpAtomDate.search(xtitle)
                        if not date:
                                continue

                        # Slice string to remove date
                        xtitle = xtitle[:date.start()]
                        numbering = CompiledRegexpAtomEpisode.search(xtitle)

                        if numbering:
                                # Extract season and episode
                                raw_number = numbering.group(1)
                                # Slice string to remove season and episode
                                xtitle = xtitle[:numbering.start()]

                                parts = CompiledRegexpEpisode.search(raw_number)
                                if parts:
                                        # A number without season counts as season 1
                                        xseason = parts.group(2) or "1"
                                        xepisode = parts.group(3) or "0"
                                else:
                                        splog("WunschlisteFeed wrong episode format", raw_number)
                                        xseason = "1"
                                        xepisode = "0"
                        else:
                                # No "(season.episode)" part in the title
                                splog("WunschlisteFeed wrong title format", xtitle)
                                xseason = "0"
                                xepisode = "0"

                        title = CompiledRegexpAtomTitle.search(xtitle)
                        if title:
                                # Extract episode title and handle encodings
                                xtitle = str_to_utf8(title.group(1))

                                yepisode = (xseason, xepisode, xtitle, self.series)
                                ydelta = delta

                if yepisode:
                        return ( yepisode )
                return None