SeriesPlugin 2.2.2: Bulk update
[enigma2-plugins.git] / seriesplugin / src / Identifiers / WunschlisteFeed.py
1 # -*- coding: utf-8 -*-
2 # by betonme @2012
3
4 # Imports
5 from Components.config import config
6
7 from Tools.BoundFunction import boundFunction
8
9 from urllib import urlencode
10
11 from HTMLParser import HTMLParser
12
13 from datetime import datetime
14
15 import re
16 from sys import maxint
17
18 # Internal
19 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
20 from Plugins.Extensions.SeriesPlugin.Logger import splog
21
22 from iso8601 import parse_date
23
24 import codecs
# Encoder function: utf8_encoder(unicode_string) returns (utf8_bytes, length)
utf8_encoder = codecs.getencoder("utf-8")


# Constants
# Base URLs; the urlencoded query string is appended directly after the "?"
SERIESLISTURL     = "http://www.wunschliste.de/ajax/search_dropdown.pl?"
EPISODEIDURLATOM  = "http://www.wunschliste.de/xml/atom.pl?"
#EPISODEIDURLRSS  = "http://www.wunschliste.de/xml/rss.pl?"


# Layout of an Atom entry title:
# Series: EpisodeTitle (Season.Episode) - Weekday Date, Time / Channel (Country)
# Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie (Pay-TV)
# Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
# Two and a Half Men: Der Mittwochs-Mann (1) (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
# Der Troedeltrupp - Das Geld liegt im Keller: Folge 109 (109) - Do 03.05., 16.15:00 Uhr / RTL II
# Galileo: U.a.: Die schaerfste Chili der Welt - Fr 04.05., 19.05:00 Uhr / ProSieben
# Galileo: Magazin mit Aiman Abdallah, BRD 2012 - Mi 09.05., 06.10:00 Uhr / ProSieben
# Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#
# The patterns below are meant to be applied from right to left: after each
# match the title is cut at match.start() before the next pattern is tried.
# All patterns are raw strings so the backslashes reach the regex engine
# unchanged instead of relying on Python keeping unknown string escapes.

# Channel is between last / and ( or line end
CompiledRegexpAtomChannel = re.compile(r'\/(?!.*\/) ([^\(]+)')
# Date is between last - and channel
CompiledRegexpAtomDate = re.compile(r'-(?!.*-) (.+)')
# Find optional episode: content of the last (...) that is followed by a space
CompiledRegexpAtomEpisode = re.compile(r'\((?!.*\()(.+)\) ')
# Series: Title (greedy, so the text after the last ": " is captured)
CompiledRegexpAtomTitle = re.compile(r'.+: (.+)')

# (Season.Episode) - EpisodeTitle
# (21.84) Folge 4985
# (105) Folge 105
# (4.11/4.11) Mama ist die Beste/Rund um die Uhr
# Galileo: Die schaerfste Chili der Welt
# Galileo: Jumbo auf Achse: Muelltonnenkoch
# Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#CompiledRegexpPrintTitle = re.compile( '(\(.*\) )?(.+)')

# "Season.Episode", "SeasonxEpisode" or a bare "Episode":
# group(2) is the optional season, group(3) the episode number
CompiledRegexpEpisode = re.compile(r'((\d+)[\.x])?(\d+)')
61
62
def str_to_utf8(s):
        """Turn a string containing unicode escape sequences into UTF-8 bytes.

        The input is decoded with the 'unicode-escape' codec and the result
        is encoded with the module-level UTF-8 encoder. The value is logged
        before and after each step.
        """
        splog("WL: str_to_utf8: s: ", repr(s))

        # Resolve the escape sequences into real unicode characters
        decoded = s.decode('unicode-escape')
        splog("WL: str_to_utf8: s: ", repr(decoded))

        # Python 2.x can't convert the special chars natively;
        # the encoder returns (bytes, consumed length), only the bytes are used
        encoded, _consumed = utf8_encoder(decoded)
        splog("WL: str_to_utf8: s: ", repr(encoded))

        return encoded
72
73
class WLAtomParser(HTMLParser):
        """Collect a (title, updated) pair for every <entry> of an Atom feed.

        After feed() the pairs are available in self.list, in document order.
        Only text inside <title> and <updated> elements is recorded; all other
        elements are ignored.
        """

        def __init__(self):
                HTMLParser.__init__(self)
                # True while the parser is inside a <title> / <updated> element
                self.title = False
                self.updated = False
                # Text collected for the entry currently being parsed
                self.titlestr = ''
                self.updatedstr = ''
                # Finished (title, updated) tuples, one per closed </entry>
                self.list = []

        def handle_starttag(self, tag, attributes):
                if tag == 'entry':
                        # Discard <title>/<updated> text seen outside of an entry
                        # (e.g. the feed's own header), otherwise it would be
                        # prepended to the first entry's data.
                        self.titlestr = ''
                        self.updatedstr = ''
                elif tag == 'title':
                        self.title = True
                elif tag == 'updated':
                        self.updated = True

        def handle_endtag(self, tag):
                if tag == 'title':
                        self.title = False
                elif tag == 'updated':
                        self.updated = False
                elif tag == 'entry':
                        # Entry complete: store it and start over with empty buffers
                        self.list.append( (self.titlestr, self.updatedstr) )
                        self.titlestr = ''
                        self.updatedstr = ''

        def handle_data(self, data):
                if self.title:
                        # Title text can arrive in several chunks, so accumulate it
                        self.titlestr += data
                elif self.updated:
                        # The last chunk wins for the timestamp
                        self.updatedstr = data
104
105
class WunschlisteFeed(IdentifierBase):
        """Episode identifier using the wunschliste.de search and Atom feed URLs.

        getPage, doCacheList, filterKnownIds, getAlternativeSeries,
        compareChannels and max_time_drift are used but not defined in this
        class - presumably they are provided by IdentifierBase (verify there).
        """

        def __init__(self):
                IdentifierBase.__init__(self)

        @classmethod
        def knowsToday(cls):
                return False

        @classmethod
        def knowsFuture(cls):
                return True

        def getEpisode(self, name, begin, end=None, service=None):
                """Look up the episode of show `name` starting around `begin`.

                On Success: Return a (season, episode, title, series) tuple
                On Failure: Return a message string

                `begin` is subtracted from a naive datetime in getNextPage, so it
                has to be a naive datetime as well. `end` is only stored.
                `service` is handed to compareChannels together with the channel
                name found in the feed.
                """
                self.begin = begin
                self.end = end
                self.service = service

                self.knownids = []
                self.returnvalue = None

                # Check preconditions
                if not name:
                        splog(_("Skip Wunschliste: No show name specified"))
                        return _("Skip Wunschliste: No show name specified")
                if not begin:
                        splog(_("Skip Wunschliste: No begin timestamp specified"))
                        return _("Skip Wunschliste: No begin timestamp specified")

                splog("WunschlisteFeed getEpisode")

                while name:
                        ids = self.getSeries(name)

                        # Try every candidate series, last list element first
                        while ids:
                                idserie = ids.pop()

                                if idserie and len(idserie) == 2:
                                        seriesid, idname = idserie

                                        # Handle encodings
                                        self.series = str_to_utf8(idname)

                                        result = self.getNextPage( seriesid )
                                        if result:
                                                return result

                        # No candidate matched: retry with an alternative name;
                        # the loop ends once no further name is offered
                        name = self.getAlternativeSeries(name)

                return ( self.returnvalue or _("No matching series found") )

        def getSeries(self, name):
                """Return the candidate (id, name) list for `name`, or None.

                The page is requested via getPage; a string result is parsed and
                handed to doCacheList, a list result is used as it is. The list
                is passed through filterKnownIds before it is returned.
                """
                #url = SERIESLISTURL + urlencode({ 'q' : re.sub("[^a-zA-Z0-9-*]", " ", name) })
                url = SERIESLISTURL + urlencode({ 'q' : name })
                data = self.getPage( url )

                if data and isinstance(data, basestring):
                        data = self.parseSeries(data)
                        self.doCacheList(url, data)

                if data and isinstance(data, list):
                        splog("WunschlisteFeed ids", data)
                        return self.filterKnownIds(data)

                return None

        def parseSeries(self, data):
                """Parse the search response into a list of (id, name) tuples.

                Every line is expected to consist of four "|" separated fields:
                name, country/year, id and one unused field. Other lines are
                logged and skipped. The result is returned in reversed order, so
                that list.pop() in getEpisode yields the first line first.
                """
                serieslist = []
                for line in data.splitlines():
                        values = line.split("|")
                        if len(values) == 4:
                                idname, countryyear, seriesid, temp = values
                                splog(seriesid, idname)
                                serieslist.append( (seriesid, idname) )
                        else:
                                splog("WunschlisteFeed: ParseError: " + str(line))
                serieslist.reverse()
                return serieslist

        def parseNextPage(self, data):
                """Parse an Atom feed into a list of (title, updated) tuples."""
                # Handle malformed HTML issues
                # NOTE(review): as written this replaces '&' with '&' and therefore
                # changes nothing. The replacement text was possibly an HTML entity
                # that got lost - confirm against the upstream repository.
                data = data.replace('&','&')  # target=\"_blank\"&
                parser = WLAtomParser()
                parser.feed(data)
                #splog(parser.list)
                return parser.list

        def getNextPage(self, id):
                """Search the Atom feed of series `id` for the broadcast at self.begin.

                Returns (season, episode, title, self.series) for the entry whose
                start time is closest to self.begin, within self.max_time_drift
                seconds and on a channel accepted by compareChannels. Returns None
                if there is no such entry. If an entry fits in time but not by
                channel, self.returnvalue is set to a hint message.

                The parameter is named `id` (shadowing the builtin) to keep the
                signature unchanged for existing callers.
                """
                splog("WunschlisteFeed getNextPage")

                url = EPISODEIDURLATOM + urlencode({ 's' : id })
                data = self.getPage( url )

                if data and isinstance(data, basestring):
                        data = self.parseNextPage(data)
                        self.doCacheList(url, data)

                if not (data and isinstance(data, list)):
                        return None

                yepisode = None
                ydelta = maxint

                for tds in data:
                        if not tds or len(tds) != 2:
                                continue

                        xtitle, xupdated = tds
                        # An entry without timestamp cannot be matched; the parser
                        # delivers '' in that case, which parse_date cannot handle
                        if xtitle is None or not xupdated:
                                continue

                        #import iso8601
                        #http://code.google.com/p/pyiso8601/
                        #"2014-11-10T20:15:00+01:00"
                        # The offset is dropped without conversion, so the wall clock
                        # time of the feed is compared with self.begin
                        xbegin = parse_date(xupdated).replace(tzinfo=None)

                        #Py2.6
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
                        splog(self.begin, xbegin, delta, self.max_time_drift)

                        if delta > self.max_time_drift:
                                if yepisode:
                                        # Already found a match and left the time window again
                                        break
                                continue

                        result = CompiledRegexpAtomChannel.search(xtitle)
                        if not result:
                                continue

                        if not self.compareChannels(self.service, result.group(1)):
                                self.returnvalue = _("Check the channel name")
                                continue

                        if delta >= ydelta:
                                # Not closer than the match we already have: stop here
                                break

                        # Slice string to remove channel, then parse the remainder
                        parsed = self._parseAtomTitle(xtitle[:result.start()])
                        if parsed:
                                xseason, xepisode, xtitle = parsed
                                yepisode = (xseason, xepisode, xtitle, self.series)
                                ydelta = delta

                return yepisode

        def _parseAtomTitle(self, text):
                """Split an entry title (channel already removed) into its parts.

                Returns (season, episode, title) or None if the date or the
                "Series: Title" part cannot be found. Season and episode are
                strings: a missing season defaults to "1", an unparsable episode
                to "1"/"0", a title without "(...)" part to "0"/"0".
                """
                result = CompiledRegexpAtomDate.search(text)
                if not result:
                        return None
                # Slice string to remove date
                text = text[:result.start()]

                result = CompiledRegexpAtomEpisode.search(text)
                if result:
                        # Extract season and episode
                        xepisode = result.group(1)
                        # Slice string to remove season and episode
                        text = text[:result.start()]

                        result = CompiledRegexpEpisode.search(xepisode)
                        if result:
                                xseason = result.group(2) or "1"
                                xepisode = result.group(3) or "0"
                        else:
                                splog("WunschlisteFeed wrong episode format", xepisode)
                                xseason = "1"
                                xepisode = "0"
                else:
                        splog("WunschlisteFeed wrong title format", text)
                        xseason = "0"
                        xepisode = "0"

                result = CompiledRegexpAtomTitle.search(text)
                if not result:
                        return None

                # Extract episode title and handle encodings
                return (xseason, xepisode, str_to_utf8(result.group(1)))