SeriesPlugin 1.2.3: Fixed Wunschliste Series Lookup
[enigma2-plugins.git] / seriesplugin / src / Identifiers / WunschlisteFeed.py
1 # by betonme @2012
2
3 # Imports
4 from Components.config import config
5
6 from Tools.BoundFunction import boundFunction
7
8 from urllib import urlencode
9
10 from HTMLParser import HTMLParser
11
12 from datetime import datetime
13
14 import re
15 from sys import maxint
16
17 # Internal
18 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
19 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
20 from Plugins.Extensions.SeriesPlugin.Logger import splog
21
22 from iso8601 import parse_date
23
24 # Constants
SERIESLISTURL     = "http://www.wunschliste.de/ajax/search_dropdown.pl?"
EPISODEIDURLATOM  = "http://www.wunschliste.de/xml/atom.pl?"
#EPISODEIDURLRSS  = "http://www.wunschliste.de/xml/rss.pl?"

# Atom entry titles are taken apart from right to left. Their layout is:
#   Series: EpisodeTitle (Season.Episode) - Weekday Date, Time / Channel (Country)
# Examples:
#   Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie (Pay-TV)
#   Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
#   Two and a Half Men: Der Mittwochs-Mann (1) (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
#   Der Troedeltrupp - Das Geld liegt im Keller: Folge 109 (109) - Do 03.05., 16.15:00 Uhr / RTL II
#   Galileo: U.a.: Die schaerfste Chili der Welt - Fr 04.05., 19.05:00 Uhr / ProSieben
#   Galileo: Magazin mit Aiman Abdallah, BRD 2012 - Mi 09.05., 06.10:00 Uhr / ProSieben
#   Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#
# All patterns are raw strings so that the backslashes reach the regex engine
# unchanged and no invalid-escape warnings are raised.

# Channel: text after the last "/ " up to the next "(" or the end of the line
# (a trailing blank before "(" is part of the captured group).
CompiledRegexpAtomChannel = re.compile(r'\/(?!.*\/) ([^\(]+)')
# Date: text after the last "- " (apply after the channel has been cut off).
CompiledRegexpAtomDate = re.compile(r'-(?!.*-) (.+)')
# Optional episode marker: content of the last "(...) " group.
CompiledRegexpAtomEpisode = re.compile(r'\((?!.*\()(.+)\) ')
# Episode title: text after the last ": " (the leading ".+" is greedy).
CompiledRegexpAtomTitle = re.compile(r'.+: (.+)')

# Layout of the print view, kept for reference:
#   (Season.Episode) - EpisodeTitle
#   (21.84) Folge 4985
#   (105) Folge 105
#   (4.11/4.11) Mama ist die Beste/Rund um die Uhr
#   Galileo: Die schaerfste Chili der Welt
#   Galileo: Jumbo auf Achse: Muelltonnenkoch
#   Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#CompiledRegexpPrintTitle = re.compile( '(\(.*\) )?(.+)')

# Episode marker: optional "<season>." or "<season>x" prefix, then the episode
# number. group(2) is the season (None when absent), group(3) the episode.
CompiledRegexpEpisode = re.compile(r'((\d+)[\.x])?(\d+)')
56
57
class WLAtomParser(HTMLParser):
        """Collect (title, updated) string pairs from the entries of an Atom feed.

        Text inside <title> is accumulated and text inside <updated> is
        remembered; each closing </entry> appends the current pair to
        ``self.list`` and clears both buffers. The buffers are only cleared
        at </entry>, so <title> text seen before the first entry ends up in
        front of that entry's title.
        """

        def __init__(self):
                HTMLParser.__init__(self)
                # Finished (title, updated) pairs, in document order.
                self.list = []
                # True while the parser is inside the respective element.
                self.title = self.updated = False
                # Text gathered for the entry currently being read.
                self.titlestr = self.updatedstr = ''

        def handle_starttag(self, tag, attributes):
                # The flag attributes carry the same names as the tags they track.
                if tag in ('title', 'updated'):
                        setattr(self, tag, True)

        def handle_endtag(self, tag):
                if tag in ('title', 'updated'):
                        setattr(self, tag, False)
                elif tag == 'entry':
                        # Entry complete: store the pair and start over.
                        self.list.append((self.titlestr, self.updatedstr))
                        self.titlestr = self.updatedstr = ''

        def handle_data(self, data):
                if self.title:
                        # Title text may arrive in several pieces.
                        self.titlestr = self.titlestr + data
                elif self.updated:
                        # Only the most recent chunk is kept for the timestamp.
                        self.updatedstr = data
88
89
90 class WunschlisteFeed(IdentifierBase):
91         def __init__(self):
92                 IdentifierBase.__init__(self)
93
94         @classmethod
95         def knowsToday(cls):
96                 return False
97
98         @classmethod
99         def knowsFuture(cls):
100                 return True
101
102         def getEpisode(self, name, begin, end=None, service=None, channels=[]):
103                 # On Success: Return a single season, episode, title tuple
104                 # On Failure: Return a empty list or String or None
105                 
106                 self.begin = begin
107                 self.end = end
108                 self.service = service
109                 self.channels = channels
110                 
111                 self.returnvalue = None
112                 
113                 # Check preconditions
114                 if not name:
115                         splog(_("Skip Wunschliste: No show name specified"))
116                         return _("Skip Wunschliste: No show name specified")
117                 if not begin:
118                         splog(_("Skip Wunschliste: No begin timestamp specified"))
119                         return _("Skip Wunschliste: No begin timestamp specified")
120                 
121                 splog("WunschlisteFeed getEpisode")
122                 
123                 while name:     
124                         ids = self.getSeries(name)
125                         
126                         while ids:
127                                 idserie = ids.pop()
128                                 
129                                 if idserie and len(idserie) == 2:
130                                         id, self.series = idserie
131                                         
132                                         result = self.getNextPage( id )
133                                         if result:
134                                                 return result
135                                         
136                         else:
137                                 name = self.getAlternativeSeries(name)
138                 
139                 else:
140                         return ( self.returnvalue or _("No matching series found") )
141
142         def getSeries(self, name):
143                 url = SERIESLISTURL + urlencode({ 'q' : re.sub("[^a-zA-Z0-9-*]", " ", name) })
144                 data = self.getPage( url )
145                 
146                 if data and isinstance(data, basestring):
147                         data = self.parseSeries(data)
148                         self.doCache(url, data)
149                 
150                 if data and isinstance(data, list):
151                         splog("WunschlisteFeed ids", data)
152                         return data
153
154         def parseSeries(self, data):
155                 serieslist = []
156                 for line in data.splitlines():
157                         values = line.split("|")
158                         if len(values) == 4:
159                                 idname, countryyear, id, temp = values
160                                 splog(id, idname)
161                                 serieslist.append( (id, idname) )
162                         else:
163                                 splog("WunschlisteFeed: ParseError: " + str(line))
164                 serieslist.reverse()
165                 return serieslist
166
167         def parseNextPage(self, data):
168                 # Handle malformed HTML issues
169                 data = data.replace('&','&')  # target=\"_blank\"&
170                 parser = WLAtomParser()
171                 parser.feed(data)
172                 #splog(parser.list)
173                 return parser.list
174         
175         def getNextPage(self, id):
176                 splog("WunschlisteFeed getNextPage")
177                 
178                 url = EPISODEIDURLATOM + urlencode({ 's' : id })
179                 data = self.getPage( url )
180                 
181                 if data and isinstance(data, basestring):
182                         data = self.parseNextPage(data)
183                         self.doCache(url, data)
184                 
185                 if data and isinstance(data, list):
186                         trs = data
187                         
188                         yepisode = None
189                         ydelta = maxint
190                         
191                         for tds in trs:
192                                 if tds and len(tds) == 2:
193                                         xtitle, xupdated = tds
194                                         if xtitle is not None and xupdated is not None:
195                                                 #import iso8601
196                                                 #http://code.google.com/p/pyiso8601/
197                                                 xbegin = parse_date(xupdated)
198                                                 xbegin = xbegin.replace(tzinfo=None)
199                                                 
200                                                 #"2014-11-10T20:15:00+01:00"
201                                                 #xbegin =  datetime.strptime(xupdated[0:-6], "%Y-%m-%dT%H:%M:%S");
202                                                 
203                                                 #Py2.6
204                                                 delta = abs(self.begin - xbegin)
205                                                 delta = delta.seconds + delta.days * 24 * 3600
206                                                 #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
207                                                 splog(self.begin, xbegin, delta, int(config.plugins.seriesplugin.max_time_drift.value)*60)
208                                                 
209                                                 if delta <= int(config.plugins.seriesplugin.max_time_drift.value) * 60:
210                                                         result = CompiledRegexpAtomChannel.search(xtitle)
211                                                         if result and len(result.groups()) >= 1:
212                                                                 
213                                                                 if compareChannels(self.channels, result.group(1), self.service):
214                                                                         
215                                                                         if delta < ydelta:
216                                                                                 # Slice string to remove channel
217                                                                                 xtitle = xtitle[:result.start()]
218                                                                                 result = CompiledRegexpAtomDate.search(xtitle)
219                                                                                 
220                                                                                 if result and len(result.groups()) >= 1:
221                                                                                         # Slice string to remove date
222                                                                                         xtitle = xtitle[:result.start()]
223                                                                                         result = CompiledRegexpAtomEpisode.search(xtitle)
224                                                                                         
225                                                                                         if result and len(result.groups()) >= 1:
226                                                                                                 # Extract season and episode
227                                                                                                 xepisode = result.group(1)
228                                                                                                 # Slice string to remove season and episode
229                                                                                                 xtitle = xtitle[:result.start()]
230                                                                                                 
231                                                                                                 result = CompiledRegexpEpisode.search(xepisode)
232                                                                                                 if result and len(result.groups()) >= 3:
233                                                                                                         xseason = result and result.group(2) or "1"
234                                                                                                         xepisode = result and result.group(3) or "0"
235                                                                                                 else:
236                                                                                                         splog("WunschlisteFeed wrong episode format", xepisode)
237                                                                                                         xseason = "1"
238                                                                                                         xepisode = "0"
239                                                                                         else:
240                                                                                                 splog("WunschlisteFeed wrong title format", xtitle)
241                                                                                                 xseason = "0"
242                                                                                                 xepisode = "0"
243                                                                                         result = CompiledRegexpAtomTitle.search(xtitle)
244                                                                                         
245                                                                                         if result and len(result.groups()) >= 1:
246                                                                                                 # Extract episode title
247                                                                                                 xtitle = result.group(1)
248                                                                                                 yepisode = (xseason, xepisode, xtitle.decode('ISO-8859-1').encode('utf8'), self.series.decode('ISO-8859-1').encode('utf8'))
249                                                                                                 ydelta = delta
250                                                                         
251                                                                         else: #if delta >= ydelta:
252                                                                                 break
253                                                                 
254                                                                 else:
255                                                                         self.returnvalue = _("Check the channel name")
256                                                                 
257                                                 elif yepisode:
258                                                         break
259                         
260                         if yepisode:
261                                 return ( yepisode )