SeriesPlugin 1.2.4: Improved episode lookup if we have to search for alternative...
[enigma2-plugins.git] / seriesplugin / src / Identifiers / Fernsehserien.py
1 # by betonme @2012
2
3 import os, sys
4 import math
5 from sys import maxint
6
7 from Components.config import config
8 from Tools.BoundFunction import boundFunction
9
10 # Imports
11 from urllib import urlencode
12
13 #from HTMLParser import HTMLParser
14
15 from time import time
16 from datetime import datetime, timedelta
17
18 import json
19
20 import re
21
22 # Internal
23 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
24 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
25 from Plugins.Extensions.SeriesPlugin.Logger import splog
26
27 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4')
28 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4/builder')
29 from bs4 import BeautifulSoup
30
31 # Constants
# Search endpoint; getSeries() appends the url-encoded 'term' query to it
SERIESLISTURL = "http://www.fernsehserien.de/suche?"
# Broadcast-date page; getNextPage() fills in the series id (%s) and the page number (%d)
EPISODEIDURL = 'http://www.fernsehserien.de%s/sendetermine/%d'

# Compared against a difference in seconds in getNextPage(); the configured
# value is multiplied by 60, so it is presumably given in minutes - TODO confirm
max_time_drift = 60 * int(config.plugins.seriesplugin.max_time_drift.value)
36
# HTTP request headers handed to self.getPage() by getSeries() and getNextPage()
Headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        # NOTE(review): empty on purpose, presumably to avoid compressed replies - confirm
        'Accept-Encoding': '',
        'Accept-Language': 'de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': 'www.fernsehserien.de',
        'Pragma': 'no-cache',
}
48
49
50 class Fernsehserien(IdentifierBase):
51         def __init__(self):
52                 IdentifierBase.__init__(self)
53
54         @classmethod
55         def knowsElapsed(cls):
56                 return True
57
58         @classmethod
59         def knowsToday(cls):
60                 return True
61
62         @classmethod
63         def knowsFuture(cls):
64                 return True
65
66         def getEpisode(self, name, begin, end=None, service=None, channels=[]):
67                 # On Success: Return a single season, episode, title tuple
68                 # On Failure: Return a empty list or String or None
69                 
70                 self.begin = begin
71                 #self.year = datetime.fromtimestamp(begin).year
72                 self.end = end
73                 self.service = service
74                 self.channels = channels
75                 
76                 self.series = ""
77                 self.first = None
78                 self.last = None
79                 self.page = 0
80                 
81                 self.returnvalue = None
82                 
83                 # Check preconditions
84                 if not name:
85                         splog(_("Skip Fernsehserien: No show name specified"))
86                         return _("Skip Fernsehserien: No show name specified")
87                 if not begin:
88                         splog(_("Skip Fernsehserien: No begin timestamp specified"))
89                         return _("Skip Fernsehserien: No begin timestamp specified")
90                 
91                 if self.begin > datetime.now():
92                         self.future = True
93                 else:
94                         self.future = False
95                 splog("Fernsehserien getEpisode future", self.future)
96         
97                 while name:     
98                         ids = self.getSeries(name)
99                         
100                         while ids:
101                                 idserie = ids.pop()
102                                 
103                                 if idserie and len(idserie) == 2:
104                                         id, self.series = idserie
105                                         
106                                         self.page = 0
107                                         #if self.future:
108                                         #       self.page = 0
109                                         #else:
110                                         #       self.page = -1
111                                         
112                                         self.first = None
113                                         self.last = None
114                                         
115                                         while self.page is not None:
116                                                 result = self.getNextPage(id)
117                                                 if result:
118                                                         return result
119                                         
120                         else:
121                                 name = self.getAlternativeSeries(name)
122                 
123                 else:
124                         return ( self.returnvalue or _("No matching series found") )
125
126         def getSeries(self, name):
127                 parameter =  urlencode({ 'term' : re.sub("[^a-zA-Z0-9*]", " ", name) })
128                 url = SERIESLISTURL + parameter
129                 data = self.getPage(url, Headers)
130                 
131                 if data and isinstance(data, basestring):
132                         data = self.parseSeries(data)
133                         self.doCache(url, data)
134                 
135                 if data and isinstance(data, list):
136                         splog("Fernsehserien ids", data)
137                         return self.filterKnownIds(data)
138
139         def parseSeries(self, data):
140                 serieslist = []
141                 for line in json.loads(data):
142                         id = line['id']
143                         idname = line['value']
144                         splog(id, idname)
145                         serieslist.append( (id, idname) )
146                 serieslist.reverse()
147                 return serieslist
148
149         def parseNextPage(self, data):
150                 trs = []
151                 
152                 # Handle malformed HTML issues
153                 data = data.replace('\\"','"')  # target=\"_blank\"
154                 data = data.replace('\'+\'','') # document.write('<scr'+'ipt
155                 
156                 soup = BeautifulSoup(data)
157                 
158                 table = soup.find('table', 'sendetermine')
159                 if table:
160                         for trnode in table.find_all('tr'):
161                                 # TODO skip first header row
162                                 tdnodes = trnode and trnode.find_all('td')
163                                 
164                                 if tdnodes:
165                                         # Filter for known rows
166                                         #if len(tdnodes) == 7 and len(tdnodes[2].string) >= 15:
167                                         
168                                         if len(tdnodes) >= 6 and tdnodes[2].string and len(tdnodes[2].string) >= 15:
169                                                 tds = []
170                                                 for tdnode in tdnodes:
171                                                         tds.append(tdnode.string or "")
172                                                 trs.append( tds )
173                                         # This row belongs to the previous
174                                         elif trs and len(tdnodes) == 5:
175                                                 #if trs[-1][5] and tdnodes[3].string:
176                                                 trs[-1][5] += ' ' + (tdnodes[3].string or "")
177                                                 #if trs[-1][6] and tdnodes[4].string:
178                                                 trs[-1][6] += ' ' + (tdnodes[4].string or "")
179                                         #else:
180                                         #       splog( "tdnodes", len(tdnodes), tdnodes )
181                                 
182                                 #else:
183                                 #       splog( "tdnodes", tdnodes )
184                 
185                 #splog(trs)
186                 return trs
187
188         def getNextPage(self, id):
189                 url = EPISODEIDURL % (id, self.page)
190                 data = self.getPage(url, Headers)
191                 
192                 if data and isinstance(data, basestring):
193                         splog("getNextPage: basestring")
194                         data = self.parseNextPage(data)
195                         self.doCache(url, data)
196                 
197                 if data and isinstance(data, list):
198                         splog("getNextPage: list")
199                         
200                         trs = data
201                         # trs[x] = [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
202
203                         yepisode = None
204                         ydelta = maxint
205                         
206                         #first = trs[0][2]
207                         #last = trs[-1][2]
208                         #print first[0:5]
209                         #print last[6:11] 
210                         
211                         # trs[0] first line [2] second element = timestamps [a:b] use first time
212                         first = datetime.strptime( trs[0][2][0:5] + trs[0][1], "%H:%M%d.%m.%Y" )
213                         
214                         # trs[-1] last line [2] second element = timestamps [a:b] use second time
215                         #last = datetime.strptime( trs[-1][2][6:11] + trs[-1][1], "%H:%M%d.%m.%Y" )
216                         # Problem with wrap around use also start time
217                         # Sa 30.11.2013 23:35 - 01:30 Uhr ProSieben 46 3. 13 Showdown 3
218                         last = datetime.strptime( trs[-1][2][0:5] + trs[-1][1], "%H:%M%d.%m.%Y" )
219                         
220                         #first = first - timedelta(seconds=max_time_drift)
221                         #last = last + timedelta(seconds=max_time_drift)
222                         
223                         splog("getNextPage: self.first, first, self.last, last, if: ", self.first, first, self.last, last, (self.first != first and self.last != last))
224                         if self.first != first and self.last != last:
225                                 self.first = first
226                                 self.last = last
227                                 
228                                 splog("first, self.begin, last, if, if:", first, self.begin, last, ( first <= self.begin and self.begin <= last ), ( first >= self.begin and self.begin >= last ) )
229                                 if ( ( first <= self.begin and self.begin <= last ) or ( first >= self.begin and self.begin >= last ) ):
230                                         #search in page for matching datetime
231                                         for tds in trs:
232                                                 if tds and len(tds) >= 6:  #7:
233                                                         # Grey's Anathomy
234                                                         # [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
235                                                         # 
236                                                         # Gute Zeiten 
237                                                         # [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
238                                                         # [None, u'01.12.2012', u'10:45\u201313:15 Uhr', u'RTL', None, u'5131', u'Folge 5131']
239                                                         # [None, u'\xa0', None, u'5132', u'Folge 5132']
240                                                         # [None, u'\xa0', None, u'5133', u'Folge 5133']
241                                                         # [None, u'\xa0', None, u'5134', u'Folge 5134']
242                                                         # [None, u'\xa0', None, u'5135', u'Folge 5135']
243                                                         
244                                                         # Wahnfried
245                                                         # [u'Sa', u'26.12.1987', u'\u2013', u'So', u'27.12.1987', u'1Plus', None]
246                                                         
247                                                         # First part: date, times, channel
248                                                         xdate, xbegin = tds[1:3]
249                                                         #splog( "tds", tds )
250                                                         
251                                                         #xend = xbegin[6:11]
252                                                         xbegin = xbegin[0:5]
253                                                         xbegin = datetime.strptime( xbegin+xdate, "%H:%M%d.%m.%Y" )
254                                                         #xend = datetime.strptime( xend+xdate, "%H:%M%d.%m.%Y" )
255                                                         #print "xbegin", xbegin
256                                                         
257                                                         #Py2.6
258                                                         delta = abs(self.begin - xbegin)
259                                                         delta = delta.seconds + delta.days * 24 * 3600
260                                                         #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
261                                                         splog(self.begin, xbegin, delta, max_time_drift)
262                                                         
263                                                         if delta <= max_time_drift:
264                                                                 
265                                                                 if compareChannels(self.channels, tds[3], self.service):
266                                                                         
267                                                                         if delta < ydelta:
268                                                                                 
269                                                                                 splog( "tds", len(tds), tds )
270                                                                                 if len(tds) >= 10:
271                                                                                         # Second part: s1e1, s1e2,
272                                                                                         xseason = tds[7] or "1"
273                                                                                         xepisode = tds[8]
274                                                                                         xtitle = " ".join(tds[10:])  # Use all available titles
275                                                                                 elif len(tds) >= 7:
276                                                                                         # Second part: s1e1, s1e2,
277                                                                                         xseason = tds[4]
278                                                                                         xepisode = tds[5]
279                                                                                         if xseason and xseason.find(".") != -1:
280                                                                                                 xseason = xseason[:-1]
281                                                                                                 xtitle = " ".join(tds[6:])  # Use all available titles
282                                                                                         else:
283                                                                                                 xseason = "1"
284                                                                                                 xtitle = " ".join(tds[6:])  # Use all available titles
285                                                                                 elif len(tds) == 6:
286                                                                                         xseason = "0"
287                                                                                         xepisode = "0"
288                                                                                         xtitle = tds[5]
289                                                                                 if xseason and xepisode and xtitle and self.series:
290                                                                                         yepisode = (xseason, xepisode, xtitle, self.series)
291                                                                                         ydelta = delta
292                                                                         
293                                                                         else: #if delta >= ydelta:
294                                                                                 break
295                                                                 
296                                                                 else:
297                                                                         self.returnvalue = _("Check the channel name")
298                                                                 
299                                                         elif yepisode:
300                                                                 break
301                                         
302                                         if yepisode:
303                                                 return ( yepisode )
304                                 
305                                 else:
306                                         # TODO calculate next page : use firstrow lastrow datetime
307                                         if not self.future:
308                                                 if first > self.begin:
309                                                         self.page -= 1
310                                                         return
311                                         
312                                         else:
313                                                 if self.begin > last:
314                                                         self.page += 1
315                                                         return
316                 
317                 self.page = None
318                 return