SeriesPlugin: 1.5.8 Fixed encoding, popups, channel list integration
[enigma2-plugins.git] / seriesplugin / src / Identifiers / Fernsehserien.py
1 # -*- coding: utf-8 -*-
2 # by betonme @2012
3
4 import os, sys
5 import json
6 import re
7 import math
8
9 from sys import maxint
10
11 from Components.config import config
12 from Tools.BoundFunction import boundFunction
13
14 # Imports
15 from urllib import urlencode
16
17 #from HTMLParser import HTMLParser
18
19 from time import time
20 from datetime import datetime, timedelta
21
22 # Internal
23 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
24 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
25 from Plugins.Extensions.SeriesPlugin.Logger import splog
26
27 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4')
28 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4/builder')
29 from bs4 import BeautifulSoup
30
31 import codecs
# Encoder callable for UTF-8; calling it returns a tuple whose first item
# is the encoded result (see str_to_utf8).
utf8_encoder = codecs.getencoder("utf-8")


# Constants

# Series search endpoint; the urlencoded query is appended directly.
SERIESLISTURL = "http://www.fernsehserien.de/suche?"

# Broadcast listing of one series: %s takes the series id returned by the
# search, %d the page number.
EPISODEIDURL = 'http://www.fernsehserien.de%s/sendetermine/%d'

# Allowed difference between the requested and the listed begin time.
# Used as seconds (timedelta(seconds=...)); the configured value is
# presumably given in minutes - confirm against the plugin configuration.
# NOTE: evaluated once, when this module is imported.
max_time_drift = 60 * int(config.plugins.seriesplugin.max_time_drift.value)

# HTTP request headers handed to getPage for every request to the site.
Headers = {
    'User-Agent': 'Mozilla/5.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': '',
    'Accept-Language': 'de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'www.fernsehserien.de',
    'Pragma': 'no-cache',
}
52
53
def str_to_utf8(s):
    """Return *s* as a UTF-8 encoded byte string.

    Unicode objects are encoded with the module-level ``utf8_encoder``.
    Byte strings are returned unchanged: under Python 2 the encoder would
    first decode them implicitly as ASCII, which raises UnicodeDecodeError
    as soon as the value contains a non-ASCII byte. Pure-ASCII byte strings
    yield exactly the same result as before.

    NOTE(review): a byte string is assumed to be UTF-8 already - confirm
    against the callers / the page cache.
    """
    splog("FS: str_to_utf8: s: ", repr(s))
    if isinstance(s, bytes):
        # Already encoded; re-encoding would trigger the implicit ASCII decode.
        utf8_str = s
    else:
        # The encoder returns (encoded, consumed_length); keep the data only.
        utf8_str = utf8_encoder(s)[0]
    splog("FS: str_to_utf8: s: ", repr(utf8_str))
    return utf8_str
61
62
class Fernsehserien(IdentifierBase):
    """Episode identifier backed by the fernsehserien.de broadcast listings.

    getEpisode() resolves a show name to series ids through the search
    endpoint (SERIESLISTURL) and then walks the paginated broadcast pages
    (EPISODEIDURL) of every candidate until a row is found whose begin time
    lies within max_time_drift of the requested begin and whose channel is
    accepted by compareChannels.
    """

    def __init__(self):
        IdentifierBase.__init__(self)

    @classmethod
    def knowsElapsed(cls):
        """Always True. Presumably: elapsed broadcasts can be identified."""
        return True

    @classmethod
    def knowsToday(cls):
        """Always True. Presumably: today's broadcasts can be identified."""
        return True

    @classmethod
    def knowsFuture(cls):
        """Always True. Presumably: future broadcasts can be identified."""
        return True

    def getEpisode(self, name, begin, end=None, service=None, channels=None):
        """Look up season, episode and title of a single broadcast.

        name     -- show name used for the series search
        begin    -- begin of the broadcast; it is compared with and
                    subtracted from datetime objects, so a datetime is
                    expected
        end      -- only stored on the instance, not evaluated in this class
        service  -- handed to compareChannels
        channels -- handed to compareChannels; an empty list when omitted

        On success the (season, episode, title, series) tuple built by
        getNextPage is returned. On failure a translated message string is
        returned: a precondition message, the hint stored in
        self.returnvalue, or "No matching series found".
        """
        self.begin = begin
        self.end = end
        self.service = service
        # A fresh list per call; a literal [] default would be one shared
        # object stored on (and possibly mutated through) every instance.
        self.channels = [] if channels is None else channels

        self.series = ""
        self.first = None
        self.last = None
        self.page = 0

        self.knownids = []
        self.returnvalue = None

        # Check preconditions
        if not name:
            message = _("Skip Fernsehserien: No show name specified")
            splog(message)
            return message
        if not begin:
            message = _("Skip Fernsehserien: No begin timestamp specified")
            splog(message)
            return message

        # Decides the paging direction in getNextPage.
        self.future = self.begin > datetime.now()
        splog("Fernsehserien getEpisode future", self.future)

        while name:
            ids = self.getSeries(name)

            while ids:
                idserie = ids.pop()
                if not idserie or len(idserie) != 2:
                    continue
                seriesid, idname = idserie

                # Handle encodings
                self.series = str_to_utf8(idname)

                # Restart paging for every candidate series.
                self.page = 0
                self.first = None
                self.last = None

                # getNextPage sets self.page to None once paging is done.
                while self.page is not None:
                    result = self.getNextPage(seriesid)
                    if result:
                        return result

            # Every candidate id was tried; continue with the alternative
            # name supplied by getAlternativeSeries, if any.
            name = self.getAlternativeSeries(name)

        return self.returnvalue or _("No matching series found")

    def getSeries(self, name):
        """Return the candidate series ids for *name*.

        Characters other than ASCII letters, digits and '*' are replaced by
        blanks before the name is sent to the search endpoint. A text
        response is parsed with parseSeries and the parsed list is handed
        to doCache. A list response is used as is (presumably an already
        parsed, cached result - confirm against getPage).

        Returns the result of filterKnownIds for the list, or None when no
        list could be obtained.
        """
        parameter = urlencode({'term': re.sub("[^a-zA-Z0-9*]", " ", name)})
        url = SERIESLISTURL + parameter
        data = self.getPage(url, Headers)

        if data and isinstance(data, basestring):
            data = self.parseSeries(data)
            self.doCache(url, data)

        if data and isinstance(data, list):
            splog("Fernsehserien ids", data)
            return self.filterKnownIds(data)
        return None

    def parseSeries(self, data):
        """Convert the JSON search response into a list of (id, name) tuples.

        Every entry must provide the keys 'id' and 'value'. The list is
        reversed because getEpisode consumes it with pop(), i.e. from the
        end, so the first search hit is tried first.
        """
        serieslist = []
        for entry in json.loads(data):
            seriesid = entry['id']
            idname = entry['value']
            splog(seriesid, idname)
            serieslist.append((seriesid, idname))
        serieslist.reverse()
        return serieslist

    def parseNextPage(self, data):
        """Extract the broadcast rows from one listing page.

        Returns a list of rows; each row is the list of cell strings of one
        <tr> of the table with class 'sendetermine' (empty string for cells
        without a single string). A row is accepted when it has at least 6
        cells and its third cell holds at least 15 characters. A following
        5-cell row is treated as a continuation and merged into columns 5
        and 6 of the previously accepted row.
        """
        trs = []

        # Handle malformed HTML issues
        data = data.replace('\\"', '"')    # target=\"_blank\"
        data = data.replace('\'+\'', '')   # document.write('<scr'+'ipt

        soup = BeautifulSoup(data)

        table = soup.find('table', 'sendetermine')
        if not table:
            return trs

        for trnode in table.find_all('tr'):
            tdnodes = trnode and trnode.find_all('td')
            if not tdnodes:
                # Rows without <td> cells are ignored.
                continue

            # Filter for known rows
            if len(tdnodes) >= 6 and tdnodes[2].string and len(tdnodes[2].string) >= 15:
                trs.append([tdnode.string or "" for tdnode in tdnodes])

            # This row belongs to the previous
            elif trs and len(tdnodes) == 5:
                previous = trs[-1]
                # Rows with exactly 6 cells are accepted above and have no
                # column 6; merging into them used to raise IndexError.
                if len(previous) >= 7:
                    previous[5] += ' ' + (tdnodes[3].string or "")
                    previous[6] += ' ' + (tdnodes[4].string or "")

        return trs

    @staticmethod
    def _parseBegin(row):
        """Return the begin datetime of a parsed row, or None if unparsable.

        row[1] holds the date ('31.10.2012'); the first five characters of
        row[2] hold the begin time ('20:15'). The begin time is used for
        the last row of a page as well, because an end time may wrap past
        midnight (e.g. 23:35 - 01:30).
        """
        try:
            return datetime.strptime(row[2][0:5] + row[1], "%H:%M%d.%m.%Y")
        except (IndexError, TypeError, ValueError):
            # Too short row, None cell or text that is not a date/time.
            return None

    @staticmethod
    def _splitEpisodeColumns(tds):
        """Return (season, episode, title) taken from a row of >= 6 cells.

        Known layouts:
          7+ cells: [None, u'31.10.2012', u'20:15\\u201321:15 Uhr',
                     u'ProSieben', u'8.', u'15', u'Richtungswechsel']
          7+ cells without season:
                    [None, u'20.11.2012', u'06:40\\u201307:20 Uhr',
                     u'NDR', None, u'4187', u'Folge 4187']
          6 cells:  no season/episode columns, cell 5 is the title
        """
        if len(tds) >= 10:
            # NOTE(review): the title starts at column 10 although the
            # episode is in column 8 - the layout is not visible here.
            xseason = tds[7] or "1"
            xepisode = tds[8]
            xtitle = " ".join(tds[10:])  # Use all available titles
        elif len(tds) >= 7:
            xseason = tds[4]
            xepisode = tds[5]
            if xseason and xseason.find(".") != -1:
                # '8.' -> '8'
                xseason = xseason[:-1]
            else:
                xseason = "1"
            xtitle = " ".join(tds[6:])  # Use all available titles
        else:
            xseason = "0"
            xepisode = "0"
            xtitle = tds[5]
        return xseason, xepisode, xtitle

    def _findEpisode(self, trs):
        """Search the rows of one page for the best matching broadcast.

        A row matches when its begin is within max_time_drift seconds of
        self.begin and compareChannels accepts its channel (column 3). The
        row with the smallest time difference wins. When the time matches
        but the channel does not, a hint is stored in self.returnvalue.

        Returns (season, episode, title, series) or None.
        """
        yepisode = None
        ydelta = maxint

        for tds in trs:
            if not tds or len(tds) < 6:
                continue

            xbegin = self._parseBegin(tds)
            if xbegin is None:
                # Malformed date/time cell: skip the row, keep the lookup.
                continue

            # Py2.6 has no timedelta.total_seconds()
            delta = abs(self.begin - xbegin)
            delta = delta.seconds + delta.days * 24 * 3600

            if delta > max_time_drift:
                if yepisode:
                    # Left the matching time window again.
                    break
                continue

            if not compareChannels(self.channels, tds[3], self.service):
                self.returnvalue = _("Check the channel name")
                continue

            if delta >= ydelta:
                # The rows are moving away from the requested begin again.
                break

            splog("tds", len(tds), tds)
            xseason, xepisode, xtitle = self._splitEpisodeColumns(tds)
            if xseason and xepisode and xtitle and self.series:
                # Handle encodings
                xtitle = str_to_utf8(xtitle)
                yepisode = (xseason, xepisode, xtitle, self.series)
                ydelta = delta

        return yepisode

    def getNextPage(self, id):
        """Evaluate the listing page self.page of the series *id*.

        Returns the matching (season, episode, title, series) tuple, or
        None. As a side effect self.page is moved to the page to try next
        (-1 for elapsed, +1 for future broadcasts) or set to None when
        paging for this series is finished.
        """
        url = EPISODEIDURL % (id, self.page)
        data = self.getPage(url, Headers)

        if data and isinstance(data, basestring):
            splog("getNextPage: basestring")
            data = self.parseNextPage(data)
            self.doCache(url, data)

        if not (data and isinstance(data, list)):
            self.page = None
            return None

        splog("getNextPage: list")
        trs = data
        # trs[x] = [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']

        first = self._parseBegin(trs[0])
        last = self._parseBegin(trs[-1])
        if first is None or last is None:
            # Without both boundaries the page cannot be positioned in time.
            splog("getNextPage: unparsable first or last row", trs[0], trs[-1])
            self.page = None
            return None

        # The same boundaries as on the previous request mean that the site
        # delivered the same page again: stop paging.
        new_page = (self.first != first or self.last != last)
        splog("getNextPage: first_on_prev_page, first, last_on_prev_page, last, if: ", self.first, first, self.last, last, new_page)
        if not new_page:
            self.page = None
            return None

        self.first = first
        self.last = last

        drift = timedelta(seconds=max_time_drift)
        # Page sorted ascending: first - drift <= begin <= last + drift
        test_future_timespan = (first - drift) <= self.begin <= (last + drift)
        # Page sorted descending: first + drift >= begin >= last - drift.
        # The lower bound has to be widened by the drift (it used to be
        # narrowed with "last + drift").
        test_past_timespan = (first + drift) >= self.begin >= (last - drift)
        splog("first_on_page, self.begin, last_on_page, if, if:", first, self.begin, last, test_future_timespan, test_past_timespan )

        if test_future_timespan or test_past_timespan:
            # Search in page for matching datetime
            yepisode = self._findEpisode(trs)
            if yepisode:
                return yepisode
            self.page = None
            return None

        # TODO calculate next page : use firstrow lastrow datetime
        if not self.future:
            if first > self.begin:
                self.page -= 1
                return None
        elif self.begin > last:
            self.page += 1
            return None

        self.page = None
        return None