SeriesPlugin 1.0: First public version
[enigma2-plugins.git] / seriesplugin / src / identifiers / Fernsehserien.py
1 # by betonme @2012
2
3 import os, sys
4 import math
5 from sys import maxint
6
7 from Components.config import config
8 from Tools.BoundFunction import boundFunction
9
10 # Imports
11 from urllib import urlencode
12
13 #from HTMLParser import HTMLParser
14
15 from time import time
16 from datetime import datetime, timedelta
17
18 import json
19
20 import re
21
22 # Internal
23 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
24 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
25 from Plugins.Extensions.SeriesPlugin.Logger import splog
26
27 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4')
28 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4/builder')
29 from bs4 import BeautifulSoup
30
31 # Constants
# Series search endpoint; the urlencoded query is appended directly to it
SERIESLISTURL = "http://www.fernsehserien.de/suche?"
# Broadcast listing of one series; filled with (series id, page number)
EPISODEIDURL = 'http://www.fernsehserien.de%s/sendetermine/%d'

# Used as a number of seconds further down in this file.
# NOTE(review): the factor 60 suggests the config value is in minutes - confirm.
# The value is read once, when this module is imported.
max_time_drift = 60 * int(config.plugins.seriesplugin.max_time_drift.value)

# HTTP request headers handed to getPage() together with every URL
Headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': '',
        'Accept-Language': 'de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': 'www.fernsehserien.de',
        'Pragma': 'no-cache',
}
48
49
class Fernsehserien(IdentifierBase):
        """Episode identifier that queries www.fernsehserien.de.

        getEpisode() is the entry point. It searches the site for a show name,
        then walks the paged broadcast listing ("sendetermine") of every
        candidate series until a row matches the requested start time and
        channel.

        getPage(), doCache() and getAlternativeSeries() are not defined in this
        class (presumably inherited from IdentifierBase).
        """

        def __init__(self):
                IdentifierBase.__init__(self)

                self.license = False

        @classmethod
        def knowsElapsed(cls):
                """Capability flag, always True (presumably: past broadcasts are supported)."""
                return True

        @classmethod
        def knowsToday(cls):
                """Capability flag, always True (presumably: today's broadcasts are supported)."""
                return True

        @classmethod
        def knowsFuture(cls):
                """Capability flag, always True (presumably: upcoming broadcasts are supported)."""
                return True

        def getEpisode(self, name, begin, end=None, service=None, channels=None):
                """Look up season and episode information for one broadcast.

                name     -- show title to search for
                begin    -- start of the broadcast; it is compared with and
                            subtracted from datetime objects, so it has to be
                            a datetime
                end      -- stored on the instance; not read by the code in
                            this class
                service  -- handed to compareChannels()
                channels -- handed to compareChannels(); a new empty list is
                            used when omitted

                On success: returns a (season, episode, title, series) tuple.
                On failure: returns a translated message string.
                """
                self.begin = begin
                self.end = end
                self.service = service
                # A new list per call instead of one shared mutable default
                self.channels = channels if channels is not None else []

                self.series = ""
                self.first = None
                self.last = None
                self.page = 0

                self.returnvalue = None

                # Check preconditions
                if not name:
                        splog(_("Skip Fernsehserien: No show name specified"))
                        return _("Skip Fernsehserien: No show name specified")
                if not begin:
                        splog(_("Skip Fernsehserien: No begin timestamp specified"))
                        return _("Skip Fernsehserien: No begin timestamp specified")

                # Decides in which direction getNextPage() walks the listing
                self.future = self.begin > datetime.now()
                splog("Fernsehserien getEpisode future", self.future)

                while name:
                        # parseSeries() stores the candidates reversed, so walking
                        # the list backwards visits them in the order delivered by
                        # the site. The list is deliberately not consumed with
                        # pop(): it is the same object that was handed to
                        # doCache() and may still be referenced by the cache.
                        for idserie in reversed(self.getSeries(name) or []):
                                if not idserie or len(idserie) != 2:
                                        continue
                                series_id, self.series = idserie

                                # Every candidate starts on page 0 with no page seen yet
                                self.page = 0
                                self.first = None
                                self.last = None

                                # getNextPage() sets self.page to None when it gives up
                                while self.page is not None:
                                        result = self.getNextPage(series_id)
                                        if result:
                                                return result

                        # No candidate matched: retry with an alternative title
                        name = self.getAlternativeSeries(name)

                return self.returnvalue or _("No matching series found")

        def getSeries(self, name):
                """Return the candidate series for a show name.

                The result is a non-empty list of (id, name) entries, or None
                when nothing usable was received. getPage() may deliver either
                a raw string, which is parsed and passed to doCache(), or an
                already parsed list.
                """
                # Everything except ASCII letters, digits and '*' becomes a blank
                query = urlencode({'term': re.sub("[^a-zA-Z0-9*]", " ", name)})
                url = SERIESLISTURL + query
                data = self.getPage(url, Headers)

                if data and isinstance(data, basestring):
                        data = self.parseSeries(data)
                        self.doCache(url, data)

                if data and isinstance(data, list):
                        splog("Fernsehserien ids", data)
                        return data
                return None

        def parseSeries(self, data):
                """Convert the JSON search response into a list of (id, name) tuples.

                The list is returned in reversed order.
                """
                serieslist = []
                for entry in json.loads(data):
                        series_id = entry['id']
                        series_name = entry['value']
                        splog(series_id, series_name)
                        serieslist.append((series_id, series_name))
                serieslist.reverse()
                return serieslist

        def parseNextPage(self, data):
                """Extract the broadcast rows from one listing page.

                Returns a list of rows; every row is a list with the text of
                each table cell ("" for cells without a single string). Only
                rows with at least six cells and a time cell of at least 15
                characters are kept. A five cell row is treated as belonging
                to the row before it and its last two cells are appended to
                columns 5 and 6 of that row.
                """
                rows = []

                # Handle malformed HTML issues
                data = data.replace('\\"', '"')    # target=\"_blank\"
                data = data.replace('\'+\'', '')   # document.write('<scr'+'ipt

                soup = BeautifulSoup(data)

                table = soup.find('table', 'sendetermine')
                if not table:
                        return rows

                for trnode in table.find_all('tr'):
                        tdnodes = trnode.find_all('td')
                        if not tdnodes:
                                # e.g. a header row without td cells
                                continue

                        if len(tdnodes) >= 6:
                                timecell = tdnodes[2].string
                                # Filter for known rows: "20:15-21:15 Uhr" has 15 characters
                                if timecell and len(timecell) >= 15:
                                        rows.append([tdnode.string or "" for tdnode in tdnodes])

                        elif rows and len(tdnodes) == 5:
                                # This row belongs to the previous one
                                previous = rows[-1]
                                extras = ((5, tdnodes[3].string), (6, tdnodes[4].string))
                                for index, text in extras:
                                        # Rows with exactly six cells have no column 6
                                        if index < len(previous):
                                                previous[index] += ' ' + (text or "")

                return rows

        @staticmethod
        def _rowStart(tds):
                """Return the start of the broadcast in row tds as a datetime.

                tds[1] holds the date ("31.10.2012") and tds[2] the time span
                ("20:15-21:15 Uhr"). Only the start time is used, because the
                end time may already belong to the following day:
                Sa 30.11.2013 23:35 - 01:30 Uhr
                """
                return datetime.strptime(tds[2][0:5] + tds[1], "%H:%M%d.%m.%Y")

        @staticmethod
        def _episodeColumns(tds):
                """Return (season, episode, title) taken from a row of at least six cells."""
                if len(tds) >= 10:
                        # Use all available titles
                        return tds[7] or "1", tds[8], " ".join(tds[10:])

                if len(tds) >= 7:
                        # [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
                        # [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
                        xseason = tds[4]
                        if xseason and "." in xseason:
                                # "8." -> "8"
                                xseason = xseason[:-1]
                        else:
                                xseason = "1"
                        # Use all available titles
                        return xseason, tds[5], " ".join(tds[6:])

                # Six cells: no season and episode columns
                return "0", "0", tds[5]

        def _matchEpisode(self, trs):
                """Search the rows of one page for the broadcast closest to self.begin.

                Returns (season, episode, title, series) or None. A row only
                counts when it starts within max_time_drift seconds of
                self.begin and compareChannels() accepts its channel. A channel
                mismatch is remembered in self.returnvalue.
                """
                yepisode = None
                ydelta = maxint

                for tds in trs:
                        if not tds or len(tds) < 6:
                                continue

                        xbegin = self._rowStart(tds)

                        # Py2.6 has no timedelta.total_seconds()
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        splog(self.begin, xbegin, delta, max_time_drift)

                        if delta > max_time_drift:
                                if yepisode:
                                        # Already past the matching rows
                                        break
                                continue

                        if not compareChannels(self.channels, tds[3], self.service):
                                self.returnvalue = _("Check the channel name")
                                continue

                        if delta >= ydelta:
                                # The rows are moving away from self.begin again
                                break

                        splog("tds", len(tds), tds)
                        xseason, xepisode, xtitle = self._episodeColumns(tds)
                        if xseason and xepisode and xtitle and self.series:
                                yepisode = (xseason, xepisode, xtitle, self.series)
                                ydelta = delta

                return yepisode

        def getNextPage(self, id):
                """Fetch listing page self.page of series id and evaluate it.

                Returns (season, episode, title, series) when the page contains
                the broadcast. Otherwise returns None and either moves
                self.page to the neighbouring page that should be tried next,
                or sets self.page to None to signal that the search for this
                series is over.
                """
                url = EPISODEIDURL % (id, self.page)
                data = self.getPage(url, Headers)

                if data and isinstance(data, basestring):
                        data = self.parseNextPage(data)
                        self.doCache(url, data)

                if data and isinstance(data, list):
                        trs = data

                        # Time range covered by this page, widened by the allowed drift
                        drift = timedelta(seconds=max_time_drift)
                        first = self._rowStart(trs[0]) - drift
                        last = self._rowStart(trs[-1]) + drift

                        # Same range as the page before: stop instead of looping forever.
                        # NOTE(review): with "and" a page sharing only one boundary
                        # with its predecessor also ends the search - confirm intent.
                        if self.first != first and self.last != last:
                                self.first = first
                                self.last = last

                                in_range = first <= self.begin and self.begin <= last
                                splog("first, self.begin, last, if ", first, self.begin, last, in_range)

                                if in_range:
                                        # Search in page for matching datetime
                                        yepisode = self._matchEpisode(trs)
                                        if yepisode:
                                                return yepisode

                                # TODO calculate next page : use firstrow lastrow datetime
                                elif not self.future:
                                        if first > self.begin:
                                                self.page -= 1
                                                return

                                else:
                                        if self.begin > last:
                                                self.page += 1
                                                return

                self.page = None
                return