SeriesPlugin 1.2.8: Fixed multiple timer / movie handling
[enigma2-plugins.git] / seriesplugin / src / Identifiers / Fernsehserien.py
1 # by betonme @2012
2
3 import os, sys
4 import math
5 from sys import maxint
6
7 from Components.config import config
8 from Tools.BoundFunction import boundFunction
9
10 # Imports
11 from urllib import urlencode
12
13 #from HTMLParser import HTMLParser
14
15 from time import time
16 from datetime import datetime, timedelta
17
18 import json
19
20 import re
21
22 # Internal
23 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
24 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
25 from Plugins.Extensions.SeriesPlugin.Logger import splog
26
27 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4')
28 #sys.path.append(os.path.dirname( os.path.realpath( __file__ ) ) + '/bs4/builder')
29 from bs4 import BeautifulSoup
30
31 # Constants
# Search endpoint; getSeries() appends the urlencoded query string directly.
SERIESLISTURL = 'http://www.fernsehserien.de/suche?'
# Air date listing; filled with (series id, page number) by getNextPage().
EPISODEIDURL = "http://www.fernsehserien.de%s/sendetermine/%d"

# Tolerance in seconds when matching a begin time against a listed air time.
# NOTE(review): the factor 60 suggests the configured value is in minutes - confirm.
max_time_drift = 60 * int(config.plugins.seriesplugin.max_time_drift.value)
36
# HTTP request headers handed to getPage() for every fernsehserien.de request.
Headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "Accept-Encoding": "",
        "Accept-Language": "de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Host": "www.fernsehserien.de",
        "Pragma": "no-cache",
}
48
49
50 class Fernsehserien(IdentifierBase):
        def __init__(self):
                """Initialise the identifier by delegating to IdentifierBase."""
                # Explicit base call (not super()): keeps working even if the
                # base is an old-style class.
                IdentifierBase.__init__(self)
53
        @classmethod
        def knowsElapsed(cls):
                """Always return True.

                NOTE(review): presumably advertises that this identifier can
                resolve broadcasts that already aired - confirm against callers.
                """
                return True
57
        @classmethod
        def knowsToday(cls):
                """Always return True.

                NOTE(review): presumably advertises that this identifier can
                resolve broadcasts airing today - confirm against callers.
                """
                return True
61
        @classmethod
        def knowsFuture(cls):
                """Always return True.

                NOTE(review): presumably advertises that this identifier can
                resolve broadcasts scheduled in the future - confirm against callers.
                """
                return True
65
66         def getEpisode(self, name, begin, end=None, service=None, channels=[]):
67                 # On Success: Return a single season, episode, title tuple
68                 # On Failure: Return a empty list or String or None
69                 
70                 self.begin = begin
71                 #self.year = datetime.fromtimestamp(begin).year
72                 self.end = end
73                 self.service = service
74                 self.channels = channels
75                 
76                 self.series = ""
77                 self.first = None
78                 self.last = None
79                 self.page = 0
80                 
81                 self.knownids = []
82                 self.returnvalue = None
83                 
84                 # Check preconditions
85                 if not name:
86                         splog(_("Skip Fernsehserien: No show name specified"))
87                         return _("Skip Fernsehserien: No show name specified")
88                 if not begin:
89                         splog(_("Skip Fernsehserien: No begin timestamp specified"))
90                         return _("Skip Fernsehserien: No begin timestamp specified")
91                 
92                 if self.begin > datetime.now():
93                         self.future = True
94                 else:
95                         self.future = False
96                 splog("Fernsehserien getEpisode future", self.future)
97         
98                 while name:     
99                         ids = self.getSeries(name)
100                         
101                         while ids:
102                                 idserie = ids.pop()
103                                 
104                                 if idserie and len(idserie) == 2:
105                                         id, self.series = idserie
106                                         
107                                         self.page = 0
108                                         #if self.future:
109                                         #       self.page = 0
110                                         #else:
111                                         #       self.page = -1
112                                         
113                                         self.first = None
114                                         self.last = None
115                                         
116                                         while self.page is not None:
117                                                 result = self.getNextPage(id)
118                                                 if result:
119                                                         return result
120                                         
121                         else:
122                                 name = self.getAlternativeSeries(name)
123                 
124                 else:
125                         return ( self.returnvalue or _("No matching series found") )
126
127         def getSeries(self, name):
128                 parameter =  urlencode({ 'term' : re.sub("[^a-zA-Z0-9*]", " ", name) })
129                 url = SERIESLISTURL + parameter
130                 data = self.getPage(url, Headers)
131                 
132                 if data and isinstance(data, basestring):
133                         data = self.parseSeries(data)
134                         self.doCache(url, data)
135                 
136                 if data and isinstance(data, list):
137                         splog("Fernsehserien ids", data)
138                         return self.filterKnownIds(data)
139
140         def parseSeries(self, data):
141                 serieslist = []
142                 for line in json.loads(data):
143                         id = line['id']
144                         idname = line['value']
145                         splog(id, idname)
146                         serieslist.append( (id, idname) )
147                 serieslist.reverse()
148                 return serieslist
149
150         def parseNextPage(self, data):
151                 trs = []
152                 
153                 # Handle malformed HTML issues
154                 data = data.replace('\\"','"')  # target=\"_blank\"
155                 data = data.replace('\'+\'','') # document.write('<scr'+'ipt
156                 
157                 soup = BeautifulSoup(data)
158                 
159                 table = soup.find('table', 'sendetermine')
160                 if table:
161                         for trnode in table.find_all('tr'):
162                                 # TODO skip first header row
163                                 tdnodes = trnode and trnode.find_all('td')
164                                 
165                                 if tdnodes:
166                                         # Filter for known rows
167                                         #if len(tdnodes) == 7 and len(tdnodes[2].string) >= 15:
168                                         
169                                         if len(tdnodes) >= 6 and tdnodes[2].string and len(tdnodes[2].string) >= 15:
170                                                 tds = []
171                                                 for tdnode in tdnodes:
172                                                         tds.append(tdnode.string or "")
173                                                 trs.append( tds )
174                                         # This row belongs to the previous
175                                         elif trs and len(tdnodes) == 5:
176                                                 #if trs[-1][5] and tdnodes[3].string:
177                                                 trs[-1][5] += ' ' + (tdnodes[3].string or "")
178                                                 #if trs[-1][6] and tdnodes[4].string:
179                                                 trs[-1][6] += ' ' + (tdnodes[4].string or "")
180                                         #else:
181                                         #       splog( "tdnodes", len(tdnodes), tdnodes )
182                                 
183                                 #else:
184                                 #       splog( "tdnodes", tdnodes )
185                 
186                 #splog(trs)
187                 return trs
188
189         def getNextPage(self, id):
190                 url = EPISODEIDURL % (id, self.page)
191                 data = self.getPage(url, Headers)
192                 
193                 if data and isinstance(data, basestring):
194                         splog("getNextPage: basestring")
195                         data = self.parseNextPage(data)
196                         self.doCache(url, data)
197                 
198                 if data and isinstance(data, list):
199                         splog("getNextPage: list")
200                         
201                         trs = data
202                         # trs[x] = [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
203
204                         yepisode = None
205                         ydelta = maxint
206                         
207                         #first = trs[0][2]
208                         #last = trs[-1][2]
209                         #print first[0:5]
210                         #print last[6:11] 
211                         
212                         # trs[0] first line [2] second element = timestamps [a:b] use first time
213                         first = datetime.strptime( trs[0][2][0:5] + trs[0][1], "%H:%M%d.%m.%Y" )
214                         
215                         # trs[-1] last line [2] second element = timestamps [a:b] use second time
216                         #last = datetime.strptime( trs[-1][2][6:11] + trs[-1][1], "%H:%M%d.%m.%Y" )
217                         # Problem with wrap around use also start time
218                         # Sa 30.11.2013 23:35 - 01:30 Uhr ProSieben 46 3. 13 Showdown 3
219                         last = datetime.strptime( trs[-1][2][0:5] + trs[-1][1], "%H:%M%d.%m.%Y" )
220                         
221                         #first = first - timedelta(seconds=max_time_drift)
222                         #last = last + timedelta(seconds=max_time_drift)
223                         
224                         splog("getNextPage: self.first, first, self.last, last, if: ", self.first, first, self.last, last, (self.first != first and self.last != last))
225                         if self.first != first or self.last != last:
226                                 self.first = first
227                                 self.last = last
228                                 
229                                 splog("first, self.begin, last, if, if:", first, self.begin, last, ( first <= self.begin and self.begin <= last ), ( first >= self.begin and self.begin >= last ) )
230                                 if ( ( first <= self.begin and self.begin <= last ) or ( first >= self.begin and self.begin >= last ) ):
231                                         #search in page for matching datetime
232                                         for tds in trs:
233                                                 if tds and len(tds) >= 6:  #7:
234                                                         # Grey's Anathomy
235                                                         # [None, u'31.10.2012', u'20:15\u201321:15 Uhr', u'ProSieben', u'8.', u'15', u'Richtungswechsel']
236                                                         # 
237                                                         # Gute Zeiten 
238                                                         # [None, u'20.11.2012', u'06:40\u201307:20 Uhr', u'NDR', None, u'4187', u'Folge 4187']
239                                                         # [None, u'01.12.2012', u'10:45\u201313:15 Uhr', u'RTL', None, u'5131', u'Folge 5131']
240                                                         # [None, u'\xa0', None, u'5132', u'Folge 5132']
241                                                         # [None, u'\xa0', None, u'5133', u'Folge 5133']
242                                                         # [None, u'\xa0', None, u'5134', u'Folge 5134']
243                                                         # [None, u'\xa0', None, u'5135', u'Folge 5135']
244                                                         
245                                                         # Wahnfried
246                                                         # [u'Sa', u'26.12.1987', u'\u2013', u'So', u'27.12.1987', u'1Plus', None]
247                                                         
248                                                         # First part: date, times, channel
249                                                         xdate, xbegin = tds[1:3]
250                                                         #splog( "tds", tds )
251                                                         
252                                                         #xend = xbegin[6:11]
253                                                         xbegin = xbegin[0:5]
254                                                         xbegin = datetime.strptime( xbegin+xdate, "%H:%M%d.%m.%Y" )
255                                                         #xend = datetime.strptime( xend+xdate, "%H:%M%d.%m.%Y" )
256                                                         #print "xbegin", xbegin
257                                                         
258                                                         #Py2.6
259                                                         delta = abs(self.begin - xbegin)
260                                                         delta = delta.seconds + delta.days * 24 * 3600
261                                                         #Py2.7 delta = abs(self.begin - xbegin).total_seconds()
262                                                         splog(self.begin, xbegin, delta, max_time_drift)
263                                                         
264                                                         if delta <= max_time_drift:
265                                                                 
266                                                                 if compareChannels(self.channels, tds[3], self.service):
267                                                                         
268                                                                         if delta < ydelta:
269                                                                                 
270                                                                                 splog( "tds", len(tds), tds )
271                                                                                 if len(tds) >= 10:
272                                                                                         # Second part: s1e1, s1e2,
273                                                                                         xseason = tds[7] or "1"
274                                                                                         xepisode = tds[8]
275                                                                                         xtitle = " ".join(tds[10:])  # Use all available titles
276                                                                                 elif len(tds) >= 7:
277                                                                                         # Second part: s1e1, s1e2,
278                                                                                         xseason = tds[4]
279                                                                                         xepisode = tds[5]
280                                                                                         if xseason and xseason.find(".") != -1:
281                                                                                                 xseason = xseason[:-1]
282                                                                                                 xtitle = " ".join(tds[6:])  # Use all available titles
283                                                                                         else:
284                                                                                                 xseason = "1"
285                                                                                                 xtitle = " ".join(tds[6:])  # Use all available titles
286                                                                                 elif len(tds) == 6:
287                                                                                         xseason = "0"
288                                                                                         xepisode = "0"
289                                                                                         xtitle = tds[5]
290                                                                                 if xseason and xepisode and xtitle and self.series:
291                                                                                         yepisode = (xseason, xepisode, xtitle, self.series)
292                                                                                         ydelta = delta
293                                                                         
294                                                                         else: #if delta >= ydelta:
295                                                                                 break
296                                                                 
297                                                                 else:
298                                                                         self.returnvalue = _("Check the channel name")
299                                                                 
300                                                         elif yepisode:
301                                                                 break
302                                         
303                                         if yepisode:
304                                                 return ( yepisode )
305                                 
306                                 else:
307                                         # TODO calculate next page : use firstrow lastrow datetime
308                                         if not self.future:
309                                                 if first > self.begin:
310                                                         self.page -= 1
311                                                         return
312                                         
313                                         else:
314                                                 if self.begin > last:
315                                                         self.page += 1
316                                                         return
317                 
318                 self.page = None
319                 return