SeriesPlugin 1.0: First public version
[enigma2-plugins.git] / seriesplugin / src / identifiers / WunschlisteFeed.py
1 # by betonme @2012
2
3 # Imports
4 from Components.config import config
5
6 from Tools.BoundFunction import boundFunction
7
8 from urllib import urlencode
9
10 from HTMLParser import HTMLParser
11
12 from datetime import datetime
13
14 import re
15 from sys import maxint
16
17 # Internal
18 from Plugins.Extensions.SeriesPlugin.IdentifierBase import IdentifierBase
19 from Plugins.Extensions.SeriesPlugin.Channels import compareChannels
20 from Plugins.Extensions.SeriesPlugin.Logger import splog
21
22 from iso8601 import parse_date
23
# Constants
# Base URLs of the wunschliste.de endpoints; the urlencoded query is appended.
SERIESLISTURL     = "http://www.wunschliste.de/ajax/search_dropdown.pl?"
EPISODEIDURLATOM  = "http://www.wunschliste.de/xml/atom.pl?"
#EPISODEIDURLRSS  = "http://www.wunschliste.de/xml/rss.pl?"

# Layout of an Atom entry title:
# Series: EpisodeTitle (Season.Episode) - Weekday Date, Time / Channel (Country)
# Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie (Pay-TV)
# Two and a Half Men: Der Mittwochs-Mann (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
# Two and a Half Men: Der Mittwochs-Mann (1) (1.5) - Mi 02.05., 19.50:00 Uhr / TNT Serie
# Der Troedeltrupp - Das Geld liegt im Keller: Folge 109 (109) - Do 03.05., 16.15:00 Uhr / RTL II
# Galileo: U.a.: Die schaerfste Chili der Welt - Fr 04.05., 19.05:00 Uhr / ProSieben
# Galileo: Magazin mit Aiman Abdallah, BRD 2012 - Mi 09.05., 06.10:00 Uhr / ProSieben
# Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#
# The title is taken apart from the right: every pattern anchors on the LAST
# occurrence of its delimiter (negative lookahead) and the caller cuts the
# string at the match start before applying the next pattern.
# All patterns are raw strings so the backslashes reach the regex engine
# unchanged instead of being (invalid) string escapes.

# Channel is between last / and ( or line end
CompiledRegexpAtomChannel = re.compile(r'\/(?!.*\/) ([^\(]+)')
# Date is between last - and channel
CompiledRegexpAtomDate = re.compile(r'-(?!.*-) (.+)')
# Find optional episode: content of the last (...) group followed by a space
CompiledRegexpAtomEpisode = re.compile(r'\((?!.*\()(.+)\) ')
# Series: Title - the greedy prefix makes group 1 the text after the last ': '
CompiledRegexpAtomTitle = re.compile(r'.+: (.+)')

# (Season.Episode) - EpisodeTitle
# (21.84) Folge 4985
# (105) Folge 105
# (4.11/4.11) Mama ist die Beste/Rund um die Uhr
# Galileo: Die schaerfste Chili der Welt
# Galileo: Jumbo auf Achse: Muelltonnenkoch
# Gute Zeiten, schlechte Zeiten: Folgen 4985 - 4988 (21.84) - Sa 05.05., 11.00:00 Uhr / RTL
#CompiledRegexpPrintTitle = re.compile( '(\(.*\) )?(.+)')

# Optional season followed by '.' or 'x', then the episode number.
# group(2) is the season (None when absent), group(3) the episode.
CompiledRegexpEpisode = re.compile(r'((\d+)[\.x])?(\d+)')
56
57
class WLAtomParser(HTMLParser):
        """Collect the title and updated text of every <entry> of a feed.

        After feed() the attribute ``list`` holds one ``(title, updated)``
        tuple of strings per closed <entry> element, in document order.
        """

        def __init__(self):
                HTMLParser.__init__(self)
                # True while the parser is inside a <title> / <updated> element
                self.title = False
                self.updated = False
                # Text collected for the entry currently being parsed
                self.titlestr = ''
                self.updatedstr = ''
                # Result: list of (title, updated) tuples
                self.list = []

        def handle_starttag(self, tag, attributes):
                if tag == 'entry':
                        # Discard text collected outside of an entry (e.g. a
                        # <title> / <updated> that precedes the first <entry>),
                        # otherwise it would be glued in front of this entry's title.
                        self.titlestr = ''
                        self.updatedstr = ''
                elif tag == 'title':
                        self.title = True
                elif tag == 'updated':
                        self.updated = True

        def handle_endtag(self, tag):
                if tag == 'title':
                        self.title = False
                elif tag == 'updated':
                        self.updated = False
                elif tag == 'entry':
                        # Entry complete: store it and start over for the next one
                        self.list.append( (self.titlestr, self.updatedstr) )
                        self.titlestr = ''
                        self.updatedstr = ''

        def handle_data(self, data):
                if self.title:
                        # Title text may be delivered in several chunks
                        self.titlestr += data
                elif self.updated:
                        # Only the most recent chunk is kept for the timestamp
                        self.updatedstr = data
88
89
class WunschlisteFeed(IdentifierBase):
        """Episode identifier based on the wunschliste.de series search and Atom feed."""

        def __init__(self):
                IdentifierBase.__init__(self)
                
                self.license = False

        @classmethod
        def knowsToday(cls):
                return False

        @classmethod
        def knowsFuture(cls):
                return True

        def getEpisode(self, name, begin, end=None, service=None, channels=None):
                """Look up the episode of show `name` that starts around `begin`.

                name:     show name used for the series search
                begin:    start time, compared against the feed timestamps
                end:      stored on the instance, not evaluated here
                service:  passed on to compareChannels
                channels: passed on to compareChannels (defaults to an empty list)

                On Success: Return the (season, episode, title, series) tuple
                            produced by getNextPage
                On Failure: Return a message string
                """
                self.begin = begin
                self.end = end
                self.service = service
                # None instead of a mutable [] default, so calls never share one list
                self.channels = [] if channels is None else channels
                
                self.returnvalue = None
                
                # Check preconditions
                if not name:
                        splog(_("Skip Wunschliste: No show name specified"))
                        return _("Skip Wunschliste: No show name specified")
                if not begin:
                        splog(_("Skip Wunschliste: No begin timestamp specified"))
                        return _("Skip Wunschliste: No begin timestamp specified")
                
                splog("WunschlisteFeed getEpisode")
                
                # Try the given name first, then every alternative name until
                # getAlternativeSeries has nothing more to offer.
                while name:
                        ids = self.getSeries(name)
                        
                        if ids:
                                # Walk the list back to front (same order as popping
                                # from the end) without modifying it - the very same
                                # list object was handed to doCache by getSeries.
                                for idserie in reversed(ids):
                                        if idserie and len(idserie) == 2:
                                                seriesid, self.series = idserie
                                                
                                                result = self.getNextPage( seriesid )
                                                if result:
                                                        return result
                        
                        name = self.getAlternativeSeries(name)
                
                return ( self.returnvalue or _("No matching series found") )

        def getSeries(self, name):
                """Search for `name` and return a list of (id, name) tuples or None."""
                # Everything except ASCII letters, digits and '*' is replaced by a blank
                url = SERIESLISTURL + urlencode({ 'q' : re.sub("[^a-zA-Z0-9*]", " ", name) })
                data = self.getPage( url )
                
                # A string is a raw response which has to be parsed (and cached);
                # a list is an already parsed result.
                if data and isinstance(data, basestring):
                        data = self.parseSeries(data)
                        self.doCache(url, data)
                
                if data and isinstance(data, list):
                        splog("WunschlisteFeed ids", data)
                        return data
                return None

        def parseSeries(self, data):
                """Parse the search response: one 'name|countryyear|id' record per line.

                Returns a list of (id, name) tuples in reversed line order.
                Lines which do not consist of exactly three fields are logged and skipped.
                """
                serieslist = []
                for line in data.splitlines():
                        values = line.split("|")
                        if len(values) == 3:
                                idname, countryyear, seriesid = values
                                splog(seriesid, idname)
                                serieslist.append( (seriesid, idname) )
                        else:
                                splog("WunschlisteFeed: ParseError: " + str(line))
                serieslist.reverse()
                return serieslist

        def parseNextPage(self, data):
                """Parse the Atom feed and return a list of (title, updated) tuples."""
                # Handle malformed HTML issues: turn the escaped ampersand into a
                # plain one so it stays part of the title text
                data = data.replace('&amp;','&')  # target=\"_blank\"&amp;
                parser = WLAtomParser()
                parser.feed(data)
                #splog(parser.list)
                return parser.list

        def _parseEpisodeTitle(self, xtitle):
                """Split a feed title (channel already removed) into its parts.

                Returns (season, episode, title, series) or None if the date or
                the 'Series: Title' part cannot be found.
                """
                result = CompiledRegexpAtomDate.search(xtitle)
                if not result:
                        return None
                # Slice string to remove date
                xtitle = xtitle[:result.start()]
                
                result = CompiledRegexpAtomEpisode.search(xtitle)
                if result:
                        # Extract season and episode
                        xepisode = result.group(1)
                        # Slice string to remove season and episode
                        xtitle = xtitle[:result.start()]
                        
                        result = CompiledRegexpEpisode.search(xepisode)
                        if result:
                                # The season is optional and defaults to "1"
                                xseason = result.group(2) or "1"
                                xepisode = result.group(3) or "0"
                        else:
                                splog("WunschlisteFeed wrong episode format", xepisode)
                                xseason = "1"
                                xepisode = "0"
                else:
                        splog("WunschlisteFeed wrong title format", xtitle)
                        xseason = "0"
                        xepisode = "0"
                
                result = CompiledRegexpAtomTitle.search(xtitle)
                if not result:
                        return None
                # Extract episode title
                xtitle = result.group(1)
                return (xseason, xepisode, xtitle.decode('ISO-8859-1').encode('utf8'), self.series.decode('ISO-8859-1').encode('utf8'))

        def getNextPage(self, id):
                """Search the feed of series `id` for the entry closest to self.begin.

                An entry qualifies if its timestamp is within the configured
                max_time_drift (minutes) of self.begin and its channel is accepted
                by compareChannels. Returns the (season, episode, title, series)
                tuple of the best entry or None. If only the channel check failed,
                a hint is stored in self.returnvalue.
                """
                splog("WunschlisteFeed getNextPage")
                
                url = EPISODEIDURLATOM + urlencode({ 's' : id })
                data = self.getPage( url )
                
                if data and isinstance(data, basestring):
                        data = self.parseNextPage(data)
                        self.doCache(url, data)
                
                if not (data and isinstance(data, list)):
                        return None
                
                # Allowed difference in seconds, identical for every entry
                maxdelta = int(config.plugins.seriesplugin.max_time_drift.value) * 60
                
                yepisode = None
                ydelta = maxint
                
                for tds in data:
                        if not tds or len(tds) != 2:
                                continue
                        xtitle, xupdated = tds
                        # An entry without timestamp cannot be compared (and
                        # parse_date cannot parse an empty string)
                        if xtitle is None or not xupdated:
                                continue
                        
                        #import iso8601
                        #http://code.google.com/p/pyiso8601/
                        # The timezone is dropped, not converted, to get a naive datetime
                        xbegin = parse_date(xupdated).replace(tzinfo=None)
                        
                        #Py2.6 has no timedelta.total_seconds()
                        delta = abs(self.begin - xbegin)
                        delta = delta.seconds + delta.days * 24 * 3600
                        splog(self.begin, xbegin, delta, maxdelta)
                        
                        if delta > maxdelta:
                                # Outside of the time window: stop as soon as
                                # a candidate has been found
                                if yepisode:
                                        break
                                continue
                        
                        result = CompiledRegexpAtomChannel.search(xtitle)
                        if not result:
                                continue
                        
                        if not compareChannels(self.channels, result.group(1), self.service):
                                self.returnvalue = _("Check the channel name")
                                continue
                        
                        if delta >= ydelta:
                                # Not closer than the current candidate: stop searching
                                break
                        
                        # Slice string to remove channel
                        episode = self._parseEpisodeTitle(xtitle[:result.start()])
                        if episode:
                                yepisode = episode
                                ydelta = delta
                
                return yepisode