modified: configure.ac
[enigma2-plugins.git] / subsdownloader2 / src / SourceCode / Napisy24_pl.py
1 import httplib
2 import xml.dom.minidom
3 import time
4 import re
5 import os
6 from urllib import quote
7 from operator import itemgetter#, attrgetter
8 from Plugins.Extensions.SubsDownloader2.SourceCode.archives_extractor import zip_extractor
9 from Plugins.Extensions.SubsDownloader2.SourceCode.periscope import SubtitleDatabase
10
11 #  Copyright (C) 2011 Dawid Bankowski <enigma2subsdownloader at gmail.com>
12 #
13 #  This program is free software: you can redistribute it and/or modify
14 #  it under the terms of the GNU General Public License as published by
15 #  the Free Software Foundation, either version 3 of the License, or
16 #  (at your option) any later version.
17 #
18 #  This program is distributed in the hope that it will be useful,
19 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 #  GNU General Public License for more details.
22 #
23 #  You should have received a copy of the GNU General Public License
24 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
25 #
26
class XML_to_Dict():
    """Turn an XML document into nested dictionaries and lists using minidom.

    An element whose first child is a text node becomes that text; an element
    without children becomes None; any other element becomes a dictionary
    mapping each child tag name to the list of converted children carrying
    that tag, in document order.
    """

    def __init__(self):
        pass

    def xmltodict(self, xmlstring):
        """Parse *xmlstring* and return the converted form of its root element.

        Whitespace-only text nodes are removed from the tree before the
        conversion, so indentation between tags does not hide child elements.
        """
        document = xml.dom.minidom.parseString(xmlstring)
        root = document.documentElement
        self.remove_whilespace_nodes(root)
        return self.elementtodict(root)

    def elementtodict(self, parent):
        """Recursively convert the element *parent* (see the class docstring).

        Returns None for a childless element, the node value when the first
        child is a text node, otherwise a dict of tag name -> list of values.
        """
        first = parent.firstChild
        if not first:
            return None
        if first.nodeType == xml.dom.minidom.Node.TEXT_NODE:
            return first.nodeValue

        converted = {}
        for node in parent.childNodes:
            # Only element children contribute; other node types are skipped.
            if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
                converted.setdefault(node.tagName, []).append(self.elementtodict(node))
        return converted

    def remove_whilespace_nodes(self, node, unlink=True):
        """Remove whitespace-only text nodes below *node*, recursively.

        When *unlink* is true every removed node is also unlinked.
        """
        blanks = []
        for child in node.childNodes:
            is_text = child.nodeType == xml.dom.Node.TEXT_NODE
            if is_text and not child.data.strip():
                blanks.append(child)
            elif child.hasChildNodes():
                self.remove_whilespace_nodes(child, unlink)
        # Removal is deferred so the child list is not modified while iterating.
        for blank in blanks:
            blank.parentNode.removeChild(blank)
            if unlink:
                blank.unlink()
65
class Napisy24_pl(XML_to_Dict,zip_extractor):
    """Client for the napisy24.pl web API.

    Downloads the subtitle list for a movie (searched by title or by an IMDB
    identifier read from an .nfo file next to the movie), converts it into
    ``self.subtitle_dict`` and downloads/saves/extracts the chosen subtitle
    zip archive.

    The request-type strings used throughout ("downloada_subtitle_list_by_film_name",
    "downloada_subtitle_list_by_IMDB", "download_subtilte_zip") are literal
    values compared at runtime; their spelling must not be "corrected".
    """

    def __init__(self,moviePath, movieNameString= None):
        """Derive search name, movie directory and zip target from *moviePath*.

        moviePath       -- path of the movie file, with "/" as separator.
        movieNameString -- optional title to search for; when None the movie
                           file name without its extension is used.
        """
        file_name = moviePath.rsplit("/", 1)[-1]
        base_name = file_name.rsplit(".", 1)[0]
        if movieNameString is None:
            self.MovieName = base_name
        else:
            self.MovieName = movieNameString
        self.MovieDir = moviePath.rsplit("/", 1)[0]
        # The archive is stored next to the movie under the movie's base name.
        self.ZipFilePath = self.MovieDir + '/' + base_name + '.zip'
        self.subtitle_dict = []
        self.NAPISY24_url = "napisy24.pl"

    def IMDB_idenifier_search(self):
        """Look for an .nfo file in the movie directory and extract the IMDB id.

        The first directory entry whose extension is "nfo" is read and searched
        for a link of the form ``http://www.imdb.com/title/<id>/``.

        Returns the text between the link prefix and the next "/" on success,
        or False when no .nfo file exists, it cannot be read, or it holds no
        complete link.
        """
        url_prefix = "http://www.imdb.com/title/"
        try:
            dir_list = os.listdir(self.MovieDir)
        except OSError:
            print("blad IMBN")
            return False

        nfo_name = None
        for position, entry in enumerate(dir_list):
            if entry.split(".")[-1].lower() == "nfo":
                print("find NFO in %i list" % position)
                nfo_name = entry
                break
        if nfo_name is None:
            print("blad IMBN")
            return False

        try:
            nfo_file = open(self.MovieDir + "/" + nfo_name, "r")
            try:
                buffor = nfo_file.read()
            finally:
                # The original referenced .close without calling it.
                nfo_file.close()
        except (IOError, OSError):
            print("blad IMBN")
            return False

        # Expected line in the nfo: iMDB: http://www.imdb.com/title/tt1219289/
        prefix_at = buffor.find(url_prefix)
        if prefix_at == -1:
            print("blad IMBN")
            return False
        IMDB_begining = prefix_at + len(url_prefix)
        # As before, the search for the closing "/" starts one character
        # after the beginning of the identifier.
        IMDB_ending = buffor.find("/", IMDB_begining + 1)
        if IMDB_ending == -1:
            print("blad IMBN")
            return False
        # Debug attributes set by the original implementation; kept in case
        # other code reads them.
        self.dd11 = IMDB_begining
        self.dd22 = IMDB_ending
        # TODO: validate the extracted IMDB number before returning it.
        return buffor[IMDB_begining:IMDB_ending]

    def __connect_with_server(self,get_operatoin,server_reuest_type):
        """Send a GET request to the server and store the response body.

        get_operatoin      -- request target passed to the GET request.
        server_reuest_type -- "downloada_subtitle_list_by_film_name" or
                              "downloada_subtitle_list_by_IMDB" stores the body
                              in self.XML_String; "download_subtilte_zip"
                              stores it in self.zip_string.

        Both attributes are reset to None first. Returns the HTTP status code,
        or None when the connection failed (after a 0.5 s pause).
        """
        # Local import: this module's header does not import sys, which made
        # the original error handler itself fail with NameError.
        import sys
        self.XML_String = None
        self.zip_string = None
        conn = None
        try:
            conn = httplib.HTTPConnection(self.NAPISY24_url)
            conn.request("GET", get_operatoin)
            r1 = conn.getresponse()
            print("%s %s" % (r1.status, r1.reason))
            if server_reuest_type in ("downloada_subtitle_list_by_film_name",
                                      "downloada_subtitle_list_by_IMDB"):
                self.XML_String = r1.read()
            elif server_reuest_type == "download_subtilte_zip":
                self.zip_string = r1.read()
            return r1.status
        except (IOError, OSError, httplib.HTTPException):
            sys.stderr.write("Napisy24.pl server connection error.\n")
            time.sleep(0.5)
            return None
        finally:
            # Always release the socket, whether the request succeeded or not.
            if conn is not None:
                conn.close()

    def getNapisy24_SubtitleListXML(self, subtitle_list_reuest_type):
        """Download the subtitle list and convert it into self.subtitle_dict.

        subtitle_list_reuest_type is one of:
        - "downloada_subtitle_list_by_film_name" -- search by self.MovieName,
        - "downloada_subtitle_list_by_IMDB" -- search by the IMDB identifier
          found by IMDB_idenifier_search().

        The request is attempted up to three times while the server answers
        with a status other than 200 or 400. Returns True when a list was
        downloaded and converted, False otherwise (including an unknown
        request type or a missing IMDB identifier).
        """
        if subtitle_list_reuest_type == "downloada_subtitle_list_by_film_name":
            request_subtitle_list = "/libs/webapi.php?title=%s" % quote(self.MovieName)
        elif subtitle_list_reuest_type == "downloada_subtitle_list_by_IMDB":
            IMDB_search_answer = self.IMDB_idenifier_search()
            if IMDB_search_answer is False:
                return False
            request_subtitle_list = "/libs/webapi.php?imdb=%s" % IMDB_search_answer
        else:
            # Previously this fell through to an undefined request string.
            print("Unknown subtitle list request type: %s" % str(subtitle_list_reuest_type))
            return False

        r1_status = None
        for _attempt in range(3):
            # Both list request types store the body in self.XML_String, so
            # the by-name type is passed for either of them.
            r1_status = self.__connect_with_server(request_subtitle_list, "downloada_subtitle_list_by_film_name")
            if r1_status != 200 and r1_status != 400:
                print("Fetching subtitle list failed, HTTP code: %s" % (str(r1_status)))
                time.sleep(0.5)
                continue
            if r1_status == 400:
                print("Fetching subtitle list failed, HTTP code: %s \n Bad request in string: %s." % (str(r1_status), request_subtitle_list))
            if self.XML_String == 'brak wynikow':
                # 'brak wynikow' is the body compared against for "no results".
                print("Subtitle list NOT FOUND")
            elif not self.XML_String:
                print("Subtitle list download FAILED")
            # A 200 or 400 answer is final; no further attempts are made.
            break

        if r1_status != 200 or self.XML_String in ('brak wynikow', "", None):
            return False
        return self.return_xml_dict()

    def Correct_MultiRoot_XML(self):
        """Patch self.XML_String so that minidom can parse it.

        A leading newline is dropped, everything after the first line is
        wrapped in a single <lista> root element, every "&" is replaced by
        "and", and the text is re-encoded from CP1252 to UTF-8.
        """
        xml_text = self.XML_String
        if xml_text.startswith("\n"):
            xml_text = xml_text[1:]
        newline_at = xml_text.find("\n")
        if newline_at == -1:
            second_line_at = len(xml_text)
        else:
            second_line_at = newline_at + 1
        # NOTE(review): exactly as in the original, the character directly
        # after the first newline is dropped (second_line_at + 1). This looks
        # like it relies on the server's response layout - confirm against a
        # real response before changing it.
        xml_text = xml_text[0:second_line_at] + "<lista>" + xml_text[(second_line_at + 1):] + "</lista>"
        xml_text = xml_text.replace("&", "and")
        self.XML_String = xml_text.decode("CP1252").encode("UTF-8")

    def return_xml_dict(self):
        """Convert self.XML_String into self.subtitle_dict.

        The 'subtitle' entries are sorted by their 'imdb' and 'cd' values.
        Returns True on success and False when correcting, parsing or sorting
        the XML fails.
        """
        try:
            self.Correct_MultiRoot_XML()
            self.subtitle_dict = sorted(self.xmltodict(self.XML_String)['subtitle'],key=itemgetter('imdb','cd'))
        except Exception:
            print("XML subtitle list  not downloaded or converterd.")
            return False
        print("XML subtitle list downloaded and converted to dict")
        return True

    def return_xml_dict_entry_value(self,dict_entry, dict_entry_position):
        """Return the first value stored under key *dict_entry_position* of
        entry number *dict_entry* in self.subtitle_dict."""
        value = self.subtitle_dict[dict_entry][dict_entry_position]
        return value[0]

    def extract_zip_file(self):
        """Extract txt/sub/srt files from the saved zip archive.

        Returns the result of zip_extractor.extract_zipped_file(); per the
        original comment this is false when nothing was extracted. The
        archive itself is not removed here.
        """
        extractor = zip_extractor(self.ZipFilePath,None,("txt","sub","srt"))
        return extractor.extract_zipped_file()

    def save_downloaded_zip(self, dict_entry_to_download):
        """Download the chosen subtitle archive and write it to self.ZipFilePath.

        On success self.zip_string is deleted and True is returned. Returns
        False when the download or the write fails.
        """
        if self.download_subtitle_zip(dict_entry_to_download) != True:
            return False
        try:
            zip_file = open(self.ZipFilePath,"wb")
            try:
                zip_file.write(self.zip_string)
            finally:
                # Close (and thereby flush) the archive before anyone reads
                # it; the original referenced .close without calling it.
                zip_file.close()
        except (IOError, OSError):
            print("Problems with Zipfile: %s saveing on hdd." % self.ZipFilePath)
            return False
        print("Zipfile: %s saved on hdd." % self.ZipFilePath)
        del self.zip_string
        return True

    def download_subtitle_zip(self, dict_entry_to_download):
        """Download the subtitle zip of entry *dict_entry_to_download*.

        The data is stored in self.zip_string. The request is attempted up to
        three times until the server answers with status 302. Returns True
        when the stored data starts with the zip signature 'PK', else False.
        """
        request_subtitle_list = "http://napisy24.pl/download/%s/" % str(self.return_xml_dict_entry_value(dict_entry_to_download,'id'))
        for _attempt in range(3):
            r1_status = self.__connect_with_server(request_subtitle_list, "download_subtilte_zip")
            if r1_status != 302:
                print("Fetching subtitle failed, HTTP code: %s" % (str(r1_status)))
                time.sleep(0.5)
                continue
            if not self.zip_string:
                print("Subtitle NOT DOWNLOADED")
            break

        # zip_string is None when every attempt failed to connect; slicing
        # None used to raise TypeError here.
        if self.zip_string is not None and self.zip_string[0:2] == 'PK':
            print("Success to download subtitle zip.")
            return True
        print("Reild to download subtitle zip.")
        return False
265
class GuessFileData_from_FileName(SubtitleDatabase.SubtitleDB):
    """Derive title/season/episode information from a file name.

    Relies on guessFileData(), which is not defined in this class
    (presumably inherited from SubtitleDatabase.SubtitleDB).
    """

    def __init__(self, tvshowRegex, tvshowRegex2, movieRegex):
        """Store the regular expressions used for file-name guessing.

        NOTE: the three arguments are accepted for interface compatibility
        but ignored; the patterns are always taken from the SubtitleDatabase
        module.
        """
        self.tvshowRegex = SubtitleDatabase.tvshowRegex
        self.tvshowRegex2 = SubtitleDatabase.tvshowRegex2
        self.movieRegex = SubtitleDatabase.movieRegex

    def return_data_string(self,file_path):
        """Return a search string guessed from *file_path*.

        For a 'tvshow' the result is "<name> <season>x<episode>"; for a
        'movie' or 'unknown' file it is the name alone. Any other type
        yields None.
        """
        file_data = self.guessFileData(file_path)
        file_type = file_data['type']
        if file_type == 'tvshow':
            return str(file_data['name']+" "+str(file_data['season'])+"x"+str(file_data['episode']))
        if file_type in ('movie', 'unknown'):
            return str(file_data['name'])
        return None

    def return_movie_data_to_XBMC(self,file_path):
        """Return the tuple (name, tvShow, season, episode) for *file_path*.

        For a 'tvshow' tvShow is the show name and season/episode are the
        guessed values; for every other type the three are empty lists.
        """
        fileData = self.guessFileData(file_path)
        if fileData['type'] == 'tvshow':
            return fileData['name'], fileData['name'], fileData['season'], fileData['episode']
        # Any non-tvshow type gets empty placeholders. The original only
        # handled 'movie'/'unknown' and left the variables unbound otherwise.
        return fileData['name'], [], [], []
292         
293         
class CompareMovie_and_Subtite_FileData(GuessFileData_from_FileName):
    """Match subtitle files to movie files by comparing guessed file data.

    Every (movie, subtitle) pair gets a score built from the fields that
    guessFileData() reports for both files; the best pairs are then selected
    so that each movie and each subtitle appears at most once.
    """

    # (movie field, subtitle field, weight): the weight is added to a pair's
    # score when both fields are present and equal. The order is kept from
    # the original if-chain so the floating point sums stay identical.
    __SCORE_RULES = (
        ('type', 'type', 0.0900),
        ('name', 'name', 0.1600),
        ('season', 'season', 0.0225),
        ('episode', 'episode', 0.0225),
        ('season', 'part', 0.0060),
        ('episode', 'part', 0.0060),
        ('part', 'part', 0.0400),
        ('teams', 'teams', 0.0025),
        ('year', 'year', 0.0049),
    )

    def __init__(self, tvshowRegex, tvshowRegex2, movieRegex, file_extentions):
        """Store the guessing patterns and the extension table.

        The three regex arguments are ignored; the patterns come from the
        SubtitleDatabase module. *file_extentions* maps a file extension to
        a kind; only entries whose kind is "movie" are used.
        """
        self.tvshowRegex = SubtitleDatabase.tvshowRegex
        self.tvshowRegex2 = SubtitleDatabase.tvshowRegex2
        self.movieRegex = SubtitleDatabase.movieRegex
        self.__file_extentions = file_extentions
        # Set by moviePath_and_movieFileData(); initialised here so that
        # give_movie_subtitle_consistent_data() cannot hit AttributeError.
        self.__file_path = None

    def __movie_file_extensions(self, extensions_dict):
        """Return the keys of *extensions_dict* whose value is "movie"."""
        return [ext for ext in extensions_dict if extensions_dict[ext] == "movie"]

    def __return_movie_file_list(self, movie_path):
        """Return the paths of all movie files in the directory of *movie_path*.

        A file counts as a movie when the text after its last "." is one of
        the configured movie extensions.
        """
        movie_dir = movie_path.rsplit("/",1)[0]
        movie_extensions = self.__movie_file_extensions(self.__file_extentions)
        return [movie_dir + "/" + name
                for name in os.listdir(movie_dir)
                if name.rsplit(".",1)[-1] in movie_extensions]

    def moviePath_and_movieFileData(self,file_path):
        """Return a list of (movie_path, guessFileData(movie_path)) tuples for
        every movie file in the directory of *file_path*.

        *file_path* is also remembered as the primary movie used by
        give_movie_subtitle_consistent_data().
        """
        self.__file_path = file_path
        return [(path, self.guessFileData(path))
                for path in self.__return_movie_file_list(file_path)]

    def subtitlePath_and_subtitleFileData(self,file_path_list):
        """Return a list of (path, guessFileData(path)) tuples for the given
        subtitle paths."""
        return [(path, self.guessFileData(path)) for path in file_path_list]

    def compare_movie_and_subtitle_FileData(self, movie_file_data, subtitle_file_data):
        """Score every movie/subtitle combination.

        Both arguments are lists of (path, file_data) tuples. Returns a list
        of dicts with the keys "movie", "subtitle" and "propability" (the
        score), one per combination, movies in the outer order.
        """
        compare_result = []
        for movie_path, movie_data in movie_file_data:
            for subtitle_path, subtitle_data in subtitle_file_data:
                wynik = 0
                for movie_key, subtitle_key, weight in self.__SCORE_RULES:
                    if (movie_key in movie_data and subtitle_key in subtitle_data
                            and movie_data[movie_key] == subtitle_data[subtitle_key]):
                        wynik = wynik + weight
                compare_result.append({"movie": movie_path, "subtitle": subtitle_path, "propability": wynik})
        return compare_result

    def __best_per_group(self, candidates, group_key):
        """Pick, for every distinct candidate[group_key], the highest scored entry.

        Groups are taken in order of first appearance among candidates with a
        non-zero score; on equal scores the earlier candidate wins. Returns
        (winners, primary_seen), where primary_seen is True when an entry of
        the primary movie became the running best of any group.
        """
        groups = []
        for entry in candidates:
            if entry['propability'] != 0 and entry[group_key] not in groups:
                groups.append(entry[group_key])

        winners = []
        primary_seen = False
        for group in groups:
            best_entry = None
            best_score = 0
            for entry in candidates:
                if entry[group_key] == group and entry['propability'] > best_score:
                    # NOTE(review): as in the original, the flag is raised for
                    # an intermediate best even if a later candidate replaces
                    # it - confirm whether only final winners should count.
                    if self.__file_path == entry['movie']:
                        primary_seen = True
                    best_entry = entry
                    best_score = entry['propability']
            winners.append(best_entry)
        return winners, primary_seen

    def give_movie_subtitle_consistent_data(self, movie_file_data, subtitle_file_data):
        """Return the best matching movie <--> subtitle table.

        First the best subtitle is chosen for every movie, then the best
        movie for every remaining subtitle, so that one subtitle does not
        belong to several movies. The resulting list of
        {"movie", "subtitle", "propability"} dicts is returned only when the
        primary movie (the path last given to moviePath_and_movieFileData)
        took part in the second selection; otherwise an empty list.
        """
        scored_pairs = self.compare_movie_and_subtitle_FileData(movie_file_data, subtitle_file_data)
        # Pass 1: best subtitle per movie (the flag of this pass is unused,
        # exactly as the original overwrote it before the second pass).
        best_per_movie, _unused = self.__best_per_group(scored_pairs, 'movie')
        # Pass 2: best movie per subtitle among the winners of pass 1.
        final_movie_subtitle_list, matching_movie = self.__best_per_group(best_per_movie, 'subtitle')

        if matching_movie:
            return final_movie_subtitle_list
        return []
435         else:
436             return []