modified: Makefile.am
[enigma2-plugins.git] / SubsDownloader2 / src / SourceCode / anysub2srt.py
1 import os\r
2 import re\r
3 import sys\r
4 import hashlib\r
5 #import shutil\r
6 import urllib\r
7 import codecs\r
8 #from Screens.MessageBox import MessageBox\r
9 \r
10 """ Convert subtitles to SRT format based on napi2srt.py by 2009-11-02 Pawel Sternal <sternik@gmail.com>\r
11 modification by 2011-05-20 SileliS <silelis@tlen.pl>\r
12 """\r
13 \r
class SubConv():
    """Load a subtitle file and convert it to SubRip (SRT).

    Supported input formats are MicroDVD ("mdvd"), SubRip ("srt"),
    TMPlayer ("tmp"), SubViewer 2.0 ("sub2") and MPL2 ("mpl2").

    Every ``read_*`` method returns the same internal representation: a list
    of cues, each cue being ``[start_seconds, end_seconds, line1, line2, ...]``.

    The ``list`` parameter name used by several methods shadows the builtin;
    it is kept because it is part of the existing call signature.  None of
    the methods modifies the list of lines it is given.
    """

    _DAMAGED_WARNING = "Warning: it seems like input file is damaged or too short.\n"

    # Patterns are compiled once for the class instead of on every call.
    _RE_MDVD = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    _RE_SRT_PROBE = re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")
    _RE_TMP = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    _RE_SUB2_PROBE = re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*,.*")
    _RE_MPL2 = re.compile(r"\[(?P<start>\d+)\]\[(?P<stop>\d+)\](?P<line>.*)")
    _RE_SUB2 = re.compile(
        r"^(\d+):(\d+):(\d+)\.(\d+)\s*,\s*(\d+):(\d+):(\d+)\.(\d+).*$")
    _RE_SRT_INDEX = re.compile(r"^(\d+)\s*$")
    _RE_SRT_TIMING = re.compile(
        r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    _RE_BLANK = re.compile(r"^\s*$")

    # Probe order matters: the first pattern that matches a line wins.
    _FORMAT_PROBES = (
        ("mdvd", _RE_MDVD),
        ("srt", _RE_SRT_PROBE),
        ("tmp", _RE_TMP),
        ("sub2", _RE_SUB2_PROBE),
        ("mpl2", _RE_MPL2),
    )

    def __init__(self, subtitle_path, encoding):
        """Read ``subtitle_path`` into ``self.subs_file`` as a list of lines.

        ``encoding`` is the codec name used to decode the file; bytes that
        cannot be decoded are skipped (``errors="ignore"``).  The misspelled
        attribute name ``encodeing`` is kept because it is a public attribute.
        """
        self.encodeing = encoding
        self.subtitle = subtitle_path
        # The context manager closes the handle even when reading fails.
        with codecs.open(self.subtitle, 'r', self.encodeing,
                         errors="ignore") as source:
            self.subs_file = source.readlines()

    @staticmethod
    def _clock_to_seconds(hours, minutes, seconds, fraction, scale):
        """Convert captured clock fields (strings) to seconds as a float.

        ``scale`` is the divisor for ``fraction``: 100.0 for hundredths,
        1000.0 for milliseconds.
        """
        return (int(hours) * 3600 + int(minutes) * 60 + int(seconds)
                + int(fraction) / scale)

    @staticmethod
    def _srt_timestamp(seconds):
        """Format a time in seconds as an SRT ``HH:MM:SS,mmm`` timestamp.

        The value is rounded to whole milliseconds first.  Truncating the
        fractional part instead loses a millisecond to floating point error
        (1.001 s would be rendered as ``,000``).  Negative input is clamped
        to zero.
        """
        total_ms = max(0, int(round(seconds * 1000)))
        total_seconds, millis = divmod(total_ms, 1000)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (hours, minutes, secs, millis)

    def detect_format(self, list):
        """Return the format name of the first recognisable line.

        Returns one of "mdvd", "srt", "tmp", "sub2" or "mpl2".  When no line
        matches any known format a message is printed and the *string*
        "None" is returned (not the ``None`` object).

        The input is only inspected, never consumed, so the same list can be
        handed to the matching ``read_*`` method afterwards without losing
        the first cue.
        """
        for line in list:
            for format_name, pattern in self._FORMAT_PROBES:
                if pattern.match(line):
                    return format_name
        print("Unsupported subtitle format appears. Please send this subtitle to developer.")
        return "None"

    def read_mdvd(self, list, fps):
        """Read MicroDVD subtitles (``{start_frame}{end_frame}text``).

        ``list`` holds the lines of the file and ``fps`` is the frame rate
        used to convert frame numbers to seconds.  A cue without an end frame
        lasts 3 seconds.  ``|`` separates the text lines of one cue.

        Returns a list of ``[start, end, line1, ...]`` cues.  If ``fps`` is
        not a usable non-zero number, a warning is written to stderr and an
        empty list is returned.
        """
        try:
            frame_rate = float(fps)
        except (TypeError, ValueError):
            frame_rate = 0.0
        if not frame_rate:
            sys.stderr.write(self._DAMAGED_WARNING)
            return []

        subtitles = []
        for raw in list:
            found = self._RE_MDVD.match(raw)
            if not found:
                continue
            start = int(found.group(1)) / frame_rate
            if found.group(2):
                end = int(found.group(2)) / frame_rate
            else:
                end = start + 3
            subtitles.append([start, end] + found.group(3).strip().split("|"))
        return subtitles

    def read_sub2(self, list):
        """Read SubViewer 2.0 subtitles, e.g.::

            00:01:54.75,00:01:58.54
            You shall not pass!

        The line following a timing line is the cue text; ``[br]`` separates
        the text lines of one cue.  Returns a list of
        ``[start, end, line1, ...]`` cues.  A timing line that is the last
        line of the input produces a warning on stderr and no cue.
        """
        subtitles = []
        remaining = iter(list)
        for raw in remaining:
            found = self._RE_SUB2.match(raw)
            if not found:
                continue
            text = next(remaining, None)
            if text is None:
                sys.stderr.write(self._DAMAGED_WARNING)
                break
            fields = found.groups()
            cue = [self._clock_to_seconds(*(fields[0:4] + (100.0,))),
                   self._clock_to_seconds(*(fields[4:8] + (100.0,)))]
            cue.extend(text.strip().split("[br]"))
            subtitles.append(cue)
        return subtitles

    def read_srt(self, list):
        """Read SubRip subtitles.

        A cue is an index line, a ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` timing
        line and then text lines up to the next blank line or the end of the
        input.  The last cue is kept even when the file does not end with a
        blank line.  Returns a list of ``[start, end, line1, ...]`` cues.
        """
        subtitles = []
        remaining = iter(list)
        for raw in remaining:
            if not self._RE_SRT_INDEX.match(raw):
                continue
            timing = next(remaining, None)
            if timing is None:
                # An index line with nothing after it.
                sys.stderr.write(self._DAMAGED_WARNING)
                break
            found = self._RE_SRT_TIMING.match(timing)
            if not found:
                continue
            fields = found.groups()
            cue = [self._clock_to_seconds(*(fields[0:4] + (1000.0,))),
                   self._clock_to_seconds(*(fields[4:8] + (1000.0,)))]
            # Shares the outer iterator, so the text lines (and the blank
            # separator) are consumed here and not re-examined above.
            for text in remaining:
                if self._RE_BLANK.match(text):
                    break
                cue.append(text.strip())
            subtitles.append(cue)
        return subtitles

    def read_tmp(self, list):
        """Read TMPlayer subtitles (``HH:MM:SS:text``).

        Lines sharing a start second are merged into one cue and ``|``
        separates the text lines.  The format has no end time: a cue ends
        where another cue starts within the next 3 seconds, otherwise after
        4 seconds.  Returns a list of ``[start, end, line1, ...]`` cues
        ordered by start time.
        """
        texts_by_second = {}
        for raw in list:
            found = self._RE_TMP.match(raw)
            if not found:
                continue
            start = (int(found.group(1)) * 3600 + int(found.group(2)) * 60
                     + int(found.group(3)))
            texts_by_second.setdefault(start, []).extend(
                found.group(4).strip().split("|"))

        subtitles = []
        for start in sorted(texts_by_second):
            duration = 1
            while duration < 4 and (start + duration) not in texts_by_second:
                duration += 1
            subtitles.append([start, start + duration] + texts_by_second[start])
        return subtitles

    def read_mpl2(self, list):
        """Read MPL2 subtitles (``[start][stop]text``, times in 1/10 s).

        ``|`` separates the text lines of one cue.  Lines that do not match
        the pattern are skipped.  The text is used as decoded (no ``str()``
        conversion) and the line terminator is stripped.  Returns a list of
        ``[start, end, line1, ...]`` cues.
        """
        subtitles = []
        for raw in list:
            found = self._RE_MPL2.match(raw)
            if not found:
                continue
            cue = [float(found.group("start")) / 10,
                   float(found.group("stop")) / 10]
            cue.extend(found.group("line").strip().split("|"))
            subtitles.append(cue)
        return subtitles

    def check_subs_long(self, subtitles_standard_list, fps):
        """Shorten cues whose end time reaches the start of the next cue.

        ``subtitles_standard_list`` is a list of ``[start, end, line1, ...]``
        cues.  It is modified in place and also returned.  The last cue is
        never changed because it has no successor.

        For each cue whose end is at or after the next cue's start, the end
        is moved 0.1 s earlier; if that would not leave the end after the
        cue's own start, the end becomes the midpoint between the two start
        times.  A message is printed for every corrected cue.
        """
        for position in range(len(subtitles_standard_list) - 1):
            cue = subtitles_standard_list[position]
            next_start = subtitles_standard_list[position + 1][0]
            if cue[1] is None:
                # The fallback is anchored on the start time; adding to the
                # missing end time itself can only raise TypeError.
                # NOTE(review): the offset ``6 * fps`` is kept as written,
                # although the other times here are in seconds - confirm.
                cue[1] = cue[0] + 6 * fps
            if cue[1] >= next_start:
                shortened = cue[1] - 0.1
                if shortened <= cue[0]:
                    # 2.0 keeps this a true division for integer times.
                    cue[1] = (cue[0] + next_start) / 2.0
                else:
                    # NOTE(review): an overlap larger than 0.1 s is reduced
                    # but not removed by this step.
                    cue[1] = shortened
                print("Subtitle end time error detected. Line no. %d was corrected" % position)
        return subtitles_standard_list

    def to_srt(self, list):
        """Convert cues in the internal format to SRT entries.

        Returns one string per cue, numbered from 1: the index line, the
        timing line, the text lines and a terminating blank line.
        """
        entries = []
        for number, cue in enumerate(list, 1):
            entries.append("%d\n%s --> %s\n%s\n\n" % (
                number,
                self._srt_timestamp(cue[0]),
                self._srt_timestamp(cue[1]),
                "\n".join(cue[2:])))
        return entries

    def ___utf8_to_utf_8_BOM(self):
        """Prepend the 3-byte UTF-8 BOM (EF BB BF) to the subtitle file.

        According to the original author's note, Polish characters were
        displayed badly after a few minutes of playback without these bytes.
        A file that already starts with the BOM is left untouched.
        """
        with open(self.subtitle, 'rb') as source:
            payload = source.read()
        if payload.startswith(codecs.BOM_UTF8):
            return
        with open(self.subtitle, 'wb') as target:
            target.write(codecs.BOM_UTF8 + payload)

    def save_subtitle(self, list):
        """Write the cues in ``list`` to ``self.subtitle`` as UTF-8 SRT with BOM.

        Saving is best effort: on any failure a message naming the file is
        printed and nothing is raised.  The cues are converted before the
        file is opened, so a conversion error does not truncate the file.
        """
        try:
            entries = self.to_srt(list)
            # 'utf-8-sig' makes the codec emit the BOM before the first write.
            with codecs.open(self.subtitle, 'w', 'utf-8-sig') as target:
                target.writelines(entries)
        except Exception:
            print("Can't save subtitles in file: %s" % self.subtitle)
315 \r
316 \r