FIX for latest problems with labor version
[enigma2-plugins.git] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 '''
4 $Id: nrzuname.py 649 2011-07-03 14:16:49Z michael $
5 $Author: michael $
6 $Revision: 649 $
7 $Date: 2011-07-03 16:16:49 +0200 (So, 03 Jul 2011) $
8 '''
9
10 import re, sys, os
11 from xml.dom.minidom import parse
12 from twisted.web.client import getPage #@UnresolvedImport
13 from twisted.internet import reactor #@UnresolvedImport
14
# Inside the FritzCall package the package's own debug function is used and
# setDebug is a no-op.  When this file is used stand-alone the relative import
# fails and a simple print based debug, switchable with setDebug, is defined.
try:
        from . import debug #@UnresolvedImport # pylint: disable-msg=W0613,F0401
        def setDebug(what): # pylint: disable-msg=W0613
                '''Do nothing; debugging is controlled by the package.'''
                pass
except (ValueError, ImportError):
        # ValueError is what Python 2 raises for a relative import outside of a
        # package; ImportError covers the remaining import failures, so that
        # the stand-alone fallback is used instead of aborting the import.
        debugVal = True
        def setDebug(what):
                '''Switch the debug output on (true value) or off (false value).'''
                global debugVal
                debugVal = what
        def debug(message):
                '''Print message if debug output is switched on.'''
                if debugVal:
                        print(message)
27
28 import htmlentitydefs
def html2unicode(in_html, charset=None): # pylint: disable-msg=W0613
        '''
        Return in_html with its HTML character references resolved.

        Hexadecimal (&#xE4;) and decimal (&#228;) numeric references as well as
        the named entities known to htmlentitydefs (&auml;) are replaced by the
        character they denote.  References which cannot be resolved are logged
        with debug and left in the text unchanged.

        in_html -- text to convert
        charset -- accepted for callers passing the page charset as second
                   argument; it is not needed for the conversion and ignored
        '''
        def resolve(match):
                # exactly one of the three groups is set, depending on the kind of reference
                (hexdigits, decdigits, entityname) = match.groups()
                try:
                        if hexdigits:
                                codepoint = int(hexdigits, 16)
                        elif decdigits:
                                codepoint = int(decdigits)
                        else:
                                codepoint = htmlentitydefs.name2codepoint[entityname]
                        return unichr(codepoint)
                except (KeyError, ValueError, OverflowError):
                        # unknown entity name or code point outside of the range unichr supports
                        debug("[nrzuname] html2unicode: cannot resolve " + repr(match.group(0)))
                        return match.group(0)

        # All references are resolved in one pass over the text, so that the
        # result of one replacement (e.g. '&' from '&amp;') is never taken as
        # the start of another reference.
        # The digit counts are limited to keep int() within the plain int range.
        referencemask = re.compile(r'&(?:#[xX]([0-9a-fA-F]{1,6})|#([0-9]{1,7})|([A-Za-z][A-Za-z0-9]{0,7}));')
        return referencemask.sub(resolve, in_html)
77
def normalizePhoneNumber(intNo):
        '''
        Return the first run of digits of intNo after normalizing it.

        A leading '+' is turned into '00'; parentheses, blanks, slashes and
        dashes are removed.  If no digit is left, '0' is returned.
        '''
        plusPrefixed = re.match(r'^\+(.*)', intNo)
        if plusPrefixed:
                cleaned = '00' + plusPrefixed.group(1)
        else:
                cleaned = intNo
        # drop the usual separators found in written phone numbers
        for separator in '() /-':
                cleaned = cleaned.replace(separator, '')
        digits = re.match(r'.*?([0-9]+)', cleaned)
        if digits is None:
                return '0'
        return digits.group(1)
88
def out(number, caller):
        '''
        Print a caller string in readable form.

        caller is expected in the form
        "NA: <name>;VN: <first name>;STR: <street>;HNR: <house number>;PLZ: <zip code>;ORT: <city>"
        and is printed as "<name> <first name>, <street> <house number>, <zip code> <city>",
        leaving out the empty parts.  Nothing is printed if caller does not
        have that form.  number is only used for the debug output.
        '''
        debug("[nrzuname] out: %s: %s" %(number, caller))
        fields = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
        if fields is None:
                return
        ( name, vorname, strasse, hnr, plz, ort ) = fields.groups()

        line = name
        if vorname:
                line = line + ' ' + vorname

        # street and house number
        address = strasse
        if hnr:
                address = address + ' ' + hnr
        # zip code and city, whatever of them is there
        town = ' '.join([part for part in (plz, ort) if part])

        if address or town:
                line = line + ', ' + address
        if address and town:
                line = line + ', '
        line = line + town

        print(line)
119
def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
        '''
        Print the caller string as it is.

        number is not used; the parameter is there because the function is
        used as output function of ReverseLookupAndNotifier, which calls it
        with number and caller.
        '''
        # call form of print as in out(); for a single argument the output is
        # the same as with the print statement
        print(caller)
122
# Location of the xml file describing the reverse lookup websites.
# If Tools.Directories can be imported (presumably only within Enigma2 - TODO
# confirm), the file is looked up below the plugin directory; otherwise it is
# expected as reverselookup.xml relative to the current working directory.
try:
        from Tools.Directories import resolveFilename, SCOPE_PLUGINS
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        reverseLookupFileName = "reverselookup.xml"

# Module wide cache filled by ReverseLookupAndNotifier:
# country code in 00-notation -> list of <website> elements of that country
countries = { }
# modification time of reverselookup.xml when it was read into countries
reverselookupMtime = 0
131
class ReverseLookupAndNotifier:
        '''
        Reverse lookup of a phone number on the websites listed in reverselookup.xml.

        The lookup is started by the constructor.  The websites configured for
        the country of the number are queried one after the other with
        twisted's getPage until one of them delivers a caller.  At the end
        outputFunction is called once with the normalized number and the
        caller string
        "NA: <name>;VN: <first name>;STR: <street>;HNR: <house number>;PLZ: <zip code>;ORT: <city>"
        or with an empty string if nothing was found.
        '''

        def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
                '''
                Start the reverse lookup for number.

                number         -- phone number, national ("030...") or with
                                  country code ("+49..." or "0049...")
                outputFunction -- callable(number, caller) receiving the result
                charset        -- encoding of the caller string given to outputFunction
                countrycode    -- own country code in 00-notation; it is stripped from
                                  number and used for numbers without country code
                '''
                debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
                self.number = number
                self.outputFunction = outputFunction
                self.caller = ""
                self.currentWebsite = None
                self.websites = []
                self.nextWebsiteNo = 0
                self.charset = charset
                self.countrycode = countrycode

                self._readCountries()

                if self.number.startswith('+'):
                        self.number = '00' + self.number[1:]

                if self.number[:len(countrycode)] == countrycode:
                        self.number = '0' + self.number[len(countrycode):]

                # The normalized number has to be checked here, not the number
                # as given: a number given as "+49..." is a valid number.
                # The slice makes the check work for an empty number, too.
                if self.number[:1] != "0":
                        self.notifyAndReset()
                        return

                if self.number[:2] == "00":
                        # country codes have one to three digits (e.g. 001 for the USA)
                        for length in (3, 4, 5):
                                if self.number[:length] in countries:
                                        self.countrycode = self.number[:length]
                                        break
                        else:
                                debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                                self.notifyAndReset()
                                return

                if self.countrycode in countries and countries[self.countrycode]:
                        debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
                        self.websites = countries[self.countrycode]
                        self.nextWebsiteNo = 1
                        self.handleWebsite(self.websites[0])
                else:
                        # unknown country or country without any website
                        debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                        self.notifyAndReset()
                        return

        def _readCountries(self):
                '''(Re-)read reverselookup.xml into countries, if not yet done or if the file has changed.'''
                global reverselookupMtime
                reverselookupMtimeAct = os.stat(reverseLookupFileName)[8] # index 8 is the modification time
                if countries and reverselookupMtimeAct <= reverselookupMtime:
                        return
                debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
                reverselookupMtime = reverselookupMtimeAct
                dom = parse(reverseLookupFileName)
                # forget countries which are no longer in the file
                countries.clear()
                for top in dom.getElementsByTagName("reverselookup"):
                        for country in top.getElementsByTagName("country"):
                                code = country.getAttribute("code").replace("+","00")
                                countries[code] = country.getElementsByTagName("website")

        def handleWebsite(self, website):
                '''
                Build the url to query from the <website> element and start fetching the page.

                The result is handled by _gotPage, a failure by _gotError.  If
                no url can be built, the lookup is finished without a caller.
                '''
                debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
                if self.number[:2] == "00":
                        # self.countrycode is the start of self.number here (see
                        # __init__); only this start is to be taken away, not
                        # the same digits somewhere later in the number
                        number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
                else:
                        number = self.number

                # The placeholders are replaced with str.replace, not with the
                # % operator, because the url may contain '%' itself.
                url = website.getAttribute("url")
                if "$AREACODE" in url and website.hasAttribute("areacode"):
                        areaCodeLen = int(website.getAttribute("areacode"))
                        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$PFXAREACODE" in url and website.hasAttribute("pfxareacode"):
                        areaCodeLen = int(website.getAttribute("pfxareacode"))
                        url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$AREACODE" in url or "$PFXAREACODE" in url:
                        # without the length of the area code the number cannot be split
                        debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                        self.notifyAndReset()
                        return
                elif "$NUMBER" in url:
                        url = url.replace("$NUMBER", number)
                else:
                        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
                        self.notifyAndReset()
                        return
                debug("[ReverseLookupAndNotifier] Url to query: " + url)
                url = url.encode("UTF-8", "replace")
                self.currentWebsite = website
                getPage(url,
                        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
                        ).addCallback(self._gotPage).addErrback(self._gotError)

        def _cleanName(self, text):
                '''Return text without markup residue, with resolved HTML entities and single blanks only.'''
                item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
                item = html2unicode(item)
                # reduce every run of blanks to one blank
                return re.sub(' {2,}', ' ', item).strip()

        def _findField(self, entry, which, page):
                '''
                Return what the pattern <which> of entry finds in page.

                The result is the first group of the match.  None is returned
                if entry has no such pattern, if the pattern does not match
                or if it does not contain a group.
                '''
                pat = self.getPattern(entry, which)
                if not pat:
                        return None
                pat = ".*?" + pat
                debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
                found = re.match(pat, page, re.S|re.M)
                if not found or not found.groups():
                        return None
                debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( which, found.group(1) ))
                return found.group(1)

        def _gotPage(self, page):
                '''
                Search the fetched page for the caller with the patterns of the current website.

                Returns True after the caller was found and notified.  If no
                entry of the website delivers a name and a city, the next
                website is tried via _gotError and False is returned.
                '''
                debug("[ReverseLookupAndNotifier] _gotPage")
                found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
                if found:
                        pagecharset = found.group(1)
                        debug("[ReverseLookupAndNotifier] Charset: " + pagecharset)
                else:
                        pagecharset = "ISO-8859-1"
                        debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
                try:
                        page = page.decode(pagecharset, "replace")
                except LookupError:
                        debug("[ReverseLookupAndNotifier] _gotPage: unknown charset, using iso-8859-1")
                        page = page.decode("ISO-8859-1", "replace")
                # The non-breaking spaces are replaced after decoding: in an
                # undecoded utf-8 page the byte 0xa0 is also part of other characters.
                page = page.replace(u"\xa0", u" ")

                # the number in the form the websites are expected to return it
                if self.number[:2] == '00':
                        ownNumber = '0' + self.number[len(self.countrycode):]
                else:
                        ownNumber = self.number

                for entry in self.currentWebsite.getElementsByTagName("entry"):
                        #
                        # for the sites delivering fuzzy matches, we check against the returned number
                        #
                        foundNumber = self._findField(entry, "number", page)
                        if foundNumber is not None and ownNumber != normalizePhoneNumber(foundNumber):
                                debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(foundNumber, self.number))
                                continue

                        name = ''
                        firstname = ''
                        street = ''
                        streetno = ''
                        city = ''
                        zipcode = ''

                        # look for <lastname> and <firstname>; if there is no <lastname> pattern look for <name>
                        if self.getPattern(entry, "lastname"):
                                item = self._findField(entry, "lastname", page)
                                if item is not None:
                                        name = self._cleanName(item)
                                        item = self._findField(entry, "firstname", page)
                                        if item is not None:
                                                firstname = self._cleanName(item)
                        else:
                                item = self._findField(entry, "name", page)
                                if item is None:
                                        debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
                                        continue
                                name = self._cleanName(item)
                                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                                if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
                                        found = re.match(r'(.*?)\s+(.*)', name)
                                        if found:
                                                firstname = found.group(1)
                                                name = found.group(2)

                        if not name:
                                continue

                        item = self._findField(entry, "city", page)
                        if item is not None:
                                city = self._cleanName(item)
                                debug("[ReverseLookupAndNotifier] _gotPage: city: " + city)

                        # an entry without city is not taken as a result
                        if not city:
                                continue

                        item = self._findField(entry, "zipcode", page)
                        if item:
                                zipcode = self._cleanName(item)
                                debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + zipcode)

                        item = self._findField(entry, "street", page)
                        if item:
                                street = self._cleanName(item)
                                debug("[ReverseLookupAndNotifier] _gotPage: street: " + street)
                                # split off a house number at the end of the street
                                found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                                if found:
                                        street = found.group(1)
                                        streetno = found.group(2)

                        self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
                        debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

                        self.notifyAndReset()
                        return True

                self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
                return False

        def _gotError(self, error = ""):
                '''Try the next website of the country; if there is none left, finish without a caller.'''
                debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
                if self.nextWebsiteNo >= len(self.websites):
                        debug("[ReverseLookupAndNotifier] _gotError: I give up")
                        self.notifyAndReset()
                        return
                debug("[ReverseLookupAndNotifier] _gotError: try next website")
                self.nextWebsiteNo = self.nextWebsiteNo+1
                self.handleWebsite(self.websites[self.nextWebsiteNo-1])

        def getPattern(self, website, which):
                '''
                Return the text of the first <which> element below website.

                An empty string is returned if there is no such element or if
                the element is empty.
                '''
                pat1 = website.getElementsByTagName(which)
                if len(pat1) == 0:
                        return ''
                if len(pat1) > 1:
                        debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
                if not pat1[0].childNodes:
                        return ''
                return pat1[0].childNodes[0].data

        def notifyAndReset(self):
                '''
                Hand the result over to outputFunction.

                The caller string is encoded with self.charset before.  When
                this file runs as script, the reactor is stopped afterwards.
                '''
                debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
                if self.caller:
                        try:
                                debug("2: " + repr(self.caller))
                                self.caller = self.caller.encode(self.charset, 'replace')
                                debug("3: " + repr(self.caller))
                        except (UnicodeError, LookupError):
                                # LookupError: the charset is not known; the caller is given on as it is
                                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
                        self.outputFunction(self.number, self.caller)
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
                        # This method is also reached from the constructor, i.e.
                        # before reactor.run(); reactor.stop() would fail then.
                        # callWhenRunning stops at once if the reactor is running
                        # and as soon as it is started otherwise.
                        reactor.callWhenRunning(reactor.stop) #@UndefinedVariable # pylint: disable-msg=E1101
426
# Use as script:
#   nrzuname.py number          -- prints the raw caller string, with debug output
#   nrzuname.py number charset  -- prints the formatted caller encoded in charset,
#                                  without debug output
if __name__ == '__main__':
        if len(sys.argv) == 2:
                ReverseLookupAndNotifier(sys.argv[1], simpleout)
                reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
        elif len(sys.argv) == 3:
                setDebug(False)
                ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
                reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
        else:
                # do not end silently when called the wrong way
                sys.stderr.write("usage: %s number [charset]\n" % sys.argv[0])