Merge branch 'master' of git+ssh://scm.schwerkraft.elitedvb.net/scmrepos/git/enigma2...
[enigma2-plugins.git] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 '''
4 $Id: nrzuname.py 824 2013-12-02 16:56:32Z michael $
5 $Author: michael $
6 $Revision: 824 $
7 $Date: 2013-12-02 17:56:32 +0100 (Mon, 02 Dec 2013) $
8 '''
9
10 # W0603 (global statement); W0141 (map, filter, etc.)
11 # pylint: disable=W0603,W0141
12
13 import re, sys, os
14 from xml.dom.minidom import parse
15 from twisted.web.client import getPage #@UnresolvedImport
16 from twisted.internet import reactor #@UnresolvedImport
17
try:
        # Inside the plugin package the debug function is taken from the package.
        from . import debug #@UnresolvedImport # pylint: disable=W0613,F0401
        def setDebug(what): # pylint: disable=W0613
                '''Do nothing; the imported debug function is not controlled from here.'''
                pass
except (ValueError, ImportError):
        # Not imported as part of a package (e.g. started as a script): the relative
        # import raises ValueError on Python 2 and ImportError on Python 3.
        # Fall back to a simple debug function printing to stdout.
        debugVal = True
        def setDebug(what):
                '''Switch the fallback debug output on (true value) or off (false value).'''
                global debugVal
                debugVal = what
        def debug(message):
                '''Print message if debug output is switched on.'''
                if debugVal:
                        print(message)
30
31 import htmlentitydefs
def html2unicode(in_html):
        '''
        Replace HTML character references in in_html by the characters they denote.

        Hexadecimal (&#xE4;), decimal (&#228;) and named (&auml;) references are
        handled in a single pass. References that cannot be resolved (unknown
        name, code point out of range) are left in the text unchanged and are
        reported via debug().

        @param in_html: text possibly containing HTML character references
        @return: the text with all resolvable references replaced
        '''
        # The entity table and the code point -> character function have
        # different names on Python 2 and Python 3.
        try:
                from htmlentitydefs import name2codepoint
        except ImportError:
                from html.entities import name2codepoint
        try:
                toChar = unichr
        except NameError:
                toChar = chr

        def replaceEntity(match):
                '''Return the character for one matched reference, or the reference itself.'''
                whole = match.group(0)
                hexdigits, digits, name = match.groups()
                try:
                        if hexdigits:
                                codepoint = int(hexdigits, 16)
                        elif digits:
                                codepoint = int(digits)
                        else:
                                codepoint = name2codepoint[name]
                        return toChar(codepoint)
                except KeyError:
                        debug("[Callhtml2utf8] KeyError " + whole + "/" + name)
                except (ValueError, OverflowError) as e:
                        # code point outside of the range the platform can represent
                        debug("[nrzuname] html2utf8: ValueError " + repr(whole) + ":" + repr(hexdigits or digits or name) + " (" + str(e) + ")")
                return whole

        # group 1: hex digits, group 2: decimal digits, group 3: entity name;
        # no limit on the number of digits or the length of the name
        entitymask = re.compile(r'&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|([A-Za-z][A-Za-z0-9]*));')
        return entitymask.sub(replaceEntity, in_html)
80
def normalizePhoneNumber(intNo):
        '''
        Reduce a phone number to a plain string of digits.

        A leading "+" is rewritten to "00", the separators "(", ")", " ", "/"
        and "-" are removed, and the first run of digits of the result is
        returned. If no digits can be found, "0" is returned.
        '''
        plusPrefixed = re.match(r'^\+(.*)', intNo)
        if plusPrefixed is not None:
                intNo = '00' + plusPrefixed.group(1)
        for separator in ('(', ')', ' ', '/', '-'):
                intNo = intNo.replace(separator, '')
        digits = re.match(r'.*?([0-9]+)', intNo)
        return digits.group(1) if digits else '0'
91
def out(number, caller):
        '''
        Print a caller record as one readable line.

        caller is expected in the form
        "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: ..". The printed line is
        "<name> <first name>, <street> <house no>, <zip> <city>", where empty
        parts are left out. Nothing is printed if caller does not have that form.

        @param number: the phone number, only used for the debug output
        @param caller: the caller record
        '''
        debug("[nrzuname] out: %s: %s" %(number, caller))
        record = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
        if record is None:
                return
        name, vorname, strasse, hnr, plz, ort = record.groups()

        if vorname:
                name = name + ' ' + vorname

        # street with house number, and zip code with city, each form one part
        streetPart = strasse
        if hnr:
                streetPart = streetPart + ' ' + hnr
        placePart = ' '.join([part for part in (plz, ort) if part])

        for part in (streetPart, placePart):
                if part:
                        name = name + ', ' + part

        print(name)
122
def simpleout(number, caller): #@UnusedVariable # pylint: disable=W0613
        '''Output function printing the caller record as it is; number is not used.'''
        print(caller)
125
# Location of the lookup definitions: resolved through Tools.Directories if
# that module can be imported, otherwise the relative path "reverselookup.xml".
try:
        from Tools.Directories import resolveFilename, SCOPE_PLUGINS
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        reverseLookupFileName = "reverselookup.xml"

# Cache filled by ReverseLookupAndNotifier: country code with "+" replaced
# by "00" -> the <website> elements of that country.
countries = { }
# Modification time of the lookup file when the cache was last filled.
reverselookupMtime = 0
134
class ReverseLookupAndNotifier:
        '''
        Look up name and address for a phone number on the web sites configured
        in the reverse lookup file and hand the result to an output function.

        The lookup is started by the constructor. The result is reported by
        calling outputFunction(number, caller), where caller is either "" (nothing
        found) or a record of the form
        "NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>"
        encoded in the given charset.
        '''

        def __init__(self, number, outputFunction=out, charset="cp1252", countrycode = "0049"):
                '''
                @param number: phone number to look up; a leading "+" is treated like "00"
                @param outputFunction: callable(number, caller) receiving the result
                @param charset: encoding applied to the caller record before it is passed on
                @param countrycode: own country code in "00.." notation; it is stripped
                                    from numbers starting with it
                '''
                debug("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
                self.number = number
                self.outputFunction = outputFunction
                self.caller = ""
                self.currentWebsite = None
                self.nextWebsiteNo = 0
                # always defined, so that _gotError can rely on it
                self.websites = []
                self.charset = charset
                self.countrycode = countrycode

                self._loadCountries()

                if self.number.startswith('+'):
                        self.number = '00' + self.number[1:]

                if countrycode and self.number.startswith(countrycode):
                        self.number = '0' + self.number[len(countrycode):]

                # Check the normalized number, not the raw argument: otherwise every
                # number given as "+.." would be rejected here, and an empty number
                # would raise an IndexError.
                if not self.number.startswith("0"):
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                if self.number[:2] == "00":
                        debug("[ReverseLookupAndNotifier] number %s, %s" %(self.number, self.number[:4]))
                        # "00" followed by a country code of one (e.g. USA) to three digits
                        for length in (3, 4, 5):
                                if self.number[:length] in countries:
                                        self.countrycode = self.number[:length]
                                        break
                        else:
                                debug("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                                # self.caller = _("UNKNOWN")
                                self.notifyAndReset()
                                return

                self.websites = countries.get(self.countrycode) or []
                if not self.websites:
                        debug("[ReverseLookupAndNotifier] No website configured for country %s" %self.countrycode)
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                debug("[ReverseLookupAndNotifier] Found website for reverse lookup")
                self.nextWebsiteNo = 1
                self.handleWebsite(self.websites[0])

        def _loadCountries(self):
                '''
                Fill the module level countries cache from the reverse lookup file,
                if the cache is empty or the file is newer than the cached data.
                '''
                global reverselookupMtime
                reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
                if not countries or reverselookupMtimeAct > reverselookupMtime:
                        debug("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
                        dom = parse(reverseLookupFileName)
                        reverselookupMtime = reverselookupMtimeAct
                        # forget countries which are no longer in the file
                        countries.clear()
                        for top in dom.getElementsByTagName("reverselookup"):
                                for country in top.getElementsByTagName("country"):
                                        code = country.getAttribute("code").replace("+","00")
                                        countries[code] = country.getElementsByTagName("website")

        def handleWebsite(self, website):
                '''
                Build the query url from the url template of website and request the
                page; the answer is handled by _gotPage, errors by _gotError.

                @param website: <website> element from the reverse lookup file
                '''
                debug("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
                if self.number[:2] == "00":
                        # cut off the leading country code only, not every occurrence of its digits
                        number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
                else:
                        number = self.number

                url = website.getAttribute("url")
                needsAreaCode = "$AREACODE" in url
                needsPfxAreaCode = "$PFXAREACODE" in url
                # The placeholders are filled in by plain string replacement; using the
                # % operator would fail for urls containing percent escapes like %20.
                if needsAreaCode and website.hasAttribute("areacode"):
                        areaCodeLen = int(website.getAttribute("areacode"))
                        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif needsPfxAreaCode and website.hasAttribute("pfxareacode"):
                        areaCodeLen = int(website.getAttribute("pfxareacode"))
                        url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif needsAreaCode or needsPfxAreaCode:
                        # the length of the area code is not given, so the number cannot be split
                        debug("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                elif "$NUMBER" in url:
                        url = url.replace("$NUMBER", number)
                else:
                        debug("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                debug("[ReverseLookupAndNotifier] Url to query: " + url)
                url = url.encode("UTF-8", "replace")
                self.currentWebsite = website
                getPage(url,
                        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
                        ).addCallback(self._gotPage).addErrback(self._gotError)

        def _cleanName(self, text):
                '''
                Clean up a piece of text cut out of a page: remove some markup,
                turn commas, newlines and tabs into blanks, resolve HTML character
                references and collapse runs of blanks.
                '''
                item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
                item = html2unicode(item)
                return re.sub(" {2,}", " ", item).strip()

        def _findPattern(self, entry, which, page):
                '''
                Apply the pattern given as element <which> of entry to page.

                @return: the match object; None if entry has no such pattern, the
                         pattern has no group or the pattern does not match
                '''
                pat = self.getPattern(entry, which)
                if not pat:
                        return None
                pat = ".*?" + pat
                debug("[ReverseLookupAndNotifier] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
                found = re.match(pat, page, re.S|re.M)
                if found is None:
                        return None
                if found.re.groups < 1:
                        debug("[ReverseLookupAndNotifier] _gotPage: pattern for '''%s''' has no group" %which)
                        return None
                debug("[ReverseLookupAndNotifier] _gotPage: found for '''%s''': '''%s'''" %( which, found.group(1)))
                return found

        def _extract(self, entry, which, page):
                '''Return the cleaned up text found for <which> in page, "" if there is none.'''
                found = self._findPattern(entry, which, page)
                if found and found.group(1):
                        return self._cleanName(found.group(1))
                return ''

        def _gotPage(self, page):
                '''
                Callback for the requested page: try the <entry> elements of the
                current website one after the other; the first one yielding a name
                and a city is reported via notifyAndReset. If there is none,
                _gotError is called to continue with the next website.

                @param page: the page as received
                @return: True if a caller has been found, False otherwise
                '''
                debug("[ReverseLookupAndNotifier] _gotPage")

                charset = "ISO-8859-1"
                found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
                if found:
                        debug("[ReverseLookupAndNotifier] Charset: " + found.group(1))
                        charset = found.group(1)
                else:
                        debug("[ReverseLookupAndNotifier] Default Charset: iso-8859-1")
                try:
                        page = page.decode(charset, "replace")
                except LookupError:
                        debug("[ReverseLookupAndNotifier] Unknown charset %s, using iso-8859-1" %charset)
                        page = page.decode("ISO-8859-1", "replace")
                # Replace no-break spaces after decoding only: in the undecoded page the
                # byte 0xA0 may be part of a multi-byte character (e.g. in UTF-8).
                page = page.replace(u"\xa0", u" ")

                for entry in self.currentWebsite.getElementsByTagName("entry"):
                        #
                        # for the sites delivering fuzzy matches, we check against the returned number
                        #
                        found = self._findPattern(entry, "number", page)
                        if found:
                                if self.number[:2] == '00':
                                        # the country code is not always four characters long
                                        number = '0' + self.number[len(self.countrycode):]
                                else:
                                        number = self.number
                                if number != normalizePhoneNumber(found.group(1)):
                                        debug("[ReverseLookupAndNotifier] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
                                        continue

                        # look for <firstname> and <lastname> match, if not there look for <name>, if not there skip
                        name = ''
                        firstname = ''
                        street = ''
                        streetno = ''
                        if self.getPattern(entry, "lastname"):
                                name = self._extract(entry, "lastname", page)
                                if name:
                                        firstname = self._extract(entry, "firstname", page)
                        else:
                                name = self._extract(entry, "name", page)
                                if not name:
                                        debug("[ReverseLookupAndNotifier] _gotPage: no name found, skipping")
                                        continue
                                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                                if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
                                        found = re.match(r'(.*?)\s+(.*)', name)
                                        if found:
                                                firstname = found.group(1)
                                                name = found.group(2)

                        if not name:
                                continue

                        city = self._extract(entry, "city", page)
                        if not city:
                                continue
                        debug("[ReverseLookupAndNotifier] _gotPage: city: " + city)

                        zipcode = self._extract(entry, "zipcode", page)
                        if zipcode:
                                debug("[ReverseLookupAndNotifier] _gotPage: zipcode: " + zipcode)

                        street = self._extract(entry, "street", page)
                        if street:
                                debug("[ReverseLookupAndNotifier] _gotPage: street: " + street)
                                # split off a trailing house number
                                found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                                if found:
                                        street = found.group(1)
                                        streetno = found.group(2)

                        self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
                        debug("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

                        self.notifyAndReset()
                        return True

                self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
                return False

        def _gotError(self, error = ""):
                '''
                Called if the current website did not deliver a result: continue with
                the next website, or report an empty result if there is none left.
                '''
                debug("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
                if self.nextWebsiteNo >= len(self.websites):
                        debug("[ReverseLookupAndNotifier] _gotError: I give up")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                else:
                        debug("[ReverseLookupAndNotifier] _gotError: try next website")
                        self.nextWebsiteNo = self.nextWebsiteNo+1
                        self.handleWebsite(self.websites[self.nextWebsiteNo-1])

        def getPattern(self, website, which):
                '''
                Return the text of the first element <which> below website.

                @return: the pattern; "" if there is no such element or it is empty
                '''
                pat1 = website.getElementsByTagName(which)
                if len(pat1) == 0:
                        return ''
                if len(pat1) > 1:
                        debug("[ReverseLookupAndNotifier] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
                node = pat1[0].firstChild
                if node is None:
                        # empty element, e.g. <city/>
                        return ''
                return getattr(node, "data", '')

        def notifyAndReset(self):
                '''
                Pass the number and the caller record, encoded in self.charset, to the
                output function. When running as a script, the reactor is stopped.
                '''
                debug("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
                if self.caller:
                        try:
                                debug("2: " + repr(self.caller))
                                self.caller = self.caller.encode(self.charset, 'replace')
                                debug("3: " + repr(self.caller))
                        except UnicodeDecodeError:
                                debug("[ReverseLookupAndNotifier] cannot encode?!?!")
                        self.outputFunction(self.number, self.caller)
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
                        # This may be called from the constructor, i.e. before the reactor
                        # has been started; stopping it directly would raise an error then.
                        # callWhenRunning stops it at once if it is running, and right
                        # after its start otherwise.
                        reactor.callWhenRunning(reactor.stop) #@UndefinedVariable # pylint: disable=E1101
432
if __name__ == '__main__':
        # Command line use:
        #   nrzuname.py <number>            print the caller record as it is
        #   nrzuname.py <number> <charset>  switch debug output off and print the
        #                                   formatted caller in the given charset
        if len(sys.argv) == 2:
                # nrzuname.py Nummer
                ReverseLookupAndNotifier(sys.argv[1], simpleout)
                reactor.run() #@UndefinedVariable # pylint: disable=E1101
        elif len(sys.argv) == 3:
                # nrzuname.py Nummer Charset
                setDebug(False)
                ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
                reactor.run() #@UndefinedVariable # pylint: disable=E1101
        else:
                # do not fail silently on a wrong number of arguments
                sys.stderr.write("Usage: %s <number> [<charset>]\n" % sys.argv[0])