[MerlinSkinThemes] - add config option to disable rebuild on boot
[enigma2-plugins.git] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 '''
4 $Id: nrzuname.py 1451 2017-06-08 16:35:18Z michael $
5 $Author: michael $
6 $Revision: 1451 $
7 $Date: 2017-06-08 18:35:18 +0200 (Thu, 08 Jun 2017) $
8 '''
9
10 # C0111 (Missing docstring)
11 # C0103 (Invalid name)
12 # C0301 (line too long)
13 # W0603 (global statement)
14 # W0141 (map, filter, etc.)
15 # W0110 lambda with map,filter
16 # W0403 Relative import
17 # W1401 Anomalous backslash in string
18 # W0110 deprecated-lambda
19 # C0302 too-many-lines
20 # C0410 multiple-imports
21 # pylint: disable=C0111,C0103,C0301,W0603,W0403,C0302
22
23 import re, sys, os
24 from xml.dom.minidom import parse
25
# Module-wide logging helpers.
#
# Normally the functions below are bound to the "FritzCall.nrzuname" logger.
# If that setup fails, a minimal print-based replacement is installed which
# provides the same names and accepts the same printf-style arguments, so that
# every call site in this module works in both configurations.
try:
        import logging
        logger = logging.getLogger("FritzCall.nrzuname")
        debug = logger.debug
        info = logger.info
        # Logger.warn is only a deprecated alias of Logger.warning
        warn = logger.warning
        error = logger.error
        exception = logger.exception

        def setDebug(what):  # pylint: disable=W0613
                """Do nothing: verbosity is controlled by the logging configuration."""
                pass
except (ImportError, ValueError):
        # NOTE(review): the original only caught ValueError; a failing import raises
        # ImportError, so both are handled here.
        debugVal = True

        def setDebug(what):
                """Switch the print-based fallback output on (true value) or off."""
                global debugVal
                debugVal = what

        def debug(message, *args):
                """Print *message* (optionally %-formatted with *args*) if output is enabled."""
                if debugVal:
                        if args:
                                message = message % args
                        print(message)

        # The rest of the module also uses these names; without them the fallback
        # would fail with a NameError on the first info()/warn()/error() call.
        info = warn = error = exception = debug
47
48 import htmlentitydefs
49
50 from twisted.web.client import getPage  # @UnresolvedImport
51 from twisted.internet import reactor  # @UnresolvedImport
52 from Tools.Directories import resolveFilename, SCOPE_PLUGINS
53
54
def html2unicode(in_html):
        """Replace HTML/WML character references in *in_html* by their characters.

        Handles hexadecimal (``&#xE4;``), decimal (``&#228;``) and named
        (``&auml;``) references.  All references are resolved in one single pass
        over the text, so the result of one replacement is never unescaped a
        second time (``&amp;lt;`` becomes ``&lt;``, not ``<``).

        References that cannot be resolved (unknown name, code point out of
        range) are left untouched and reported via ``warn``.

        Returns the converted text.
        """
        # Local imports/fallbacks keep the function usable on Python 2 and Python 3.
        try:
                from htmlentitydefs import name2codepoint
        except ImportError:
                from html.entities import name2codepoint
        try:
                to_char = unichr
        except NameError:
                to_char = chr

        def replace_reference(match):
                whole, hexval, decval, name = match.group(0, 1, 2, 3)
                try:
                        if hexval is not None:
                                codepoint = int(hexval, 16)
                        elif decval is not None:
                                codepoint = int(decval)
                        else:
                                codepoint = name2codepoint[name]
                        return to_char(codepoint)
                except KeyError:
                        warn("KeyError " + whole + "/" + name)
                except (ValueError, OverflowError) as e:
                        # code point outside the range the interpreter can represent
                        warn("html2utf8: ValueError " + repr(whole) + " (" + str(e) + ")")
                return whole

        # group 1: hex digits, group 2: decimal digits, group 3: entity name
        return re.sub(r'&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|([A-Za-z][A-Za-z0-9]*));', replace_reference, in_html)
103
def normalizePhoneNumber(intNo):
        """Reduce a formatted phone number to its first run of digits.

        A leading ``+`` is rewritten to ``00``; brackets, blanks, slashes and
        dashes are removed.  Returns the first group of digits that remains, or
        ``'0'`` if there is none.
        """
        international = re.match(r'^\+(.*)', intNo)
        if international:
                intNo = '00' + international.group(1)
        for separator in '() /-':
                intNo = intNo.replace(separator, '')
        digits = re.match(r'.*?([0-9]+)', intNo)
        return digits.group(1) if digits else '0'
114
def out(number, caller):
        """Print a lookup result as one human readable line.

        *caller* is expected in the form
        ``NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>``;
        anything else is ignored silently.  The printed line has the shape
        ``name firstname, street no, zip city`` with empty parts left out.
        """
        debug("%s: %s", number, caller)
        fields = re.match(r"NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
        if not fields:
                return
        name, vorname, strasse, hnr, plz, ort = fields.groups()

        hasStreet = bool(strasse or hnr)
        hasTown = bool(plz or ort)

        parts = [name]
        if vorname:
                parts.append(' ' + vorname)
        if hasStreet or hasTown:
                parts.append(', ')
        if strasse:
                parts.append(strasse)
        if hnr:
                parts.append(' ' + hnr)
        if hasStreet and hasTown:
                parts.append(', ')
        # zip code and city, separated by a blank only if both are present
        parts.append(' '.join([part for part in (plz, ort) if part]))

        print(''.join(parts))
145
def simpleout(number, caller):  # @UnusedVariable # pylint: disable=W0613
        """Print the raw *caller* string; *number* is accepted but not used."""
        print(caller)
148
# Path of the XML file describing the reverse lookup websites per country.
# NOTE(review): resolveFilename is imported unconditionally at the top of this
# file, so it is unclear when this call would raise ImportError -- confirm
# whether the fallback to the current directory is still reachable.
try:
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        reverseLookupFileName = "reverselookup.xml"

# Cache filled by ReverseLookupAndNotifier: country code (with "+" rewritten
# to "00") -> list of <website> elements of that country.
countries = {}
# Modification time of the XML file when `countries` was last (re-)read.
reverselookupMtime = 0
156
class ReverseLookupAndNotifier(object):
        """Reverse lookup of a phone number on the websites listed in reverselookup.xml.

        Creating an instance starts the lookup.  The websites configured for the
        country of the number are queried one after the other until one of them
        yields a name and a city.  The result is reported exactly once through
        ``outputFunction(number, caller)``, where ``caller`` is either empty
        (nothing found) or a string of the form
        ``NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>``
        encoded in ``charset``.
        """

        def __init__(self, number, outputFunction = out, charset = "cp1252", countrycode = "0049"):
                """Normalise *number* and start querying the websites of its country.

                number         -- phone number; a leading "+" is treated like "00"
                outputFunction -- callable(number, caller) receiving the result
                charset        -- encoding applied to the caller string before output
                countrycode    -- own country code in "00xx" notation; numbers starting
                                  with it are treated as national numbers
                """
                debug("reverse Lookup for %s!", number)
                if not countrycode:
                        # NOTE(review): this reports through the module-level out() instead of
                        # outputFunction and does not return -- kept as is, confirm intent.
                        out(number, "")

                self.number = number
                self.outputFunction = outputFunction
                self.caller = ""
                self.currentWebsite = None
                self.nextWebsiteNo = 0
                self.websites = []
                self.charset = charset
                self.countrycode = countrycode

                self._refreshCountries()

                if self.number.startswith('+'):
                        self.number = '00' + self.number[1:]

                if countrycode and self.number.startswith(countrycode):
                        self.number = '0' + self.number[len(countrycode):]

                # Test the normalised number: the raw argument may still start with "+",
                # and an empty number must not raise an IndexError.
                if not self.number.startswith("0"):
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                if self.number.startswith("00"):
                        debug("number %s, %s", self.number, self.number[:4])
                        # country codes in "00x" (e.g. USA), "00xx" or "00xxx" notation
                        for length in (3, 4, 5):
                                if self.number[:length] in countries:
                                        self.countrycode = self.number[:length]
                                        break
                        else:
                                debug("Country cannot be reverse handled")
                                # self.caller = _("UNKNOWN")
                                self.notifyAndReset()
                                return

                websites = countries.get(self.countrycode)
                if not websites:
                        # country code unknown to reverselookup.xml or without any website
                        debug("Country cannot be reverse handled")
                        self.notifyAndReset()
                        return

                debug("Found website for reverse lookup")
                self.websites = websites
                self.nextWebsiteNo = 1
                self.handleWebsite(self.websites[0])

        @staticmethod
        def _refreshCountries():
                """(Re-)read reverselookup.xml into ``countries`` if it is empty or outdated."""
                global reverselookupMtime
                mtime = os.stat(reverseLookupFileName).st_mtime
                if countries and mtime <= reverselookupMtime:
                        return
                debug("(Re-)Reading %s\n", reverseLookupFileName)
                dom = parse(reverseLookupFileName)
                # drop countries that have been removed from the file
                countries.clear()
                for top in dom.getElementsByTagName("reverselookup"):
                        for country in top.getElementsByTagName("country"):
                                code = country.getAttribute("code").replace("+", "00")
                                countries[code] = country.getElementsByTagName("website")
                reverselookupMtime = mtime

        def handleWebsite(self, website):
                """Build the query URL for *website* and request the page.

                If the URL template of the website cannot be filled in, the next
                website is tried (see ``_gotError``).
                """
                info(website.getAttribute("name"))
                if self.number[:2] == "00":
                        # strip the country code at the front only, not inside the number
                        national = self.number
                        if national.startswith(self.countrycode):
                                national = national[len(self.countrycode):]
                        number = website.getAttribute("prefix") + national
                else:
                        number = self.number

                # Placeholders are substituted with str.replace: "$" is special in regular
                # expressions and "%" (URL escapes) is special for %-formatting.
                url = website.getAttribute("url")
                if "$AREACODE" in url and website.hasAttribute("areacode"):
                        areaCodeLen = int(website.getAttribute("areacode"))
                        url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$PFXAREACODE" in url and website.hasAttribute("pfxareacode"):
                        areaCodeLen = int(website.getAttribute("pfxareacode"))
                        url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
                elif "$AREACODE" in url or "$PFXAREACODE" in url:
                        # the length of the area code is not configured for this website
                        self._gotError("(PFX)AREACODE cannot be handled")
                        return
                elif "$NUMBER" in url:
                        url = url.replace("$NUMBER", number)
                else:
                        self._gotError("cannot handle websites with no $NUMBER in url")
                        return
                info("Url to query: " + url)
                url = url.encode("UTF-8", "replace")
                self.currentWebsite = website
                getPage(url, agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5").addCallback(self._gotPage).addErrback(self._gotError)

        @staticmethod
        def _cleanName(text):
                """Return *text* with markup remains and entities resolved and blanks collapsed."""
                item = text
                for old, new in (("%20", " "), ("&nbsp;", " "), ("</b>", ""), (",", " "), ('\n', ' '), ('\t', ' ')):
                        item = item.replace(old, new)
                item = html2unicode(item)
                return re.sub(r' {2,}', ' ', item).strip()

        @staticmethod
        def _decodePage(page):
                """Decode the raw *page* using the charset it declares (default UTF-8)."""
                found = re.match(r'.*http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)"', page, re.S)
                if not found:
                        found = re.match(r'.*charset="([^"]+)"', page, re.S)
                if found:
                        charset = found.group(1)
                        debug("Charset: " + charset)
                else:
                        charset = "UTF-8"
                        debug("Default Charset: UTF-8")
                try:
                        text = page.decode(charset, "replace")
                except LookupError:
                        warn("unknown charset %s, falling back to UTF-8", charset)
                        text = page.decode("UTF-8", "replace")
                # Replace no-break spaces only after decoding: the byte 0xA0 is part of
                # multi-byte sequences in UTF-8 and must not be replaced in the raw data.
                return text.replace(u"\xa0", u" ")

        def _findItem(self, entry, which, page):
                """Return the text captured by the <which> pattern of *entry* in *page*.

                Returns None if *entry* has no such pattern, if the pattern does not
                match or if it has no capturing group.
                """
                pat = self.getPattern(entry, which)
                if not pat:
                        return None
                pat = ".*?" + pat
                debug("look for '''%s''' with '''%s'''", which, pat)
                found = re.match(pat, page, re.S | re.M)
                if not found:
                        return None
                try:
                        text = found.group(1)
                except IndexError:
                        warn("pattern for %s has no group", which)
                        return None
                debug("found for '''%s''': '''%s'''", which, text)
                return text

        def _findName(self, entry, page):
                """Return ``(name, firstname)`` found in *page*; name is '' if none was found.

                Uses the <lastname>/<firstname> patterns of *entry* if there is a
                <lastname> pattern, the <name> pattern otherwise.
                """
                firstname = ''
                if self.getPattern(entry, "lastname"):
                        lastname = self._findItem(entry, "lastname", page)
                        if lastname is None:
                                return '', ''
                        first = self._findItem(entry, "firstname", page)
                        if first:
                                firstname = self._cleanName(first)
                        return self._cleanName(lastname), firstname

                raw = self._findItem(entry, "name", page)
                if raw is None:
                        info("no name found, skipping")
                        return '', ''
                name = self._cleanName(raw)
                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                if firstNameFirst == 'true':  # that means, the name is of the form "firstname lastname"
                        found = re.match(r'(.*?)\s+(.*)', name)
                        if found:
                                firstname = found.group(1)
                                name = found.group(2)
                return name, firstname

        def _gotPage(self, page):
                """Extract the caller from *page*; report it or continue with the next website.

                Returns True if a caller was found and reported, False otherwise.
                """
                debug("")
                page = self._decodePage(page)

                if self.number[:2] == '00':
                        ownNumber = '0' + self.number[len(self.countrycode):]
                else:
                        ownNumber = self.number

                for entry in self.currentWebsite.getElementsByTagName("entry"):
                        #
                        # for the sites delivering fuzzy matches, we check against the returned number
                        #
                        returned = self._findItem(entry, "number", page)
                        if returned is not None and ownNumber != normalizePhoneNumber(returned):
                                debug("got unequal number '''%s''' for '''%s'''", returned, self.number)
                                continue

                        name, firstname = self._findName(entry, page)
                        if not name:
                                continue

                        city = self._cleanName(self._findItem(entry, "city", page) or '')
                        if not city:
                                continue
                        info("city: " + city)

                        zipcode = self._cleanName(self._findItem(entry, "zipcode", page) or '')
                        if zipcode:
                                info("zipcode: " + zipcode)

                        street = self._cleanName(self._findItem(entry, "street", page) or '')
                        streetno = ''
                        if street:
                                info("street: " + street)
                                # split a trailing house number from the street name
                                found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                                if found:
                                        street = found.group(1)
                                        streetno = found.group(2)

                        self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % (name, firstname, street, streetno, zipcode, city)
                        info("Reverse lookup succeeded:\nName: %s", self.caller)

                        self.notifyAndReset()
                        return True

                self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" % self.currentWebsite.getAttribute("name"))
                return False

        def _gotError(self, errorMsg = ""):
                """Log *errorMsg* and query the next website, or give up after the last one."""
                error("Error: %s", errorMsg)
                if self.nextWebsiteNo >= len(self.websites):
                        debug("I give up")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return
                debug("try next website")
                self.nextWebsiteNo = self.nextWebsiteNo + 1
                self.handleWebsite(self.websites[self.nextWebsiteNo - 1])

        def getPattern(self, website, which):
                """Return the text of the first <which> child element of *website*, '' if none."""
                pat1 = website.getElementsByTagName(which)
                if len(pat1) == 0:
                        return ''
                if len(pat1) > 1:
                        warn("Something strange: more than one %s for website %s", which, website.getAttribute("name"))
                if not pat1[0].childNodes:
                        # element without content, treat like a missing pattern
                        return ''
                return pat1[0].childNodes[0].data

        def notifyAndReset(self):
                """Hand the result to the output function; stop the reactor in script mode."""
                info("Number: %s; Caller: %s", self.number, self.caller)
                # debug("1: " + repr(self.caller))
                if self.caller:
                        try:
                                debug("2: " + repr(self.caller))
                                self.caller = self.caller.encode(self.charset, 'replace')
                                debug("3: " + repr(self.caller))
                        except (UnicodeError, LookupError):
                                # LookupError: the configured charset is unknown
                                exception("cannot encode?!?!")
                        self.outputFunction(self.number, self.caller)
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
                        # The result may be known before reactor.run() has been called;
                        # callWhenRunning defers the stop until the reactor is running.
                        reactor.callWhenRunning(reactor.stop)  # @UndefinedVariable # pylint: disable=E1101
459
# Command line usage:
#   nrzuname.py <number>            print the raw result string
#   nrzuname.py <number> <charset>  print a formatted result in <charset>, without debug output
if __name__ == '__main__':
        if len(sys.argv) == 2:
                ReverseLookupAndNotifier(sys.argv[1], simpleout)
                reactor.run()  # @UndefinedVariable # pylint: disable=E1101
        elif len(sys.argv) == 3:
                setDebug(False)
                ReverseLookupAndNotifier(sys.argv[1], out, sys.argv[2])
                reactor.run()  # @UndefinedVariable # pylint: disable=E1101
        else:
                # do not fail silently on a wrong number of arguments
                sys.stderr.write("usage: %s number [charset]\n" % sys.argv[0])