ecasa: add search functionality
[enigma2-plugins.git] / ncidclient / src / reverselookup.py
1 #!/usr/bin/python
2 # -*- coding: UTF-8 -*-
3 '''
4 $Id$
5 $Author$
6 $Revision$
7 $Date$
8 $Modified: sreichholf
9 '''
10
11 import re, sys, os
12 import htmlentitydefs
13 from xml.dom.minidom import parse
14 from twisted.web.client import getPage #@UnresolvedImport
15 from twisted.internet import reactor #@UnresolvedImport
16 from . import debug
17
def html2unicode(in_html, charset):
    """Replace HTML character references in *in_html* with their characters.

    Decimal (``&#228;``), hexadecimal (``&#xE4;``) and named (``&auml;``)
    references of any length are resolved in one pass over the text, so the
    output of one substitution is never re-scanned as another reference.
    References that cannot be resolved (unknown name, code point out of
    range) are left in the text unchanged and reported through ``debug``.

    ``charset`` is accepted for interface compatibility only; it is not used.
    """
    # Function-scope lookups so the block works on Python 2 and Python 3.
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def substitute(match):
        # Exactly one of the three groups is set for any match.
        hex_digits, dec_digits, entity_name = match.groups()
        key = match.group(0)
        if entity_name:
            try:
                codepoint = name2codepoint[entity_name]
            except KeyError:
                debug("[Callhtml2utf8] KeyError " + key + "/" + entity_name)
                return key
        elif hex_digits:
            codepoint = int(hex_digits, 16)
        else:
            codepoint = int(dec_digits)
        try:
            return to_char(codepoint)
        except (ValueError, OverflowError) as e:
            debug("[nrzuname] html2utf8: ValueError " + repr(key) + ":" + repr(codepoint) + " (" + str(e) + ")")
            return key

    return re.sub(
        r'&(?:#[xX]([0-9A-Fa-f]+)|#([0-9]+)|([A-Za-z][A-Za-z0-9]*));',
        substitute,
        in_html)
50
def normalizePhoneNumber(intNo):
    """Reduce a formatted phone number to its first run of digits.

    A leading ``+`` becomes ``00``; brackets, blanks, slashes and dashes are
    dropped.  The first contiguous group of digits in what remains is
    returned, or ``'0'`` when there is no digit at all.
    """
    plus_prefixed = re.match(r'^\+(.*)', intNo)
    if plus_prefixed is not None:
        intNo = '00' + plus_prefixed.group(1)

    for separator in ('(', ')', ' ', '/', '-'):
        intNo = intNo.replace(separator, '')

    digits = re.match(r'.*?([0-9]+)', intNo)
    if digits is None:
        return '0'
    return digits.group(1)
61
def out(number, caller):
    """Print a caller record as a single human-readable line.

    ``caller`` must have the form
    ``NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>``;
    anything else is ignored silently.  The printed line is
    ``name first-name, street house-no, zip city`` with empty parts and their
    separators left out.  ``number`` is only used for the debug trace.
    """
    debug("[nrzuname] out: %s: %s" %(number, caller))
    parsed = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
    if parsed is None:
        return

    surname, given, street, house_no, postcode, town = parsed.groups()

    display = surname
    if given:
        display = display + ' ' + given

    # Street and house number form one address part, zip and city the other.
    street_part = street
    if house_no:
        street_part = street_part + ' ' + house_no
    town_part = ' '.join(piece for piece in (postcode, town) if piece)

    for part in (street_part, town_part):
        if part:
            display = display + ', ' + part

    print(display)
92
def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
    """Print the raw caller string; ``number`` is accepted but ignored.

    Has the same signature as ``out`` so it can be used as a notification
    callback.
    """
    # Function-call form prints identically on Python 2 for a single
    # argument and matches the print(...) style used by out().
    print(caller)
95
# Location of the XML file describing the reverse-lookup websites.  Inside
# Enigma2 it lives in the plugin directory; without the Enigma2 modules
# (e.g. when run stand-alone) it is looked up in the current directory.
try:
    from Tools.Directories import resolveFilename, SCOPE_PLUGINS
except ImportError:
    reverseLookupFileName = "reverselookup.xml"
else:
    reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/NcidClient/reverselookup.xml")

# Cache shared by all lookups: country code ("00xx") -> list of <website> nodes.
countries = {}
# Modification time of the XML file when the cache was last filled.
reverselookupMtime = 0
104
class ReverseLookupAndNotify:
    """Look up the owner of a phone number on reverse-lookup websites.

    The websites and the regular expressions used to scrape them are read
    from ``reverseLookupFileName`` and cached per country code in the
    module-level ``countries`` dict.  Constructing an instance starts the
    lookup.  The outcome is delivered exactly once through
    ``notificationCallback(number, caller)``, where ``caller`` is either the
    empty string (nothing found) or
    ``"NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .."`` encoded with
    ``charset``.
    """

    def __init__(self, number, notificationCallback=out, charset="cp1252", countrycode = "0049"):
        """Normalise *number* and start querying the websites of its country.

        number               -- phone number, national ("030...") or
                                international ("+4930..." / "004930...")
        notificationCallback -- callable(number, caller) receiving the result
        charset              -- encoding applied to the result string
        countrycode          -- "00xx" code of the local country
        """
        debug("[ReverseLookupAndNotify] reverse Lookup for %s!" %number)
        self.number = number
        self.notificationCallback = notificationCallback
        self.caller = ""
        self.currentWebsite = None
        self.nextWebsiteNo = 0
        self.websites = []
        self.charset = charset
        self.countrycode = countrycode

        self._loadCountries()

        if self.number.startswith('+'):
            self.number = '00' + self.number[1:]

        # A number of the local country is handled in its national form.
        if self.number.startswith(countrycode):
            self.number = '0' + self.number[len(countrycode):]

        # Test the normalised number: the raw argument may still start with
        # '+' (or be empty), which is not a reason to give up.
        if not self.number.startswith("0"):
            self.notifyAndReset()
            return

        if self.number.startswith("00"):
            # Foreign number: country codes are "00" plus one to three digits.
            for codeLen in (3, 4, 5):
                if self.number[:codeLen] in countries:
                    self.countrycode = self.number[:codeLen]
                    break
            else:
                debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
                self.notifyAndReset()
                return

        websites = countries.get(self.countrycode)
        if not websites:
            debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotify] Found website for reverse lookup")
        self.websites = websites
        self.nextWebsiteNo = 1
        self.handleWebsite(self.websites[0])

    def _loadCountries(self):
        """(Re-)read the website definitions if the XML file is new or changed."""
        global reverselookupMtime
        currentMtime = os.stat(reverseLookupFileName).st_mtime
        if countries and currentMtime <= reverselookupMtime:
            return
        debug("[ReverseLookupAndNotify] (Re-)Reading %s\n" %reverseLookupFileName)
        reverselookupMtime = currentMtime
        dom = parse(reverseLookupFileName)
        for top in dom.getElementsByTagName("reverselookup"):
            for country in top.getElementsByTagName("country"):
                code = country.getAttribute("code").replace("+","00")
                countries[code] = country.getElementsByTagName("website")

    def handleWebsite(self, website):
        """Build the query URL for *website* and request the page."""
        debug("[ReverseLookupAndNotify] handleWebsite: " + website.getAttribute("name"))
        if self.number.startswith("00"):
            # Swap only the leading country code for the site's own prefix;
            # the same digits may legitimately occur again later in the number.
            number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
        else:
            number = self.number

        url = website.getAttribute("url")
        if "$AREACODE" in url:
            placeholder, lengthAttribute = "$AREACODE", "areacode"
        elif "$PFXAREACODE" in url:
            placeholder, lengthAttribute = "$PFXAREACODE", "pfxareacode"
        else:
            placeholder, lengthAttribute = None, None

        # Plain string replacement throughout: URLs may contain literal '%'
        # characters, which rules out %-formatting.
        if placeholder:
            if not website.hasAttribute(lengthAttribute):
                # Without the area code length the number cannot be split.
                debug("[ReverseLookupAndNotify] handleWebsite: (PFX)ARECODE cannot be handled")
                self.notifyAndReset()
                return
            areaCodeLen = int(website.getAttribute(lengthAttribute))
            url = url.replace(placeholder, number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        elif "$NUMBER" in url:
            url = url.replace("$NUMBER", number)
        else:
            debug("[ReverseLookupAndNotify] handleWebsite: cannot handle websites with no $NUMBER in url")
            self.notifyAndReset()
            return

        debug("[ReverseLookupAndNotify] Url to query: " + url)
        url = url.encode("UTF-8", "replace")
        self.currentWebsite = website
        getPage(url,
            agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
            ).addCallback(self._gotPage).addErrback(self._gotError)

    def _cleanName(self, text):
        """Strip markup leftovers from a scraped value and collapse blanks."""
        item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
        item = html2unicode(item, self.charset)
        while "  " in item:
            item = item.replace("  ", " ")
        return item.strip()

    def _extractField(self, entry, which, page):
        """Return the text captured by *entry*'s <which> pattern in *page*.

        Returns None when the entry defines no such pattern, the pattern has
        no capturing group, or it does not match; otherwise the (possibly
        empty) content of the first group.
        """
        pat = self.getPattern(entry, which)
        if not pat:
            return None
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotify] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
        found = re.match(pat, page, re.S|re.M)
        if not found or not found.groups():
            return None
        text = found.group(1) or ''
        debug("[ReverseLookupAndNotify] _gotPage: found for '''%s''': '''%s'''" %( which, text ))
        return text

    def _gotPage(self, page):
        """Scrape the caller's data from *page*; fall through to the next site on failure.

        Returns True when a caller was found and reported, False otherwise.
        """
        debug("[ReverseLookupAndNotify] _gotPage")
        pageCharset = "ISO-8859-1"
        found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
        if found:
            debug("[ReverseLookupAndNotify] Charset: " + found.group(1))
            pageCharset = found.group(1)
        else:
            debug("[ReverseLookupAndNotify] Default Charset: iso-8859-1")
        page = page.replace("\xa0"," ")
        try:
            page = page.decode(pageCharset, "replace")
        except LookupError:
            # The page announced a charset Python does not know.
            debug("[ReverseLookupAndNotify] Unknown charset, using iso-8859-1")
            page = page.decode("ISO-8859-1", "replace")

        for entry in self.currentWebsite.getElementsByTagName("entry"):
            # For the sites delivering fuzzy matches, check against the
            # number the site says it found.
            returnedNumber = self._extractField(entry, "number", page)
            if returnedNumber is not None:
                if self.number.startswith('00'):
                    # Country codes vary in length, so strip exactly ours.
                    number = '0' + self.number[len(self.countrycode):]
                else:
                    number = self.number
                if number != normalizePhoneNumber(returnedNumber):
                    debug("[ReverseLookupAndNotify] _gotPage: got unequal number '''%s''' for '''%s'''" %(returnedNumber, self.number))
                    continue

            # Prefer separate <lastname>/<firstname> patterns; otherwise use
            # the combined <name> pattern.
            name = ''
            firstname = ''
            if self.getPattern(entry, "lastname"):
                lastname = self._extractField(entry, "lastname", page)
                if lastname is not None:
                    name = self._cleanName(lastname)
                    given = self._extractField(entry, "firstname", page)
                    if given is not None:
                        firstname = self._cleanName(given)
            else:
                fullname = self._extractField(entry, "name", page)
                if fullname is None:
                    debug("[ReverseLookupAndNotify] _gotPage: no name found, skipping")
                    continue
                name = self._cleanName(fullname)
                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                if firstNameFirst == 'true': # the name has the form "firstname lastname"
                    found = re.match(r'(.*?)\s+(.*)', name)
                    if found:
                        firstname = found.group(1)
                        name = found.group(2)

            if not name:
                continue

            # A result without a city is not accepted.
            city = ''
            rawCity = self._extractField(entry, "city", page)
            if rawCity is not None:
                city = self._cleanName(rawCity)
                debug("[ReverseLookupAndNotify] _gotPage: city: " + city)
            if not city:
                continue

            zipcode = ''
            rawZipcode = self._extractField(entry, "zipcode", page)
            if rawZipcode:
                zipcode = self._cleanName(rawZipcode)
                debug("[ReverseLookupAndNotify] _gotPage: zipcode: " + zipcode)

            street = ''
            streetno = ''
            rawStreet = self._extractField(entry, "street", page)
            if rawStreet:
                street = self._cleanName(rawStreet)
                debug("[ReverseLookupAndNotify] _gotPage: street: " + street)
                # Split off a trailing house number ("Main Street 12-14").
                found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
                if found:
                    street = found.group(1)
                    streetno = found.group(2)

            self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
            debug("[ReverseLookupAndNotify] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

            self.notifyAndReset()
            return True

        self._gotError("[ReverseLookupAndNotify] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
        return False

    def _gotError(self, error = ""):
        """Try the next website of the country, or report failure if none is left."""
        debug("[ReverseLookupAndNotify] _gotError - Error: %s" %error)
        if self.nextWebsiteNo >= len(self.websites):
            debug("[ReverseLookupAndNotify] _gotError: I give up")
            self.notifyAndReset()
            return
        debug("[ReverseLookupAndNotify] _gotError: try next website")
        self.nextWebsiteNo = self.nextWebsiteNo+1
        self.handleWebsite(self.websites[self.nextWebsiteNo-1])

    def getPattern(self, website, which):
        """Return the text of the first <which> element below *website*.

        Returns '' when there is no such element or the element is empty.
        """
        pat1 = website.getElementsByTagName(which)
        if len(pat1) == 0:
            return ''
        if len(pat1) > 1:
            debug("[ReverseLookupAndNotify] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
        if not pat1[0].childNodes:
            return ''
        return pat1[0].childNodes[0].data

    def notifyAndReset(self):
        """Hand the result (or '' if there is none) to the notification callback."""
        debug("[ReverseLookupAndNotify] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
        if self.caller:
            try:
                debug("2: " + repr(self.caller))
                self.caller = self.caller.encode(self.charset, 'replace')
                debug("3: " + repr(self.caller))
            except (UnicodeError, LookupError):
                # Undecodable byte string or unknown charset name: pass the
                # caller on as it is.
                debug("[ReverseLookupAndNotify] cannot encode?!?!")
            self.notificationCallback(self.number, self.caller)
        else:
            self.notificationCallback(self.number, "")
        if __name__ == '__main__':
            # The lookup may finish inside __init__, i.e. before the script
            # has started the reactor; callWhenRunning covers both cases.
            reactor.callWhenRunning(reactor.stop) #@UndefinedVariable # pylint: disable-msg=E1101
396
if __name__ == '__main__':
    # Command line use:
    #   nrzuname.py <number>            print the raw lookup result
    #   nrzuname.py <number> <charset>  print the formatted result, encoded
    #                                   with <charset>
    if len(sys.argv) == 2:
        ReverseLookupAndNotify(sys.argv[1], simpleout)
        reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
    elif len(sys.argv) == 3:
        # NOTE(review): this branch used to call setDebug(False), but no
        # setDebug is defined or imported in this module, so the call raised
        # NameError before any lookup started.
        ReverseLookupAndNotify(sys.argv[1], out, sys.argv[2])
        reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101