back to older revision. This was buggy with FBF fonbuch
[enigma2-plugins.git] / fritzcall / src / nrzuname.py
1 #!/usr/bin/python
2 # -*- coding: ISO-8859-1 -*-
3 # $Id$
4 # $Author$
5 # $Revision$
6 # $Date$
7
8 import re, sys, os
9 from xml.dom.minidom import parse
10 from twisted.web.client import getPage #@UnresolvedImport
11 from twisted.internet import reactor #@UnresolvedImport
12
# Module-wide switch read by myprint(); debug output is on by default.
debug = True


def setDebug(what):
        """Turn the module's debug output on (truthy what) or off (falsy what)."""
        global debug
        debug = what
17
def myprint(str):
        """Print str to stdout, but only while the module-level debug flag is set."""
        if not debug:
                return
        # Single-argument call form: prints exactly what the print statement would.
        print(str)
21
def html2utf8(in_html):
        """Replace HTML entities in in_html and return the resulting string.

        With htmlentitydefs available, named entities are resolved through
        htmlentitydefs.name2codepoint and numeric ones through int(); every
        occurrence is then replaced by the UTF-8 encoding of that character.
        Without htmlentitydefs only a fixed chain of literal replacements is
        applied.  Entities that cannot be resolved are reported via myprint()
        and stay in the text.
        """
        try:
                import htmlentitydefs
        except ImportError:
                # Fallback without htmlentitydefs: plain literal replacements only.
                # NOTE(review): as written the first replace() is a no-op and most of
                # the others replace an umlaut with the empty string; the entity
                # names and target characters look lost in extraction -- confirm
                # against the repository before relying on this branch.
                try:
                        return in_html.replace("&", "&").replace("ß", "\9f").replace("ä", "").replace("ö", "").replace("ü", "").replace("Ä", "").replace("Ö", "").replace("Ü", "")
                except UnicodeDecodeError:
                        # fall through to the final return: input is handed back unchanged
                        pass
        else:
                # first convert some WML codes; does not work?!?!
                # NOTE(review): same suspicion as above -- the pairs below currently map
                # each character to "" (i.e. delete it); presumably they were
                # (entity code, character) pairs originally -- TODO confirm.
                wmldefs = [
                                ("ß", ""),
                                ("ä", ""),
                                ("ö", ""),
                                ("ü", ""),
                                ("Ä", ""),
                                ("Ö", ""),
                                ("Ü", "")
                                ]
                for (a, b)in wmldefs:
                        try:
                                in_html = in_html.replace(a,b)
                        except UnicodeError:
                                # a replacement that cannot be applied is skipped
                                pass

                # Named entities: "&", then 1 to 5 non-digit characters, then ";".
                # group(1) is the complete entity text, group(2) the bare name.
                htmlentitynamemask = re.compile('(&(\D{1,5}?);)')
                entitydict = {}
                entities = htmlentitynamemask.finditer(in_html)
                for x in entities:
                        entitydict[x.group(1)] = x.group(2)
                # Turn each collected name into its code point.
                for key, name in entitydict.items():
                        try:
                                entitydict[key] = htmlentitydefs.name2codepoint[name]
                        except KeyError:
                                # Unknown name: the entry keeps the name string, so the int()
                                # call in the replacement loop below raises ValueError for it
                                # and the entity is left untouched.
                                myprint("[Callhtml2utf8] KeyError " + key + "/" + name)
                                pass

                # Numeric entities: "&#", then 1 to 5 decimal digits, then ";".  They go
                # into the same dictionary as the named ones.
                htmlentitynumbermask = re.compile('(&#(\d{1,5}?);)')
                entities = htmlentitynumbermask.finditer(in_html)
                for x in entities:
                        entitydict[x.group(1)] = x.group(2)
                # Replace every collected entity text by the UTF-8 bytes of its character.
                for key, codepoint in entitydict.items():
                        try:
                                in_html = in_html.replace(key, (unichr(int(codepoint)).encode('utf8', "replace")))
                        except ValueError:
                                # Raised by int() for unresolved names; UnicodeError is a
                                # ValueError subclass, so encoding/coercion failures of the
                                # replace() end up here as well.
                                myprint("[Callhtml2utf8] ValueError " + key + "/" + str(codepoint))
                                pass
        return in_html
70
def out(number, caller):
        """Split a looked-up caller string into its fields and print them.

        caller is expected to look like "Name Firstname, Street 12, 12345 City"
        (parts separated by ", ").  Parts that are missing -- including a
        completely empty caller, which is what a failed lookup delivers -- simply
        leave the corresponding fields empty instead of raising IndexError.

        number -- the phone number; accepted for the output-function interface,
                  not used here.
        caller -- the caller description to split.

        Prints one line: "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: ..".
        """
        name = vorname = strasse = hnr = plz = ort = ""
        lines = caller.split(', ')

        # First part: surname, optionally followed by the remaining name.
        found = re.match(r"(.+?)\s+(.+)", lines[0])
        if found:
                name = found.group(1)
                vorname = found.group(2)
        else:
                name = lines[0]

        # Second part: street with the house number either trailing or leading.
        if len(lines) > 1:
                found = re.match(r"^(.+) ([-\d]+)$", lines[1], re.S)
                if found:
                        strasse = found.group(1)
                        hnr = found.group(2)
                else:
                        found = re.match(r"^(\d+) (.+)$", lines[1], re.S)
                        if found:
                                strasse = found.group(2)
                                hnr = found.group(1)
                        else:
                                strasse = lines[1]

        # Remaining parts: the first one shaped like "<zip> <city>" wins.
        for line in lines[2:]:
                found = re.match(r"(\S+)\s+(.+)", line, re.S)
                if found:
                        plz = found.group(1)
                        ort = found.group(2)
                        break
        else:
                # nothing looked like "<zip> <city>": take the third part as the city
                if len(lines) > 2:
                        ort = lines[2].strip()

        print("NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % (name, vorname, strasse, hnr, plz, ort))
103
def simpleout(number, caller):
        """Output function that prints the caller string unchanged.

        number is accepted only to match the output-function interface.
        """
        print(caller)
106
# Where the lookup definitions are read from: a file in the current directory
# unless Tools.Directories can be imported, in which case the file inside the
# FritzCall plugin directory is used.
reverseLookupFileName = "reverselookup.xml"
try:
        from Tools.Directories import resolveFilename, SCOPE_PLUGINS
        reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/FritzCall/reverselookup.xml")
except ImportError:
        pass

# Maps a country code such as "0049" to the <website> elements parsed for it.
countries = {}
# Modification time of reverselookup.xml at the moment it was last parsed.
reverselookupMtime = 0
115
class ReverseLookupAndNotifier:
        """Reverse-lookup a phone number on the websites listed in reverselookup.xml.

        The XML file holds one <country code="..."> element per country, each
        with <website> elements (attributes name, prefix, url) that carry
        <entry> elements with the regular expressions for name, street, city
        and zipcode.  The lookup is started by the constructor; the result is
        handed to outputFunction(number, caller), where caller is
        "name, street, zipcode city" encoded in charset, or "" if nothing
        could be found.
        """

        def __init__(self, number, outputFunction=out, charset="ISO-8859-1", countrycode = "0049"):
                """Start the lookup for number.

                number         -- phone number as a string; only numbers starting with
                                  "0" are looked up, a leading "00" marks an explicit
                                  country code.
                outputFunction -- callable(number, caller) receiving the result.
                charset        -- encoding of the caller string handed to outputFunction.
                countrycode    -- country code to assume for numbers without "00".
                """
                myprint("[ReverseLookupAndNotifier] reverse Lookup for %s!" %number)
                self.number = number
                self.outputFunction = outputFunction
                self.charset = charset
                self.countrycode = countrycode
                self.caller = ""
                self.currentWebsite = None
                # always defined, so _gotError() can rely on it
                self.websites = []
                self.nextWebsiteNo = 0

                self._loadCountries()

                # slicing instead of number[0]: an empty number must not raise
                if number[:1] != "0":
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                if self.number[:2] == "00":
                        # country codes including the "00" are 3 (e.g. USA) to 5 characters long
                        for length in (3, 4, 5):
                                if self.number[:length] in countries:
                                        self.countrycode = self.number[:length]
                                        break
                        else:
                                myprint("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                                # self.caller = _("UNKNOWN")
                                self.notifyAndReset()
                                return

                self.websites = countries.get(self.countrycode, [])
                if not self.websites:
                        # unknown country, or a country without any <website>
                        myprint("[ReverseLookupAndNotifier] Country cannot be reverse handled")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                myprint("[ReverseLookupAndNotifier] Found website for reverse lookup")
                self.nextWebsiteNo = 1
                self.handleWebsite(self.websites[0])

        def _loadCountries(self):
                """(Re-)read reverselookup.xml into the module-level countries dict.

                The file is parsed on first use and whenever its modification
                time is newer than at the previous parse.
                """
                global reverselookupMtime
                # index 8 of the stat result is the modification time
                reverselookupMtimeAct = os.stat(reverseLookupFileName)[8]
                if countries and reverselookupMtimeAct <= reverselookupMtime:
                        return

                myprint("[ReverseLookupAndNotifier] (Re-)Reading %s\n" %reverseLookupFileName)
                # parse first: if the file is broken the cached state stays untouched
                # and the next instance tries again
                dom = parse(reverseLookupFileName)
                reverselookupMtime = reverselookupMtimeAct
                # drop countries that are no longer listed in the file
                countries.clear()
                for top in dom.getElementsByTagName("reverselookup"):
                        for country in top.getElementsByTagName("country"):
                                code = country.getAttribute("code").replace("+","00")
                                countries[code] = country.getElementsByTagName("website")

        def handleWebsite(self, website):
                """Build the query URL for website and start fetching it.

                A website whose URL cannot be filled in is skipped via
                _gotError(), i.e. the next website is tried.
                """
                myprint("[ReverseLookupAndNotifier] handleWebsite: " + website.getAttribute("name"))
                if self.number[:2] == "00":
                        # replace only the leading country code by the site's prefix; the
                        # same digits may legitimately occur again later in the number
                        number = website.getAttribute("prefix") + self.number.replace(self.countrycode, "", 1)
                else:
                        number = self.number

                url = self._buildUrl(website, number)
                if url is None:
                        self._gotError("[ReverseLookupAndNotifier] handleWebsite: cannot fill in url of %s" %website.getAttribute("name"))
                        return

                myprint("[ReverseLookupAndNotifier] Url to query: " + url)
                url = url.encode("UTF-8", "replace")
                self.currentWebsite = website
                # I am not sure, whether setting the user-agent works this way
                getPage(url,
                        agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
                        ).addCallback(self._gotPage).addErrback(self._gotError)

        def _buildUrl(self, website, number):
                """Return the website's url with its placeholders filled in, or None.

                $AREACODE / $PFXAREACODE need the length of the area code from the
                attribute "areacode" / "pfxareacode"; without a usable attribute, or
                without any $NUMBER placeholder, the URL cannot be built.
                """
                url = website.getAttribute("url")
                for placeholder, attribute in (("$AREACODE", "areacode"), ("$PFXAREACODE", "pfxareacode")):
                        if placeholder not in url:
                                continue
                        if not website.hasAttribute(attribute):
                                myprint("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                                return None
                        try:
                                areaCodeLen = int(website.getAttribute(attribute))
                        except ValueError:
                                myprint("[ReverseLookupAndNotifier] handleWebsite: (PFX)ARECODE cannot be handled")
                                return None
                        # plain replace instead of %-formatting: URLs may contain "%"
                        return url.replace(placeholder, number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])

                if "$NUMBER" in url:
                        return url.replace("$NUMBER", number)

                myprint("[ReverseLookupAndNotifier] handleWebsite: cannot handle websites with no $NUMBER in url")
                return None

        def _gotPage(self, page):
                """Callback for the fetched page: extract name and address.

                Returns True after a successful lookup; otherwise hands over to
                _gotError(), which tries the next website.
                """
                myprint("[ReverseLookupAndNotifier] _gotPage")
                charset = "ISO-8859-1"
                found = re.match('.*content=".*?charset=([^"]+)"',page,re.S)
                if found:
                        myprint("[ReverseLookupAndNotifier] Charset: " + found.group(1))
                        charset = found.group(1)
                try:
                        page = page.decode(charset, "replace")
                except LookupError:
                        # the page announces a charset Python does not know
                        page = page.decode("ISO-8859-1", "replace")
                # Non-breaking spaces are replaced only after decoding: on the raw
                # bytes 0xA0 may be part of a multi-byte sequence.
                page = page.replace(u"\xa0", u" ")

                for entry in self.currentWebsite.getElementsByTagName("entry"):
                        # myprint("[ReverseLookupAndNotifier] _gotPage: try entry")
                        details = self._extractDetails(entry, page)
                        if details is None:
                                continue

                        name, street, city, zipcode = details
                        address = street + ", " + zipcode + " " + city
                        myprint("[ReverseLookupAndNotifier] _gotPage: Reverse lookup succeeded:\nName: %s\nAddress: %s" %(name, address))
                        self.caller = "%s, %s" %(name, address)
                        # if self.number != 0 and config.plugins.Call.addcallers.value and self.event == "RING":
                                # phonebook.add(self.number, self.caller)

                        self.caller = self.caller.encode("UTF-8", "replace")
                        self.notifyAndReset()
                        return True

                self._gotError("[ReverseLookupAndNotifier] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))

        def _extractDetails(self, entry, page):
                """Return [name, street, city, zipcode] matched by entry in page, or None."""
                details = []
                for what in ("name", "street", "city", "zipcode"):
                        pat = ".*?" + self.getPattern(entry, what)
                        found = re.match(pat, page, re.S|re.M)
                        if not found:
                                return None
                        myprint(found.group(1))
                        item = found.group(1).replace("&nbsp;"," ").replace("</b>","").replace(","," ")
                        item = html2utf8(item)
                        # Decode only a byte string.  Calling decode() on text that is
                        # already unicode makes Python 2 encode it as ASCII first, which
                        # fails for every non-ASCII name.
                        if not isinstance(item, type(u"")):
                                item = item.decode("ISO-8859-1", "replace")
                        # collapse runs of blanks into a single one
                        item = re.sub(" {2,}", " ", item)
                        details.append(item.strip())
                return details

        def _gotError(self, error = ""):
                """Errback / failure path: try the next website or give up and notify."""
                myprint("[ReverseLookupAndNotifier] _gotError - Error: %s" %error)
                if self.nextWebsiteNo >= len(self.websites):
                        myprint("[ReverseLookupAndNotifier] _gotError: I give up")
                        # self.caller = _("UNKNOWN")
                        self.notifyAndReset()
                        return

                myprint("[ReverseLookupAndNotifier] _gotError: try next website")
                website = self.websites[self.nextWebsiteNo]
                self.nextWebsiteNo = self.nextWebsiteNo+1
                self.handleWebsite(website)

        def getPattern(self, website, which):
                """Return the text of the first <which> child element of website.

                Raises IndexError if there is no such element or it is empty.
                """
                pat1 = website.getElementsByTagName(which)
                if len(pat1) > 1:
                        myprint("Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
                return pat1[0].childNodes[0].data

        def notifyAndReset(self):
                """Hand the result to outputFunction; stop the reactor when run as a script."""
                myprint("[ReverseLookupAndNotifier] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
                if self.caller:
                        # "replace": a character missing in the target charset must not
                        # make the whole notification fail
                        self.outputFunction(self.number, self.caller.decode("utf-8").encode(self.charset, "replace"))
                else:
                        self.outputFunction(self.number, "")
                if __name__ == '__main__':
                        reactor.stop() #@UndefinedVariable
280
if __name__ == '__main__':
        # Command line use:
        #   nrzuname.py <number>            print the split-up fields, with debug output
        #   nrzuname.py <number> <anything> print the plain caller string, no debug output
        def _lookupOrStop(*args):
                # Start the lookup; if it cannot even be set up, report the problem and
                # leave the reactor instead of waiting forever.
                try:
                        ReverseLookupAndNotifier(*args)
                except Exception:
                        import traceback
                        traceback.print_exc()
                        reactor.stop() #@UndefinedVariable

        # kept from the original script; not used below
        cwd = os.path.dirname(sys.argv[0])
        # The lookup is started from inside the running reactor: the constructor may
        # finish at once (e.g. for a number without leading 0) and then calls
        # reactor.stop(), which raises ReactorNotRunning before reactor.run().
        if (len(sys.argv) == 2):
                # nrzuname.py Nummer
                reactor.callWhenRunning(_lookupOrStop, sys.argv[1]) #@UndefinedVariable
                reactor.run() #@UndefinedVariable
        elif (len(sys.argv) == 3):
                # nrzuname.py Nummer SimpleOut
                debug = False
                reactor.callWhenRunning(_lookupOrStop, sys.argv[1], simpleout) #@UndefinedVariable
                reactor.run() #@UndefinedVariable