Simplify unnecessarily complicated Code in stripper from last commit,
[enigma2-plugins.git] / simplerss / src / TagStrip.py
1 # -*- coding: utf-8 -*-
2
3 import re
4
5 # Why no sgmllib ?!
6
class TagStrip():
    """Reduce an HTML fragment to plain text.

    Markup tags are removed and the character references listed in
    ``convertables`` are replaced by the text they stand for.
    """

    # Character reference -> replacement text.  Each German umlaut and the
    # sharp s is listed under both its named and its numeric reference.
    # Replacement characters are spelled as escapes so the table does not
    # depend on the encoding the source file is saved in.
    convertables = {
        "&auml;": u"\xe4",   # a-umlaut
        "&#228;": u"\xe4",
        "&uuml;": u"\xfc",   # u-umlaut
        "&#252;": u"\xfc",
        "&ouml;": u"\xf6",   # o-umlaut
        "&#246;": u"\xf6",
        "&Auml;": u"\xc4",   # A-umlaut
        "&#196;": u"\xc4",
        "&Uuml;": u"\xdc",   # U-umlaut
        "&#220;": u"\xdc",
        "&Ouml;": u"\xd6",   # O-umlaut
        "&#214;": u"\xd6",
        "&szlig;": u"\xdf",  # sharp s
        "&#223;": u"\xdf",
        "&#38;": u"&",
        "&#8230;": u"...",   # horizontal ellipsis, flattened to ASCII
        "&#8211;": u"-",     # en dash, flattened to ASCII
        "&#160;": u" ",      # no-break space

        "&lt;": u"<",
        "&gt;": u">",
        "&nbsp;": u" ",
        "&amp;": u"&",
        "&quot;": u"\"",
    }

    def strip(self, html):
        """Return ``html`` with tags removed and known entities decoded.

        Entities that are not in ``convertables`` are left untouched.
        """
        # Strip enclosed tags.  [^>]* also matches line breaks, so a tag
        # that is wrapped over several lines is removed as well.
        text = re.sub(r'<[^>]*>', '', html)

        table = self.convertables
        if not table:
            # An empty alternation would match at every position.
            return text

        # Convert htmlspecialchars in ONE pass: text produced by a
        # replacement is never scanned again, so "&amp;lt;" decodes to
        # "&lt;" and not to "<", whatever order the dict iterates in.
        # Longest keys first, so a key that is a prefix of another key
        # cannot shadow it.
        ordered = sorted(table, key=len, reverse=True)
        pattern = '|'.join(re.escape(key) for key in ordered)
        return re.sub(pattern, lambda match: table[match.group(0)], text)