diff options
-rw-r--r-- | test/normalize.py | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/test/normalize.py b/test/normalize.py index 2bce2cc..6eb4ec2 100644 --- a/test/normalize.py +++ b/test/normalize.py @@ -1,5 +1,14 @@ # -*- coding: utf-8 -*- from html.parser import HTMLParser + +try: + from html.parser import HTMLParseError +except ImportError: + # HTMLParseError was removed in Python 3.5. It could never be + # thrown, so we define a placeholder instead. + class HTMLParseError(Exception): + pass + from html.entities import name2codepoint import sys import re @@ -12,6 +21,7 @@ whitespace_re = re.compile('\s+') class MyHTMLParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) + self.convert_charrefs = False self.last = "starttag" self.in_pre = False self.output = "" @@ -175,6 +185,6 @@ def normalize_html(html): parser.feed(chunk.group(0)) parser.close() return parser.output - except Exception as e: + except HTMLParseError as e: sys.stderr.write("Normalization error: " + e.msg + "\n") return html # on error, return unnormalized HTML |