diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-11-23 18:26:33 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-11-23 18:26:33 -0800 |
commit | df500700f5a0975e0bfa53552fcb071e9686cb2b (patch) | |
tree | 7a8a487b1cb7e140d4fffc71b688b1684edee22d | |
parent | 938ee9f92bf15816247ac7180d432ba91a99eb69 (diff) |
Better fix for #380.
-rw-r--r-- | test/normalize.py | 12 |
1 file changed, 11 insertions, 1 deletion
diff --git a/test/normalize.py b/test/normalize.py
index 2bce2cc..6eb4ec2 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,5 +1,14 @@
 # -*- coding: utf-8 -*-
 from html.parser import HTMLParser
+
+try:
+    from html.parser import HTMLParseError
+except ImportError:
+    # HTMLParseError was removed in Python 3.5. It could never be
+    # thrown, so we define a placeholder instead.
+    class HTMLParseError(Exception):
+        pass
+
 from html.entities import name2codepoint
 import sys
 import re
@@ -12,6 +21,7 @@ whitespace_re = re.compile('\s+')
 class MyHTMLParser(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
+        self.convert_charrefs = False
         self.last = "starttag"
         self.in_pre = False
         self.output = ""
@@ -175,6 +185,6 @@ def normalize_html(html):
                 parser.feed(chunk.group(0))
         parser.close()
         return parser.output
-    except Exception as e:
+    except HTMLParseError as e:
         sys.stderr.write("Normalization error: " + e.msg + "\n")
         return html # on error, return unnormalized HTML