From df500700f5a0975e0bfa53552fcb071e9686cb2b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 23 Nov 2015 18:26:33 -0800
Subject: Better fix for #380.

---
 test/normalize.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/test/normalize.py b/test/normalize.py
index 2bce2cc..6eb4ec2 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,5 +1,14 @@
 # -*- coding: utf-8 -*-
 from html.parser import HTMLParser
+
+try:
+    from html.parser import HTMLParseError
+except ImportError:
+    # HTMLParseError was removed in Python 3.5. It could never be
+    # thrown, so we define a placeholder instead.
+    class HTMLParseError(Exception):
+        pass
+
 from html.entities import name2codepoint
 import sys
 import re
@@ -12,6 +21,7 @@ whitespace_re = re.compile('\s+')
 class MyHTMLParser(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
+        self.convert_charrefs = False
         self.last = "starttag"
         self.in_pre = False
         self.output = ""
@@ -175,6 +185,6 @@ def normalize_html(html):
                 parser.feed(chunk.group(0))
         parser.close()
         return parser.output
-    except Exception as e:
+    except HTMLParseError as e:
         sys.stderr.write("Normalization error: " + e.msg + "\n")
         return html # on error, return unnormalized HTML
-- 
cgit v1.2.3