aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-11-23 18:26:33 -0800
committer John MacFarlane <jgm@berkeley.edu> 2015-11-23 18:26:33 -0800
commit df500700f5a0975e0bfa53552fcb071e9686cb2b (patch)
tree 7a8a487b1cb7e140d4fffc71b688b1684edee22d
parent 938ee9f92bf15816247ac7180d432ba91a99eb69 (diff)
Better fix for #380.
-rw-r--r-- test/normalize.py 12
1 files changed, 11 insertions, 1 deletions
diff --git a/test/normalize.py b/test/normalize.py
index 2bce2cc..6eb4ec2 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,5 +1,14 @@
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
+
+try:
+ from html.parser import HTMLParseError
+except ImportError:
+ # HTMLParseError was removed in Python 3.5. It could never be
+ # thrown, so we define a placeholder instead.
+ class HTMLParseError(Exception):
+ pass
+
from html.entities import name2codepoint
import sys
import re
@@ -12,6 +21,7 @@ whitespace_re = re.compile('\s+')
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
+ self.convert_charrefs = False
self.last = "starttag"
self.in_pre = False
self.output = ""
@@ -175,6 +185,6 @@ def normalize_html(html):
parser.feed(chunk.group(0))
parser.close()
return parser.output
- except Exception as e:
+ except HTMLParseError as e:
sys.stderr.write("Normalization error: " + e.msg + "\n")
return html # on error, return unnormalized HTML