From df500700f5a0975e0bfa53552fcb071e9686cb2b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 23 Nov 2015 18:26:33 -0800
Subject: Better fix for #380.

---
 test/normalize.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'test')

diff --git a/test/normalize.py b/test/normalize.py
index 2bce2cc..6eb4ec2 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,5 +1,14 @@
 # -*- coding: utf-8 -*-
 from html.parser import HTMLParser
+
+try:
+    from html.parser import HTMLParseError
+except ImportError:
+    # HTMLParseError was removed in Python 3.5. The non-strict parser
+    # never raised it, so we define a placeholder to catch instead.
+    class HTMLParseError(Exception):
+        pass
+
 from html.entities import name2codepoint
 import sys
 import re
@@ -12,6 +21,7 @@ whitespace_re = re.compile('\s+')
 class MyHTMLParser(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
+        self.convert_charrefs = False
         self.last = "starttag"
         self.in_pre = False
         self.output = ""
@@ -175,6 +185,6 @@ def normalize_html(html):
                 parser.feed(chunk.group(0))
         parser.close()
         return parser.output
-    except Exception as e:
+    except HTMLParseError as e:
         sys.stderr.write("Normalization error: " + e.msg + "\n")
         return html  # on error, return unnormalized HTML
-- 
cgit v1.2.3