diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-11-20 08:22:20 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-11-20 08:22:20 -0800 |
commit | c9875cbbbe293e6727a7a25b79e7ea4949ef5670 (patch) | |
tree | 23d9aaec1026d64d117c3dfd2acdeb5ff63a4085 | |
parent | 698dab76847e5d671cce42a0c0ce2c98c5f07776 (diff) |
runtests.py: catch HTMLParser errors in normalizer.
-rwxr-xr-x | runtests.py | 14 |
1 files changed, 9 insertions, 5 deletions
```diff
diff --git a/runtests.py b/runtests.py
index b3c8d98..83c331d 100755
--- a/runtests.py
+++ b/runtests.py
@@ -7,7 +7,7 @@ import platform
 from difflib import unified_diff
 from subprocess import *
 import argparse
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
 from htmlentitydefs import name2codepoint
 import re
 import cgi
@@ -180,10 +180,14 @@ def normalize_html(html):
     * HTMLParser just swallows CDATA.
     * HTMLParser seems to treat unknown declarations as comments.
     """
-    parser = MyHTMLParser()
-    parser.feed(html.decode(encoding='UTF-8'))
-    parser.close()
-    return parser.output
+    try:
+        parser = MyHTMLParser()
+        parser.feed(html.decode(encoding='UTF-8'))
+        parser.close()
+        return parser.output
+    except HTMLParseError as e:
+        sys.stderr.write("Normalization error: " + e.msg + "\n")
+        return html  # on error, return unnormalized HTML
 
 def print_test_header(headertext, example_number, start_line, end_line):
     print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
```