diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-12-22 23:38:56 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-12-22 23:40:17 -0700 |
commit | a8d97a098742413d0ffdc3602d1798df6e4f00a1 (patch) | |
tree | f72b4b33e8ff140670f1d07920d5c3f0848df107 /test | |
parent | 1ba48229420ac28152f87d27a77d0980d79ff1a9 (diff) |
Fixed normalization bug, added more doctests for normalization.
* The tests test for removal of whitespace around block-level tags.
* Previously whitespace wasn't removed before an initial block-level
tag; this commit fixes that.
* Also revised wording so it's clear that whitespace is removed
on both sides of block-level tags.
Closes #246 in a slightly different way.
Diffstat (limited to 'test')
-rw-r--r-- | test/normalize.py | 20 |
1 file changed, 17 insertions, 3 deletions
```diff
diff --git a/test/normalize.py b/test/normalize.py
index 5b4803b..894a837 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -32,7 +32,7 @@ class MyHTMLParser(HTMLParser):
     def handle_endtag(self, tag):
         if tag == "pre":
             self.in_pre = False
-        if self.is_block_tag(tag):
+        elif self.is_block_tag(tag):
             self.output = self.output.rstrip()
         self.output += "</" + tag + ">"
         self.last_tag = tag
@@ -40,6 +40,8 @@ class MyHTMLParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
         if tag == "pre":
             self.in_pre = True
+        if self.is_block_tag(tag):
+            self.output = self.output.rstrip()
         self.output += "<" + tag
         # For now we don't strip out 'extra' attributes, because of
         # raw HTML test cases.
@@ -125,11 +127,23 @@ def normalize_html(html):
         >>> normalize_html("<p>a \t\nb</p>")
         u'<p>a b</p>'
 
-    * Outer whitespace (outside block-level tags) is removed.
+    * Whitespace surrounding block-level tags is removed.
+
+        >>> normalize_html("<p>a b</p>")
+        u'<p>a b</p>'
+
+        >>> normalize_html(" <p>a b</p>")
+        u'<p>a b</p>'
 
-        >>> normalize_html("<p>a b</p> ")
+        >>> normalize_html("<p>a b</p> ")
         u'<p>a b</p>'
 
+        >>> normalize_html("\n\t<p>\n\t\ta b\t\t</p>\n\t")
+        u'<p>a b</p>'
+
+        >>> normalize_html("<i>a b</i> ")
+        u'<i>a b</i> '
+
     * Self-closing tags are converted to open tags.
 
         >>> normalize_html("<br />")
```