From b28c97c9b8af266d4f12deb5febcf28807d9f5c6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Dec 2014 08:14:13 -0800
Subject: Added a few more doctests for HTML normalization (#245).

---
 test/normalize.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'test')
diff --git a/test/normalize.py b/test/normalize.py
index 4b922e6..5b4803b 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from HTMLParser import HTMLParser, HTMLParseError
 from htmlentitydefs import name2codepoint
 import sys
@@ -118,14 +119,33 @@ def normalize_html(html):
     Multiple inner whitespaces are collapsed to a single space (except
     in pre tags):
 
+        >>> normalize_html("<p>a  \t b</p>")
+        u'<p>a b</p>'
+
         >>> normalize_html("<p>a  \t\nb</p>")
         u'<p>a b</p>'
 
     * Outer whitespace (outside block-level tags) is removed.
+
+        >>> normalize_html("<p>a  b</p>  ")
+        u'<p>a b</p>'
+
     * Self-closing tags are converted to open tags.
+
+        >>> normalize_html("<br />")
+        u'<br>'
+
     * Attributes are sorted and lowercased.
+
+        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
+        u'<a href="foo" title="bar">x</a>'
+
     * References are converted to unicode, except that '<', '>', '&', and
-      '&' are rendered using entities.
+      '"' are rendered using entities.
+
+        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
+        u'\u2200&amp;&gt;&lt;&quot;'
+
     """
     html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
     try:
-- 
cgit v1.2.3