Browse Source

Refs #30399 -- Made assertHTMLEqual normalize character and entity references.

Jon Dufresne 5 years ago
parent
commit
48235ba807
4 changed files with 40 additions and 11 deletions
  1. +6 -9
      django/test/html.py
  2. +5 -0
      docs/releases/3.0.txt
  3. +4 -2
      docs/topics/testing/tools.txt
  4. +25 -0
      tests/test_utils/tests.py

+ 6 - 9
django/test/html.py

@@ -3,11 +3,14 @@
 import re
 from html.parser import HTMLParser
 
-WHITESPACE = re.compile(r'\s+')
+# ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020
+# SPACE.
+# https://infra.spec.whatwg.org/#ascii-whitespace
+ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+')
 
 
 def normalize_whitespace(string):
-    return WHITESPACE.sub(' ', string)
+    return ASCII_WHITESPACE.sub(' ', string)
 
 
 class Element:
@@ -144,7 +147,7 @@ class Parser(HTMLParser):
     )
 
     def __init__(self):
-        super().__init__(convert_charrefs=False)
+        super().__init__()
         self.root = RootElement()
         self.open_tags = []
         self.element_positions = {}
@@ -202,12 +205,6 @@ class Parser(HTMLParser):
     def handle_data(self, data):
         self.current.append(data)
 
-    def handle_charref(self, name):
-        self.current.append('&%s;' % name)
-
-    def handle_entityref(self, name):
-        self.current.append('&%s;' % name)
-
 
 def parse_html(html):
     """

+ 5 - 0
docs/releases/3.0.txt

@@ -246,6 +246,11 @@ Tests
 * Tests and test cases to run can be selected by test name pattern using the
   new :option:`test -k` option.
 
+* HTML comparison, as used by
+  :meth:`~django.test.SimpleTestCase.assertHTMLEqual`, now treats text, character
+  references, and entity references that refer to the same character as
+  equivalent.
+
 URLs
 ~~~~
 

+ 4 - 2
docs/topics/testing/tools.txt

@@ -1603,14 +1603,16 @@ your test suite.
     * The ordering of attributes of an HTML element is not significant.
     * Attributes without an argument are equal to attributes that equal in
       name and value (see the examples).
+    * Text, character references, and entity references that refer to the same
+      character are equivalent.
 
     The following examples are valid tests and don't raise any
     ``AssertionError``::
 
         self.assertHTMLEqual(
-            '<p>Hello <b>world!</p>',
+            '<p>Hello <b>&#x27;world&#x27;!</p>',
             '''<p>
-                Hello   <b>world! </b>
+                Hello   <b>&#39;world&#39;! </b>
             </p>'''
         )
         self.assertHTMLEqual(

+ 25 - 0
tests/test_utils/tests.py

@@ -612,6 +612,31 @@ class HTMLEqualTests(SimpleTestCase):
             '<input type="text" id="id_name" />',
             '<input type="password" id="id_name" />')
 
+    def test_normalize_refs(self):
+        pairs = [
+            ('&#39;', '&#x27;'),
+            ('&#39;', "'"),
+            ('&#x27;', '&#39;'),
+            ('&#x27;', "'"),
+            ("'", '&#39;'),
+            ("'", '&#x27;'),
+            ('&amp;', '&#38;'),
+            ('&amp;', '&#x26;'),
+            ('&amp;', '&'),
+            ('&#38;', '&amp;'),
+            ('&#38;', '&#x26;'),
+            ('&#38;', '&'),
+            ('&#x26;', '&amp;'),
+            ('&#x26;', '&#38;'),
+            ('&#x26;', '&'),
+            ('&', '&amp;'),
+            ('&', '&#38;'),
+            ('&', '&#x26;'),
+        ]
+        for pair in pairs:
+            with self.subTest(repr(pair)):
+                self.assertHTMLEqual(*pair)
+
     def test_complex_examples(self):
         self.assertHTMLEqual(
             """<tr><th><label for="id_first_name">First name:</label></th>