Răsfoiți Sursa

Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.

Removed the obsolete and potentially problematic IDNA 2003 ("punycode")
encoding of internationalized domain names in smart_urlquote() and Urlizer,
which are used only by AdminURLFieldWidget and the urlize/urlizetrunc
template filters. Both now emit percent-encoded UTF-8, which defers
IDNA details to the browser (like other URLs rendered by Django).
Mike Edmunds 3 luni în urmă
părinte
comite
29ba75e6e5

+ 1 - 0
AUTHORS

@@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
     Mihai Preda <mihai_preda@yahoo.com>
     Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
     Mike Axiak <axiak@mit.edu>
+    Mike Edmunds <medmunds@gmail.com>
     Mike Grouchy <https://mikegrouchy.com/>
     Mike Malone <mjmalone@gmail.com>
     Mike Richardson

+ 6 - 10
django/utils/html.py

@@ -9,7 +9,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
 
 from django.core.exceptions import SuspiciousOperation, ValidationError
 from django.core.validators import EmailValidator
-from django.utils.encoding import punycode
 from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
 from django.utils.regex_helper import _lazy_re_compile
@@ -237,17 +236,16 @@ def smart_urlquote(url):
         # see also https://bugs.python.org/issue16285
         return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
 
-    # Handle IDN before quoting.
     try:
         scheme, netloc, path, query, fragment = urlsplit(url)
     except ValueError:
         # invalid IPv6 URL (normally square brackets in hostname part).
         return unquote_quote(url)
 
-    try:
-        netloc = punycode(netloc)  # IDN -> ACE
-    except UnicodeError:  # invalid domain part
-        return unquote_quote(url)
+    # Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
+    # section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
+    # See #36013.
+    netloc = unquote_quote(netloc)
 
     if query:
         # Separately unquoting key/value, so as to not mix querystring separators
@@ -348,10 +346,8 @@ class Urlizer:
                 url = smart_urlquote("http://%s" % html.unescape(middle))
             elif ":" not in middle and self.is_email_simple(middle):
                 local, domain = middle.rsplit("@", 1)
-                try:
-                    domain = punycode(domain)
-                except UnicodeError:
-                    return word
+                # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
+                # to the user agent. See #36013.
                 local = quote(local, safe="")
                 domain = quote(domain, safe="")
                 url = self.mailto_template.format(local=local, domain=domain)

+ 5 - 2
tests/admin_widgets/tests.py

@@ -486,11 +486,13 @@ class AdminURLWidgetTest(SimpleTestCase):
         w = widgets.AdminURLFieldWidget()
         self.assertHTMLEqual(
             w.render("test", "http://example-äüö.com"),
-            '<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">'
+            '<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
             "http://example-äüö.com</a><br>"
             'Change:<input class="vURLField" name="test" type="url" '
             'value="http://example-äüö.com"></p>',
         )
+        # Does not use obsolete IDNA-2003 encoding (#36013).
+        self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
 
     def test_render_quoting(self):
         """
@@ -517,7 +519,8 @@ class AdminURLWidgetTest(SimpleTestCase):
         output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
         self.assertEqual(
             HREF_RE.search(output)[1],
-            "http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E",
+            "http://example-%C3%A4%C3%BC%C3%B6.com/"
+            "%3Csometag%3Esome-text%3C/sometag%3E",
         )
         self.assertEqual(
             TEXT_RE.search(output)[1],

+ 19 - 4
tests/template_tests/filter_tests/test_urlize.py

@@ -229,19 +229,34 @@ class FunctionTests(SimpleTestCase):
         """
         #13704 - Check urlize handles IDN correctly
         """
+        # The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.
         self.assertEqual(
             urlize("http://c✶.ws"),
-            '<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
+            '<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
         )
         self.assertEqual(
             urlize("www.c✶.ws"),
-            '<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
+            '<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
         )
         self.assertEqual(
-            urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
+            urlize("c✶.org"),
+            '<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
         )
         self.assertEqual(
-            urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
+            urlize("info@c✶.org"),
+            '<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
+        )
+
+        # Pre-encoded IDNA is urlized but not re-encoded.
+        self.assertEqual(
+            urlize("www.xn--iny-zx5a.com/idna2003"),
+            '<a href="http://www.xn--iny-zx5a.com/idna2003"'
+            ' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
+        )
+        self.assertEqual(
+            urlize("www.xn--fa-hia.com/idna2008"),
+            '<a href="http://www.xn--fa-hia.com/idna2008"'
+            ' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
         )
 
     def test_malformed(self):

+ 43 - 4
tests/utils_tests/test_html.py

@@ -264,8 +264,26 @@ class TestUtilsHtml(SimpleTestCase):
 
     def test_smart_urlquote(self):
         items = (
-            ("http://öäü.com/", "http://xn--4ca9at.com/"),
-            ("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
+            # IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
+            ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
+            ("https://faß.example.com", "https://fa%C3%9F.example.com"),
+            (
+                "http://öäü.com/öäü/",
+                "http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
+            ),
+            (
+                # Valid under IDNA 2008, but was invalid in IDNA 2003.
+                "https://މިހާރު.com",
+                "https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
+            ),
+            (
+                # Valid under WHATWG URL Specification but not IDNA 2008.
+                "http://👓.ws",
+                "http://%F0%9F%91%93.ws",
+            ),
+            # Pre-encoded IDNA is left unchanged.
+            ("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
+            ("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
             # Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
             # safe as per RFC.
             (
@@ -287,8 +305,10 @@ class TestUtilsHtml(SimpleTestCase):
                 "django",
             ),
             ("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
+            ('http://example.com">', "http://example.com%22%3E"),
+            ("http://10.22.1.1/", "http://10.22.1.1/"),
+            ("http://[fd00::1]/", "http://[fd00::1]/"),
         )
-        # IDNs are properly quoted
         for value, output in items:
             with self.subTest(value=value, output=output):
                 self.assertEqual(smart_urlquote(value), output)
@@ -361,11 +381,21 @@ class TestUtilsHtml(SimpleTestCase):
                 lazystr("Search for google.com/?q=!"),
                 'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
             ),
+            (
+                "http://www.foo.bar/",
+                '<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
+            ),
+            (
+                "Look on www.نامه‌ای.com.",
+                "Look on <a "
+                'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
+                ">www.نامه‌ای.com</a>.",
+            ),
             ("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
             (
                 "test@" + "한.글." * 15 + "aaa",
                 '<a href="mailto:test@'
-                + "xn--6q8b.xn--bj0b." * 15
+                + "%ED%95%9C.%EA%B8%80." * 15
                 + 'aaa">'
                 + "test@"
                 + "한.글." * 15
@@ -378,6 +408,15 @@ class TestUtilsHtml(SimpleTestCase):
                 '<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
                 ">yes+this=is&a%valid!email@example.com</a>",
             ),
+            (
+                "foo@faß.example.com",
+                '<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
+            ),
+            (
+                "idna-2008@މިހާރު.example.mv",
+                '<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
+                'ample.mv">idna-2008@މިހާރު.example.mv</a>',
+            ),
         )
         for value, output in tests:
             with self.subTest(value=value):