Просмотр исходного кода

Replace `urlparse` with `urlsplit`. (#12028)

`urlsplit` is approximately 6x faster.
Jake Howard 9 месяцев назад
Родитель
Commit
d1a0f4cd29

+ 1 - 0
CHANGELOG.txt

@@ -36,6 +36,7 @@ Changelog
  * Maintenance: Exclude the `client/scss` directory in Tailwind content config to speed up CSS compilation (Sage Abdullah)
  * Maintenance: Split `contrib.frontend_cache.backends` into dedicated sub-modules (Andy Babic)
  * Maintenance: Remove unused `docs/autobuild.sh` script (Sævar Öfjörð Magnússon)
+ * Maintenance: Replace `urlparse` with `urlsplit` to improve performance (Jake Howard)
 
 
 6.1.2 (30.05.2024)

+ 1 - 0
docs/releases/6.2.md

@@ -57,6 +57,7 @@ depth: 1
  * Exclude the `client/scss` directory in Tailwind content config to speed up CSS compilation (Sage Abdullah)
  * Split `contrib.frontend_cache.backends` into dedicated sub-modules (Andy Babic)
  * Remove unused `docs/autobuild.sh` script (Sævar Öfjörð Magnússon)
+ * Replace `urlparse` with `urlsplit` to improve performance (Jake Howard)
 
 
 ## Upgrade considerations - changes affecting all projects

+ 7 - 7
wagtail/admin/tests/test_page_chooser.py

@@ -1,5 +1,5 @@
 import json
-import urllib.parse as urlparse
+from urllib.parse import parse_qs, urlsplit
 
 from django.contrib.auth import get_user_model
 from django.test import TestCase, TransactionTestCase, override_settings
@@ -930,8 +930,8 @@ class TestChooserEmailLink(WagtailTestUtils, TestCase):
         )  # When link text is given, it is used
         self.assertIs(result["prefer_this_title_as_link_text"], True)
 
-        mail_parts = urlparse.urlparse(url)
-        query = urlparse.parse_qs(mail_parts.query)
+        mail_parts = urlsplit(url)
+        query = parse_qs(mail_parts.query)
         self.assertEqual(mail_parts.path, "example@example.com")
         self.assertEqual(query["subject"][0], "Awesome Subject")
         self.assertEqual(query["body"][0], "An example body")
@@ -953,8 +953,8 @@ class TestChooserEmailLink(WagtailTestUtils, TestCase):
         )  # When link text is given, it is used
         self.assertIs(result["prefer_this_title_as_link_text"], True)
 
-        mail_parts = urlparse.urlparse(url)
-        query = urlparse.parse_qs(mail_parts.query)
+        mail_parts = urlsplit(url)
+        query = parse_qs(mail_parts.query)
         self.assertEqual(mail_parts.path, "example@example.com")
         self.assertEqual(query["subject"][0], "Awesome Subject")
         self.assertTrue("body" not in query)
@@ -976,8 +976,8 @@ class TestChooserEmailLink(WagtailTestUtils, TestCase):
         )  # When link text is given, it is used
         self.assertIs(result["prefer_this_title_as_link_text"], True)
 
-        mail_parts = urlparse.urlparse(url)
-        query = urlparse.parse_qs(mail_parts.query)
+        mail_parts = urlsplit(url)
+        query = parse_qs(mail_parts.query)
         self.assertEqual(mail_parts.path, "example@example.com")
         self.assertEqual(query["body"][0], "An example body")
         self.assertTrue("subject" not in query)

+ 5 - 5
wagtail/admin/views/chooser.py

@@ -1,5 +1,5 @@
 import re
-import urllib.parse as urlparse
+from urllib.parse import parse_qs, quote, urlencode, urlsplit
 
 from django.conf import settings
 from django.core.paginator import InvalidPage, Paginator
@@ -748,9 +748,9 @@ class EmailLinkView(BaseLinkFormView):
             "subject": self.form.cleaned_data["subject"],
             "body": self.form.cleaned_data["body"],
         }
-        encoded_params = urlparse.urlencode(
+        encoded_params = urlencode(
             {k: v for k, v in params.items() if v is not None and v != ""},
-            quote_via=urlparse.quote,
+            quote_via=quote,
         )
 
         url = "mailto:" + self.form.cleaned_data["email_address"]
@@ -781,11 +781,11 @@ class EmailLinkView(BaseLinkFormView):
     def parse_email_link(self, mailto):
         result = {}
 
-        mail_result = urlparse.urlparse(mailto)
+        mail_result = urlsplit(mailto)
 
         result["email"] = mail_result.path
 
-        query = urlparse.parse_qs(mail_result.query)
+        query = parse_qs(mail_result.query)
         result["subject"] = query["subject"][0] if "subject" in query else ""
         result["body"] = query["body"][0] if "body" in query else ""
 

+ 2 - 2
wagtail/api/v2/utils.py

@@ -1,4 +1,4 @@
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
 
 from django.conf import settings
 from django.utils.encoding import force_str
@@ -21,7 +21,7 @@ def get_base_url(request=None):
 
     if base_url:
         # We only want the scheme and netloc
-        base_url_parsed = urlparse(force_str(base_url))
+        base_url_parsed = urlsplit(force_str(base_url))
 
         return base_url_parsed.scheme + "://" + base_url_parsed.netloc
 

+ 4 - 5
wagtail/contrib/frontend_cache/backends/http.py

@@ -1,6 +1,6 @@
 import logging
 from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlsplit, urlunsplit
 from urllib.request import Request, urlopen
 
 from wagtail import __version__
@@ -21,12 +21,12 @@ class PurgeRequest(Request):
 class HTTPBackend(BaseBackend):
     def __init__(self, params):
         super().__init__(params)
-        location_url_parsed = urlparse(params.pop("LOCATION"))
+        location_url_parsed = urlsplit(params.pop("LOCATION"))
         self.cache_scheme = location_url_parsed.scheme
         self.cache_netloc = location_url_parsed.netloc
 
     def purge(self, url):
-        url_parsed = urlparse(url)
+        url_parsed = urlsplit(url)
         host = url_parsed.hostname
 
         # Append port to host if it is set in the original URL
@@ -34,12 +34,11 @@ class HTTPBackend(BaseBackend):
             host += ":" + str(url_parsed.port)
 
         request = PurgeRequest(
-            url=urlunparse(
+            url=urlunsplit(
                 [
                     self.cache_scheme,
                     self.cache_netloc,
                     url_parsed.path,
-                    url_parsed.params,
                     url_parsed.query,
                     url_parsed.fragment,
                 ]

+ 4 - 5
wagtail/contrib/frontend_cache/utils.py

@@ -1,7 +1,7 @@
 import logging
 import re
 from collections import defaultdict
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlsplit, urlunsplit
 
 from django.conf import settings
 from django.core.exceptions import ImproperlyConfigured
@@ -80,13 +80,12 @@ def purge_urls_from_cache(urls, backend_settings=None, backends=None):
         # Purge the given url for each managed language
         for isocode in languages:
             for url in urls:
-                up = urlparse(url)
-                new_url = urlunparse(
+                up = urlsplit(url)
+                new_url = urlunsplit(
                     (
                         up.scheme,
                         up.netloc,
                         re.sub(langs_regex, "/%s/" % isocode, up.path),
-                        up.params,
                         up.query,
                         up.fragment,
                     )
@@ -104,7 +103,7 @@ def purge_urls_from_cache(urls, backend_settings=None, backends=None):
     urls_by_hostname = defaultdict(list)
 
     for url in urls:
-        urls_by_hostname[urlparse(url).netloc].append(url)
+        urls_by_hostname[urlsplit(url).netloc].append(url)
 
     backends = get_backends(backend_settings, backends)
 

+ 3 - 3
wagtail/contrib/redirects/models.py

@@ -146,7 +146,7 @@ class Redirect(models.Model):
         url_parsed = urlparse(url)
 
         # Path must start with / but not end with /
-        path = url_parsed[2]
+        path = url_parsed.path
         if not path.startswith("/"):
             path = "/" + path
 
@@ -154,12 +154,12 @@ class Redirect(models.Model):
             path = path[:-1]
 
         # Parameters must be sorted alphabetically
-        parameters = url_parsed[3]
+        parameters = url_parsed.params
         parameters_components = parameters.split(";")
         parameters = ";".join(sorted(parameters_components))
 
         # Query string components must be sorted alphabetically
-        query_string = url_parsed[4]
+        query_string = url_parsed.query
         query_string_components = query_string.split("&")
         query_string = "&".join(sorted(query_string_components))
 

+ 2 - 2
wagtail/models/__init__.py

@@ -17,7 +17,7 @@ import posixpath
 import uuid
 from io import StringIO
 from typing import TYPE_CHECKING
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
 from warnings import warn
 
 from django import forms
@@ -702,7 +702,7 @@ class PreviewableMixin:
         """
         url = self._get_dummy_header_url(original_request)
         if url:
-            url_info = urlparse(url)
+            url_info = urlsplit(url)
             hostname = url_info.hostname
             path = url_info.path
             port = url_info.port or (443 if url_info.scheme == "https" else 80)