|
@@ -4,18 +4,9 @@ import re
|
|
|
import unicodedata
|
|
|
from binascii import Error as BinasciiError
|
|
|
from email.utils import formatdate
|
|
|
-from urllib.parse import (
|
|
|
- ParseResult,
|
|
|
- SplitResult,
|
|
|
- _coerce_args,
|
|
|
- _splitnetloc,
|
|
|
- _splitparams,
|
|
|
- quote,
|
|
|
- scheme_chars,
|
|
|
- unquote,
|
|
|
-)
|
|
|
+from urllib.parse import quote, unquote
|
|
|
from urllib.parse import urlencode as original_urlencode
|
|
|
-from urllib.parse import uses_params
|
|
|
+from urllib.parse import urlparse
|
|
|
|
|
|
from django.utils.datastructures import MultiValueDict
|
|
|
from django.utils.regex_helper import _lazy_re_compile
|
|
@@ -47,10 +38,6 @@ ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))
|
|
|
RFC3986_GENDELIMS = ":/?#[]@"
|
|
|
RFC3986_SUBDELIMS = "!$&'()*+,;="
|
|
|
|
|
|
-# TODO: Remove when dropping support for PY38.
|
|
|
-# Unsafe bytes to be removed per WHATWG spec.
|
|
|
-_UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
|
|
|
-
|
|
|
|
|
|
def urlencode(query, doseq=False):
|
|
|
"""
|
|
@@ -283,74 +270,13 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
|
|
)
|
|
|
|
|
|
|
|
|
-# TODO: Remove when dropping support for PY38.
|
|
|
-# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
|
|
|
-def _urlparse(url, scheme="", allow_fragments=True):
|
|
|
- """Parse a URL into 6 components:
|
|
|
- <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
|
|
|
- Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
|
|
|
- Note that we don't break the components up in smaller bits
|
|
|
- (e.g. netloc is a single string) and we don't expand % escapes."""
|
|
|
- url, scheme, _coerce_result = _coerce_args(url, scheme)
|
|
|
- splitresult = _urlsplit(url, scheme, allow_fragments)
|
|
|
- scheme, netloc, url, query, fragment = splitresult
|
|
|
- if scheme in uses_params and ";" in url:
|
|
|
- url, params = _splitparams(url)
|
|
|
- else:
|
|
|
- params = ""
|
|
|
- result = ParseResult(scheme, netloc, url, params, query, fragment)
|
|
|
- return _coerce_result(result)
|
|
|
-
|
|
|
-
|
|
|
-# TODO: Remove when dropping support for PY38.
|
|
|
-def _remove_unsafe_bytes_from_url(url):
|
|
|
- for b in _UNSAFE_URL_BYTES_TO_REMOVE:
|
|
|
- url = url.replace(b, "")
|
|
|
- return url
|
|
|
-
|
|
|
-
|
|
|
-# TODO: Remove when dropping support for PY38.
|
|
|
-# Backport of urllib.parse.urlsplit() from Python 3.9.
|
|
|
-def _urlsplit(url, scheme="", allow_fragments=True):
|
|
|
- """Parse a URL into 5 components:
|
|
|
- <scheme>://<netloc>/<path>?<query>#<fragment>
|
|
|
- Return a 5-tuple: (scheme, netloc, path, query, fragment).
|
|
|
- Note that we don't break the components up in smaller bits
|
|
|
- (e.g. netloc is a single string) and we don't expand % escapes."""
|
|
|
- url, scheme, _coerce_result = _coerce_args(url, scheme)
|
|
|
- url = _remove_unsafe_bytes_from_url(url)
|
|
|
- scheme = _remove_unsafe_bytes_from_url(scheme)
|
|
|
-
|
|
|
- netloc = query = fragment = ""
|
|
|
- i = url.find(":")
|
|
|
- if i > 0:
|
|
|
- for c in url[:i]:
|
|
|
- if c not in scheme_chars:
|
|
|
- break
|
|
|
- else:
|
|
|
- scheme, url = url[:i].lower(), url[i + 1 :]
|
|
|
-
|
|
|
- if url[:2] == "//":
|
|
|
- netloc, url = _splitnetloc(url, 2)
|
|
|
- if ("[" in netloc and "]" not in netloc) or (
|
|
|
- "]" in netloc and "[" not in netloc
|
|
|
- ):
|
|
|
- raise ValueError("Invalid IPv6 URL")
|
|
|
- if allow_fragments and "#" in url:
|
|
|
- url, fragment = url.split("#", 1)
|
|
|
- if "?" in url:
|
|
|
- url, query = url.split("?", 1)
|
|
|
- v = SplitResult(scheme, netloc, url, query, fragment)
|
|
|
- return _coerce_result(v)
|
|
|
-
|
|
|
-
|
|
|
def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
|
|
# Chrome considers any URL with more than two slashes to be absolute, but
|
|
|
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
|
|
|
if url.startswith("///"):
|
|
|
return False
|
|
|
try:
|
|
|
- url_info = _urlparse(url)
|
|
|
+ url_info = urlparse(url)
|
|
|
except ValueError: # e.g. invalid IPv6 addresses
|
|
|
return False
|
|
|
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
|