Browse Source

Fixed #18456 -- Added path escaping to HttpRequest.get_full_path().

Unai Zalakain 10 years ago
parent
commit
c548c8d0d1

+ 4 - 2
django/http/request.py

@@ -15,7 +15,9 @@ from django.core.files import uploadhandler
 from django.http.multipartparser import MultiPartParser, MultiPartParserError
 from django.utils import six
 from django.utils.datastructures import MultiValueDict, ImmutableList
-from django.utils.encoding import force_bytes, force_text, force_str, iri_to_uri
+from django.utils.encoding import (
+    force_bytes, force_text, force_str, escape_uri_path, iri_to_uri,
+)
 from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit
 
 
@@ -98,7 +100,7 @@ class HttpRequest(object):
         # RFC 3986 requires query string arguments to be in the ASCII range.
         # Rather than crash if this doesn't happen, we encode defensively.
         return '%s%s' % (
-            self.path,
+            escape_uri_path(self.path),
             ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
         )
 

+ 17 - 0
django/utils/encoding.py

@@ -226,6 +226,23 @@ def uri_to_iri(uri):
     return repercent_broken_unicode(iri).decode('utf-8')
 
 
+def escape_uri_path(path):
+    """
+    Escape the unsafe characters from the path portion of a Uniform Resource
+    Identifier (URI).
+    """
+    # These are the "reserved" and "unreserved" characters specified in
+    # sections 2.2 and 2.3 of RFC 2396:
+    #   reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
+    #   unreserved  = alphanum | mark
+    #   mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+    # The list of safe characters here is constructed by subtracting ";",
+    # "=", and "?" according to section 3.3 of RFC 2396.
+    # The reason for not subtracting and escaping "/" is that we are escaping
+    # the entire path, not a path segment.
+    return quote(force_bytes(path), safe=b"/:@&+$,-_.!~*'()")
+
+
 def repercent_broken_unicode(path):
     """
     As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,

+ 7 - 0
docs/ref/utils.txt

@@ -298,6 +298,13 @@ The functions defined in this module share the following properties:
 
     Returns an ASCII string containing the encoded result.
 
+.. function:: escape_uri_path(path)
+
+    .. versionadded:: 1.8
+
+    Escapes the unsafe characters from the path portion of a Uniform Resource
+    Identifier (URI).
+
 ``django.utils.feedgenerator``
 ==============================
 

+ 4 - 0
docs/releases/1.8.txt

@@ -381,6 +381,10 @@ Requests and Responses
 * ``WSGIRequestHandler`` now follows RFC in converting URI to IRI, using
   ``uri_to_iri()``.
 
+* The :meth:`HttpRequest.get_full_path()
+  <django.http.HttpRequest.get_full_path>` method now properly escapes unsafe
+  characters in the path portion of a Uniform Resource Identifier (URI).
+
 Tests
 ^^^^^
 

+ 13 - 0
tests/requests/tests.py

@@ -35,6 +35,19 @@ class RequestsTests(SimpleTestCase):
         # and FILES should be MultiValueDict
         self.assertEqual(request.FILES.getlist('foo'), [])
 
+    def test_httprequest_full_path(self):
+        request = HttpRequest()
+        request.path = request.path_info = '/;some/?awful/=path/foo:bar/'
+        request.META['QUERY_STRING'] = ';some=query&+query=string'
+        expected = '/%3Bsome/%3Fawful/%3Dpath/foo:bar/?;some=query&+query=string'
+        self.assertEqual(request.get_full_path(), expected)
+
+    def test_httprequest_full_path_with_query_string_and_fragment(self):
+        request = HttpRequest()
+        request.path = request.path_info = '/foo#bar'
+        request.META['QUERY_STRING'] = 'baz#quux'
+        self.assertEqual(request.get_full_path(), '/foo%23bar?baz#quux')
+
     def test_httprequest_repr(self):
         request = HttpRequest()
         request.path = '/somepath/'

+ 12 - 2
tests/utils_tests/test_encoding.py

@@ -5,8 +5,10 @@ import unittest
 import datetime
 
 from django.utils import six
-from django.utils.encoding import (filepath_to_uri, force_bytes, force_text,
-    iri_to_uri, uri_to_iri)
+from django.utils.encoding import (
+    filepath_to_uri, force_bytes, force_text, escape_uri_path,
+    iri_to_uri, uri_to_iri,
+)
 from django.utils.http import urlquote_plus
 
 
@@ -40,6 +42,14 @@ class TestEncodingUtils(unittest.TestCase):
         today = datetime.date.today()
         self.assertEqual(force_bytes(today, strings_only=True), today)
 
+    def test_escape_uri_path(self):
+        self.assertEqual(
+            escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
+            '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
+        )
+        self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
+        self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')
+
 
 class TestRFC3987IEncodingUtils(unittest.TestCase):