Browse Source

Fixed #27083 -- Added support for weak ETags.

Kevin Christopher Henry 8 years ago
parent
commit
4ef0e019b7

+ 1 - 2
django/middleware/common.py

@@ -10,7 +10,6 @@ from django.utils.cache import (
 )
 from django.utils.deprecation import MiddlewareMixin
 from django.utils.encoding import force_text
-from django.utils.http import unquote_etag
 from django.utils.six.moves.urllib.parse import urlparse
 
 
@@ -122,7 +121,7 @@ class CommonMiddleware(MiddlewareMixin):
             if response.has_header('ETag'):
                 return get_conditional_response(
                     request,
-                    etag=unquote_etag(response['ETag']),
+                    etag=response['ETag'],
                     response=response,
                 )
         # Add the Content-Length header to non-streaming responses if not

+ 2 - 2
django/middleware/http.py

@@ -1,6 +1,6 @@
 from django.utils.cache import get_conditional_response
 from django.utils.deprecation import MiddlewareMixin
-from django.utils.http import http_date, parse_http_date_safe, unquote_etag
+from django.utils.http import http_date, parse_http_date_safe
 
 
 class ConditionalGetMiddleware(MiddlewareMixin):
@@ -24,7 +24,7 @@ class ConditionalGetMiddleware(MiddlewareMixin):
         if etag or last_modified:
             return get_conditional_response(
                 request,
-                etag=unquote_etag(etag),
+                etag=etag,
                 last_modified=last_modified,
                 response=response,
             )

+ 25 - 20
django/utils/http.py

@@ -21,7 +21,15 @@ from django.utils.six.moves.urllib.parse import (
     urlparse,
 )
 
-ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
+# based on RFC 7232, Appendix C
+ETAG_MATCH = re.compile(r'''
+    \A(      # start of string and capture group
+    (?:W/)?  # optional weak indicator
+    "        # opening quote
+    [^"]*    # any sequence of non-quote characters
+    "        # end quote
+    )\Z      # end of string and capture group
+''', re.X)
 
 MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
 __D = r'(?P<day>\d{2})'
@@ -234,30 +242,27 @@ def urlsafe_base64_decode(s):
 
 def parse_etags(etag_str):
     """
-    Parses a string with one or several etags passed in If-None-Match and
-    If-Match headers by the rules in RFC 2616. Returns a list of etags
-    without surrounding double quotes (") and unescaped from \<CHAR>.
+    Parse a string of ETags given in an If-None-Match or If-Match header as
+    defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
+    should be matched.
     """
-    etags = ETAG_MATCH.findall(etag_str)
-    if not etags:
-        # etag_str has wrong format, treat it as an opaque string then
-        return [etag_str]
-    etags = [e.encode('ascii').decode('unicode_escape') for e in etags]
-    return etags
-
-
-def quote_etag(etag):
-    """
-    Wraps a string in double quotes escaping contents as necessary.
-    """
-    return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')
+    if etag_str.strip() == '*':
+        return ['*']
+    else:
+        # Parse each ETag individually, and return any that are valid.
+        etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(','))
+        return [match.group(1) for match in etag_matches if match]
 
 
-def unquote_etag(etag):
+def quote_etag(etag_str):
     """
-    Unquote an ETag string; i.e. revert quote_etag().
+    If the provided string is already a quoted ETag, return it. Otherwise, wrap
+    the string in quotes, making it a strong ETag.
     """
-    return etag.strip('"').replace('\\"', '"').replace('\\\\', '\\') if etag else etag
+    if ETAG_MATCH.match(etag_str):
+        return etag_str
+    else:
+        return '"%s"' % etag_str
 
 
 def is_same_domain(host, pattern):

+ 11 - 9
django/views/decorators/http.py

@@ -62,16 +62,16 @@ def condition(etag_func=None, last_modified_func=None):
     None if the resource doesn't exist), while the last_modified function
     should return a datetime object (or None if the resource doesn't exist).
 
-    If both parameters are provided, all the preconditions must be met before
-    the view is processed.
+    The ETag function should return a complete ETag, including quotes (e.g.
+    '"etag"'), since that's the only way to distinguish between weak and strong
+    ETags. If an unquoted ETag is returned (e.g. 'etag'), it will be converted
+    to a strong ETag by adding quotes.
 
     This decorator will either pass control to the wrapped view function or
-    return an HTTP 304 response (unmodified) or 412 response (preconditions
-    failed), depending upon the request method.
-
-    Any behavior marked as "undefined" in the HTTP spec (e.g. If-none-match
-    plus If-modified-since headers) will result in the view function being
-    called.
+    return an HTTP 304 response (unmodified) or 412 response (precondition
+    failed), depending upon the request method. In either case, it will add the
+    generated ETag and Last-Modified headers to the response if it doesn't
+    already have them.
     """
     def decorator(func):
         @wraps(func, assigned=available_attrs(func))
@@ -83,7 +83,9 @@ def condition(etag_func=None, last_modified_func=None):
                     if dt:
                         return timegm(dt.utctimetuple())
 
+            # The value from etag_func() could be quoted or unquoted.
             res_etag = etag_func(request, *args, **kwargs) if etag_func else None
+            res_etag = quote_etag(res_etag) if res_etag is not None else None
             res_last_modified = get_last_modified()
 
             response = get_conditional_response(
@@ -99,7 +101,7 @@ def condition(etag_func=None, last_modified_func=None):
             if res_last_modified and not response.has_header('Last-Modified'):
                 response['Last-Modified'] = http_date(res_last_modified)
             if res_etag and not response.has_header('ETag'):
-                response['ETag'] = quote_etag(res_etag)
+                response['ETag'] = res_etag
 
             return response
 

+ 3 - 0
docs/releases/1.11.txt

@@ -491,6 +491,9 @@ Miscellaneous
 * The admin's widget for ``IntegerField`` uses ``type="number"`` rather than
   ``type="text"``.
 
+* ETags are now parsed according to the :rfc:`7232` Conditional Requests
+  specification rather than the syntax from :rfc:`2616`.
+
 .. _deprecated-features-1.11:
 
 Features deprecated in 1.11

+ 8 - 0
docs/topics/conditional-view-processing.txt

@@ -66,6 +66,14 @@ last time the resource was modified, or ``None`` if the resource doesn't
 exist. The function passed to the ``etag`` decorator should return a string
 representing the `ETag`_ for the resource, or ``None`` if it doesn't exist.
 
+.. versionchanged:: 1.11
+
+    In older versions, the return value from ``etag_func()`` was interpreted as
+    the unquoted part of the ETag. That prevented the use of weak ETags, which
+    have the format ``W/"<string>"``. The return value is now expected to be
+    an ETag as defined by the specification (including the quotes), although
+    the unquoted format is also accepted for backwards compatibility.
+
 Using this feature usefully is probably best explained with an example.
 Suppose you have this pair of models, representing a simple blog system::
 

+ 49 - 22
tests/conditional_processing/tests.py

@@ -11,8 +11,8 @@ LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
 LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
 LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
 EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
-ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
-EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
+ETAG = '"b4246ffc4f62314ca13147c9d4f76974"'
+EXPIRED_ETAG = '"7fae4cd4b0f81e7d2914700043aa8ed6"'
 
 
 @override_settings(ROOT_URLCONF='conditional_processing.urls')
@@ -24,7 +24,7 @@ class ConditionalGet(SimpleTestCase):
         if check_last_modified:
             self.assertEqual(response['Last-Modified'], LAST_MODIFIED_STR)
         if check_etag:
-            self.assertEqual(response['ETag'], '"%s"' % ETAG)
+            self.assertEqual(response['ETag'], ETAG)
 
     def assertNotModified(self, response):
         self.assertEqual(response.status_code, 304)
@@ -63,66 +63,66 @@ class ConditionalGet(SimpleTestCase):
         self.assertEqual(response.status_code, 412)
 
     def test_if_none_match(self):
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
         response = self.client.get('/condition/')
         self.assertNotModified(response)
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/')
         self.assertFullResponse(response)
 
         # Several etags in If-None-Match is a bit exotic but why not?
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG)
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '%s, %s' % (ETAG, EXPIRED_ETAG)
         response = self.client.get('/condition/')
         self.assertNotModified(response)
 
     def test_if_match(self):
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_MATCH'] = ETAG
         response = self.client.put('/condition/etag/')
         self.assertEqual(response.status_code, 200)
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
         response = self.client.put('/condition/etag/')
         self.assertEqual(response.status_code, 412)
 
     def test_both_headers(self):
         # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
         response = self.client.get('/condition/')
         self.assertNotModified(response)
 
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
         response = self.client.get('/condition/')
         self.assertFullResponse(response)
 
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/')
         self.assertFullResponse(response)
 
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/')
         self.assertFullResponse(response)
 
     def test_both_headers_2(self):
         self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_MATCH'] = ETAG
         response = self.client.get('/condition/')
         self.assertFullResponse(response)
 
         self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_MATCH'] = ETAG
         response = self.client.get('/condition/')
         self.assertEqual(response.status_code, 412)
 
-        self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/')
         self.assertEqual(response.status_code, 412)
 
-        self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
-        self.client.defaults['HTTP_IF_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_UNMODIFIED_SINCE'] = LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/')
         self.assertEqual(response.status_code, 412)
 
@@ -134,7 +134,7 @@ class ConditionalGet(SimpleTestCase):
         self.assertFullResponse(response, check_last_modified=False)
 
     def test_single_condition_2(self):
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
         response = self.client.get('/condition/etag/')
         self.assertNotModified(response)
         response = self.client.get('/condition/last_modified/')
@@ -146,7 +146,7 @@ class ConditionalGet(SimpleTestCase):
         self.assertFullResponse(response, check_etag=False)
 
     def test_single_condition_4(self):
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
         response = self.client.get('/condition/etag/')
         self.assertFullResponse(response, check_last_modified=False)
 
@@ -158,7 +158,7 @@ class ConditionalGet(SimpleTestCase):
         self.assertFullResponse(response, check_last_modified=False)
 
     def test_single_condition_6(self):
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
         response = self.client.get('/condition/etag2/')
         self.assertNotModified(response)
         response = self.client.get('/condition/last_modified2/')
@@ -188,7 +188,34 @@ class ConditionalGet(SimpleTestCase):
         response = self.client.head('/condition/')
         self.assertNotModified(response)
 
+    def test_unquoted(self):
+        """
+        The same quoted ETag should be set on the header regardless of whether
+        etag_func() in condition() returns a quoted or an unquoted ETag.
+        """
+        response_quoted = self.client.get('/condition/etag/')
+        response_unquoted = self.client.get('/condition/unquoted_etag/')
+        self.assertEqual(response_quoted['ETag'], response_unquoted['ETag'])
+
+    # It's possible that the matching algorithm could use the wrong value even
+    # if the ETag header is set correctly correctly (as tested by
+    # test_unquoted()), so check that the unquoted value is matched.
+    def test_unquoted_if_none_match(self):
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = ETAG
+        response = self.client.get('/condition/unquoted_etag/')
+        self.assertNotModified(response)
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = EXPIRED_ETAG
+        response = self.client.get('/condition/unquoted_etag/')
+        self.assertFullResponse(response, check_last_modified=False)
+
+    def test_all_if_none_match(self):
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '*'
+        response = self.client.get('/condition/etag/')
+        self.assertNotModified(response)
+        response = self.client.get('/condition/no_etag/')
+        self.assertFullResponse(response, check_last_modified=False, check_etag=False)
+
     def test_invalid_etag(self):
-        self.client.defaults['HTTP_IF_NONE_MATCH'] = r'"\"'
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"""'
         response = self.client.get('/condition/etag/')
         self.assertFullResponse(response, check_last_modified=False)

+ 2 - 0
tests/conditional_processing/urls.py

@@ -8,4 +8,6 @@ urlpatterns = [
     url('^condition/last_modified2/$', views.last_modified_view2),
     url('^condition/etag/$', views.etag_view1),
     url('^condition/etag2/$', views.etag_view2),
+    url('^condition/unquoted_etag/$', views.etag_view_unquoted),
+    url('^condition/no_etag/$', views.etag_view_none),
 ]

+ 16 - 0
tests/conditional_processing/views.py

@@ -27,3 +27,19 @@ etag_view1 = condition(etag_func=lambda r: ETAG)(etag_view1)
 def etag_view2(request):
     return HttpResponse(FULL_RESPONSE)
 etag_view2 = etag(lambda r: ETAG)(etag_view2)
+
+
+@condition(etag_func=lambda r: ETAG.strip('"'))
+def etag_view_unquoted(request):
+    """
+    Use an etag_func() that returns an unquoted ETag.
+    """
+    return HttpResponse(FULL_RESPONSE)
+
+
+@condition(etag_func=lambda r: None)
+def etag_view_none(request):
+    """
+    Use an etag_func() that returns None, as opposed to setting etag_func=None.
+    """
+    return HttpResponse(FULL_RESPONSE)

+ 0 - 5
tests/middleware/tests.py

@@ -513,11 +513,6 @@ class ConditionalGetMiddlewareTest(SimpleTestCase):
         self.assertEqual(self.resp.status_code, 200)
 
     def test_if_none_match_and_same_etag(self):
-        self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam'
-        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
-        self.assertEqual(self.resp.status_code, 304)
-
-    def test_if_none_match_and_same_etag_with_quotes(self):
         self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = '"spam"'
         self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
         self.assertEqual(self.resp.status_code, 304)

+ 11 - 6
tests/utils_tests/test_http.py

@@ -208,14 +208,19 @@ class TestUtilsHttp(unittest.TestCase):
 
 class ETagProcessingTests(unittest.TestCase):
     def test_parsing(self):
-        etags = http.parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
-        self.assertEqual(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
+        self.assertEqual(
+            http.parse_etags(r'"" ,  "etag", "e\\tag", W/"weak"'),
+            ['""', '"etag"', r'"e\\tag"', 'W/"weak"']
+        )
+        self.assertEqual(http.parse_etags('*'), ['*'])
+
+        # Ignore RFC 2616 ETags that are invalid according to RFC 7232.
+        self.assertEqual(http.parse_etags(r'"etag", "e\"t\"ag"'), ['"etag"'])
 
     def test_quoting(self):
-        original_etag = r'e\t"ag'
-        quoted_etag = http.quote_etag(original_etag)
-        self.assertEqual(quoted_etag, r'"e\\t\"ag"')
-        self.assertEqual(http.unquote_etag(quoted_etag), original_etag)
+        self.assertEqual(http.quote_etag('etag'), '"etag"')  # unquoted
+        self.assertEqual(http.quote_etag('"etag"'), '"etag"')  # quoted
+        self.assertEqual(http.quote_etag('W/"etag"'), 'W/"etag"')  # quoted, weak
 
 
 class HttpDateProcessingTests(unittest.TestCase):