Browse Source

Fixed #5791 -- Added early-bailout support for views (ETags and Last-modified).

This provides support for views that can have their ETag and/or Last-modified
values computed much more quickly than the view itself. Supports all HTTP
verbs (not just GET).

Documentation and tests need a little more fleshing out (I'm not happy with the
documentation at the moment, since it's a bit backwards), but the functionality
is correct.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@10114 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Malcolm Tredinnick 16 years ago
parent
commit
b203db6ec8

+ 23 - 0
django/utils/http.py

@@ -1,9 +1,12 @@
+import re
 import urllib
 from email.Utils import formatdate
 
 from django.utils.encoding import smart_str, force_unicode
 from django.utils.functional import allow_lazy
 
+ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
+
 def urlquote(url, safe='/'):
     """
     A version of Python's urllib.quote() function that can operate on unicode
@@ -94,3 +97,23 @@ def int_to_base36(i):
         i = i % j
         factor -= 1
     return ''.join(base36)
+
+def parse_etags(etag_str):
+    """
+    Parses a string with one or several etags passed in If-None-Match and
+    If-Match headers by the rules in RFC 2616. Returns a list of etags
+    without surrounding double quotes (") and unescaped from \<CHAR>.
+
+    A weak-validator prefix (W/) is matched but not returned. If no quoted
+    etag is found at all, the input is returned unchanged as the only item.
+    """
+    # ETAG_MATCH captures only the text between the quotes of each etag.
+    etags = ETAG_MATCH.findall(etag_str)
+    if not etags:
+        # etag_str has wrong format, treat it as an opaque string then
+        # (an unquoted value such as "*" ends up here).
+        return [etag_str]
+    # Undo the backslash escaping inside each quoted string.
+    etags = [e.decode('string_escape') for e in etags]
+    return etags
+
+def quote_etag(etag):
+    """
+    Wraps a string in double quotes escaping contents as necessary.
+
+    Backslashes are doubled first, then double quotes are backslash-escaped.
+    """
+    return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')
+

+ 98 - 2
django/views/decorators/http.py

@@ -7,9 +7,15 @@ try:
 except ImportError:
     from django.utils.functional import wraps  # Python 2.3, 2.4 fallback.
 
+from calendar import timegm
+from datetime import timedelta
+from email.Utils import formatdate
+
 from django.utils.decorators import decorator_from_middleware
+from django.utils.http import parse_etags, quote_etag
 from django.middleware.http import ConditionalGetMiddleware
-from django.http import HttpResponseNotAllowed
+from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse
+
 
 conditional_page = decorator_from_middleware(ConditionalGetMiddleware)
 
@@ -36,4 +42,94 @@ require_GET = require_http_methods(["GET"])
 require_GET.__doc__ = "Decorator to require that a view only accept the GET method."
 
 require_POST = require_http_methods(["POST"])
-require_POST.__doc__ = "Decorator to require that a view only accept the POST method."
+require_POST.__doc__ = "Decorator to require that a view only accept the POST method."
+
+def condition(etag_func=None, last_modified_func=None):
+    """
+    Decorator to support conditional retrieval (or change) for a view
+    function.
+
+    The parameters are callables to compute the ETag and last modified time for
+    the requested resource, respectively. The callables are passed the same
+    parameters as the view itself. The Etag function should return a string (or
+    None if the resource doesn't exist), whilst the last_modified function
+    should return a datetime object (or None if the resource doesn't exist).
+
+    If both parameters are provided, all the preconditions must be met before
+    the view is processed.
+
+    This decorator will either pass control to the wrapped view function or
+    return an HTTP 304 response (unmodified) or 412 response (preconditions
+    failed), depending upon the request method.
+
+    Any behavior marked as "undefined" in the HTTP spec (e.g. If-none-match
+    plus If-modified-since headers) will result in the view function being
+    called.
+    """
+    def decorator(func):
+        def inner(request, *args, **kwargs):
+            # Get HTTP request headers
+            if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE")
+            if_none_match = request.META.get("HTTP_IF_NONE_MATCH")
+            if_match = request.META.get("HTTP_IF_MATCH")
+            if if_none_match or if_match:
+                # There can be more than one ETag in the request, so we
+                # consider the list of values.
+                etags = parse_etags(if_none_match)
+
+            # Compute values (if any) for the requested resource.
+            if etag_func:
+                res_etag = etag_func(request, *args, **kwargs)
+            else:
+                res_etag = None
+            if last_modified_func:
+                dt = last_modified_func(request, *args, **kwargs)
+                if dt:
+                    res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT'
+                else:
+                    res_last_modified = None
+            else:
+                res_last_modified = None
+
+            response = None
+            if not ((if_match and (if_modified_since or if_none_match)) or
+                    (if_match and if_none_match)):
+                # We only get here if no undefined combinations of headers are
+                # specified.
+                if ((if_none_match and (res_etag in etags or
+                        "*" in etags and res_etag)) and
+                        (not if_modified_since or
+                            res_last_modified == if_modified_since)):
+                    if request.method in ("GET", "HEAD"):
+                        response = HttpResponseNotModified()
+                    else:
+                        response = HttpResponse(status=412)
+                elif if_match and ((not res_etag and "*" in etags) or
+                        (res_etag and res_etag not in etags)):
+                    response = HttpResponse(status=412)
+                elif (not if_none_match and if_modified_since and
+                        request.method == "GET" and
+                        res_last_modified == if_modified_since):
+                    response = HttpResponseNotModified()
+
+            if response is None:
+                response = func(request, *args, **kwargs)
+
+            # Set relevant headers on the response if they don't already exist.
+            if res_last_modified and not response.has_header('Last-Modified'):
+                response['Last-Modified'] = res_last_modified
+            if res_etag and not response.has_header('ETag'):
+                response['ETag'] = quote_etag(res_etag)
+
+            return response
+
+        return inner
+    return decorator
+
+# Shortcut decorators for common cases based on ETag or Last-Modified only
+def etag(callable):
+    return condition(etag=callable)
+
+def last_modified(callable):
+    return condition(last_modified=callable)
+

+ 1 - 0
docs/index.txt

@@ -81,6 +81,7 @@ Other batteries included
     * :ref:`Admin site <ref-contrib-admin>`
     * :ref:`Authentication <topics-auth>`
     * :ref:`Cache system <topics-cache>`
+    * :ref:`Conditional content processing <topics-conditional-processing>`
     * :ref:`Comments <ref-contrib-comments-index>`
     * :ref:`Content types <ref-contrib-contenttypes>`
     * :ref:`Cross Site Request Forgery protection <ref-contrib-csrf>`

+ 134 - 0
docs/topics/conditional-view-processing.txt

@@ -0,0 +1,134 @@
+.. _topics-conditional-processing:
+
+===========================
+Conditional View Processing
+===========================
+
+.. versionadded:: 1.1
+
+HTTP clients can send a number of headers to tell the server about copies of a
+resource that they have already seen. This is commonly used when retrieving a
+web page (using an HTTP ``GET`` request) to avoid sending all the data for
+something the client has already retrieved. However, the same headers can be
+used for all HTTP methods (``POST``, ``PUT``, ``DELETE``, etc).
+
+For each page (response) that Django sends back from a view, it might provide
+two HTTP headers: the ``ETag`` header and the ``Last-Modified`` header. These
+headers are optional on HTTP responses. They can be set by your view function,
+or you can rely on the :class:`~django.middleware.common.CommonMiddleware`
+middleware to set the ``ETag`` header.
+
+When the client next requests the same resource, it might send along a header
+such as `If-modified-since`_, containing the date of the last modification
+time it was sent, or `If-none-match`_, containing the ``ETag`` it was sent.
+If the ETag still matches, or if the resource has not been modified since that
+time, a 304 status code can be sent back, instead of a full response, telling
+the client that nothing has changed.
+
+.. _If-none-match: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
+.. _If-modified-since: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.25
+
+Django allows simple usage of this feature with
+:class:`django.middleware.http.ConditionalGetMiddleware` and
+:class:`~django.middleware.common.CommonMiddleware`. However, whilst being
+easy to use and suitable for many situations, they both have limitations for
+advanced usage:
+
+    * They are applied globally to all views in your project
+    * They don't save you from generating the response itself, which may be
+      expensive
+    * They are only appropriate for HTTP ``GET`` requests.
+
+.. _conditional-decorators:
+
+Decorators
+==========
+
+When you need more fine-grained control you may use per-view conditional
+processing functions. 
+
+The decorators ``django.views.decorators.http.etag`` and
+``django.views.decorators.http.last_modified`` each accept a user-defined
+function that takes the same parameters as the view itself. The function
+passed to ``last_modified`` should return a standard datetime value specifying
+the last time the resource was modified, or ``None`` if the resource doesn't
+exist. The function passed to the ``etag`` decorator should return a string
+representing the `Etag`_ for the resource, or ``None`` if it doesn't exist.
+
+.. _ETag: http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.11
+
+For example::
+
+    # Compute the last-modified time from when the object was last saved.
+    @last_modified(lambda r, obj_id: MyObject.objects.get(pk=obj_id).update_time)
+    def my_object_view(request, obj_id):
+        # Expensive generation of response with MyObject instance
+        ...
+
+Of course, you can always use the non-decorator form if you're using Python
+2.3 or don't like the decorator syntax::
+
+    def my_object_view(request, obj_id):
+        ...
+    my_object_view = last_modified(my_func)(my_object_view)
+
+Using the ``etag`` decorator is similar.
+
+In practice, though, you won't know if the client is going to send the
+``If-modified-since`` or the ``If-none-match`` header. If you can quickly
+compute both values and want to short-circuit as often as possible, you'll
+need to use the ``condition`` decorator described below.
+
+HTTP allows you to use both ``ETag`` and ``Last-Modified`` headers in your
+response. Then a response is considered not modified only if the client sends
+both headers back and they're both equal to the response headers. This means
+that you can't just chain decorators on your view::
+
+    # Bad code. Don't do this!
+    @etag(etag_func)
+    @last_modified(last_modified_func)
+    def my_view(request):
+        # ...
+
+    # End of bad code.
+
+The first decorator doesn't know anything about the second and might
+answer that the response is not modified even if the second decorator would
+determine otherwise. In this case you should use a more general decorator -
+``django.views.decorators.http.condition`` that accepts two functions at once::
+
+    # The correct way to implement the above example
+    @condition(etag_func, last_modified_func)
+    def my_view(request):
+        # ...
+
+Using the decorators with other HTTP methods
+============================================
+
+The ``condition`` decorator is useful for more than only ``GET`` and
+``HEAD`` requests (``HEAD`` requests are the same as ``GET`` in this
+situation). It can also be used to provide checking for ``POST``,
+``PUT`` and ``DELETE`` requests. In these situations, the idea isn't to return
+a "not modified" response, but to tell the client that the resource they are
+trying to change has been altered in the meantime.
+
+For example, consider the following exchange between the client and server:
+
+    1. Client requests ``/foo/``.
+    2. Server responds with some content with an ETag of ``"abcd1234"``.
+    3. Client sends an HTTP ``PUT`` request to ``/foo/`` to update the
+       resource. It sends an ``If-Match: "abcd1234"`` header to specify the
+       version it is trying to update.
+    4. Server checks to see if the resource has changed, by computing the ETag
+       the same way it does for a ``GET`` request (using the same function).
+       If the resource *has* changed, it will return a 412 status code,
+       meaning "precondition failed".
+    5. Client sends a ``GET`` request to ``/foo/``, after receiving a 412
+       response, to retrieve an updated version of the content before updating
+       it.
+
+The important thing this example shows is that the same functions can be used
+to compute the ETag and last modification values in all situations. In fact,
+you *should* use the same functions, so that the same values are returned
+every time.
+

+ 1 - 0
docs/topics/index.txt

@@ -18,6 +18,7 @@ Introductions to all the key parts of Django you'll need to know:
    testing
    auth
    cache
+   conditional-view-processing
    email
    i18n
    pagination

+ 1 - 0
tests/regressiontests/conditional_processing/__init__.py

@@ -0,0 +1 @@
+# -*- coding:utf-8 -*-

+ 100 - 0
tests/regressiontests/conditional_processing/models.py

@@ -0,0 +1,100 @@
+# -*- coding:utf-8 -*-
+from datetime import datetime, timedelta
+from calendar import timegm
+
+from django.test import TestCase
+from django.utils.http import parse_etags, quote_etag
+
+FULL_RESPONSE = 'Test conditional get response'
+LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47)
+LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
+EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
+ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
+EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
+
+class ConditionalGet(TestCase):
+    """
+    Exercises the views under /condition/ with If-Modified-Since and
+    If-None-Match request headers, using GET requests from the test client.
+    """
+    def assertFullResponse(self, response, check_last_modified=True, check_etag=True):
+        # A full response is a 200 carrying FULL_RESPONSE; the Last-Modified
+        # and ETag headers are verified unless the caller opts out.
+        self.assertEquals(response.status_code, 200)
+        self.assertEquals(response.content, FULL_RESPONSE)
+        if check_last_modified:
+            self.assertEquals(response['Last-Modified'], LAST_MODIFIED_STR)
+        if check_etag:
+            self.assertEquals(response['ETag'], '"%s"' % ETAG)
+
+    def assertNotModified(self, response):
+        # "Not modified" means status 304 with an empty body.
+        self.assertEquals(response.status_code, 304)
+        self.assertEquals(response.content, '')
+
+    def testWithoutConditions(self):
+        # No conditional headers at all: the view runs normally.
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testIfModifiedSince(self):
+        # Date equal to the resource's last-modified time: expect 304.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+        # A different (older) date: expect the full response.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testIfNoneMatch(self):
+        # Current ETag: expect 304.
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+        # Stale ETag: expect the full response.
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+        # Several etags in If-None-Match is a bit exotic but why not?
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s", "%s"' % (ETAG, EXPIRED_ETAG)
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+
+    def testBothHeaders(self):
+        # Both validators match: expect 304.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertNotModified(response)
+
+        # Stale date with the current ETag: expect the full response.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+        # Current date with a stale ETag: expect the full response.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/')
+        self.assertFullResponse(response)
+
+    def testSingleCondition1(self):
+        # If-Modified-Since short-circuits the last_modified-only view but
+        # has no effect on the etag-only view.
+        # NOTE(review): relies on client.defaults applying to both requests.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
+        response = self.client.get('/condition/last_modified/')
+        self.assertNotModified(response)
+        response = self.client.get('/condition/etag/')
+        self.assertFullResponse(response, check_last_modified=False)
+
+    def testSingleCondition2(self):
+        # If-None-Match short-circuits the etag-only view but has no effect
+        # on the last_modified-only view.
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % ETAG
+        response = self.client.get('/condition/etag/')
+        self.assertNotModified(response)
+        response = self.client.get('/condition/last_modified/')
+        self.assertFullResponse(response, check_etag=False)
+
+    def testSingleCondition3(self):
+        # Stale date against the last_modified-only view: full response.
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
+        response = self.client.get('/condition/last_modified/')
+        self.assertFullResponse(response, check_etag=False)
+
+    def testSingleCondition4(self):
+        # Stale ETag against the etag-only view: full response.
+        self.client.defaults['HTTP_IF_NONE_MATCH'] = '"%s"' % EXPIRED_ETAG
+        response = self.client.get('/condition/etag/')
+        self.assertFullResponse(response, check_last_modified=False)
+
+class ETagProcesing(TestCase):
+    """Unit tests for the parse_etags and quote_etag helpers."""
+    def testParsing(self):
+        # Covers an empty etag, a plain one, escaped quotes, an escaped
+        # backslash and a weak (W/) etag in a single header value.
+        etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
+        self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
+
+    def testQuoting(self):
+        # Both the backslash and the double quote must come back escaped.
+        quoted_etag = quote_etag(r'e\t"ag')
+        self.assertEquals(quoted_etag, r'"e\\t\"ag"')

+ 8 - 0
tests/regressiontests/conditional_processing/urls.py

@@ -0,0 +1,8 @@
+from django.conf.urls.defaults import *
+import views
+
+urlpatterns = patterns('',
+    ('^$', views.index),
+    ('^last_modified/$', views.last_modified),
+    ('^etag/$', views.etag),
+)

+ 17 - 0
tests/regressiontests/conditional_processing/views.py

@@ -0,0 +1,17 @@
+# -*- coding:utf-8 -*-
+from django.views.decorators.http import condition
+from django.http import HttpResponse
+
+from models import FULL_RESPONSE, LAST_MODIFIED, ETAG
+
+@condition(lambda r: ETAG, lambda r: LAST_MODIFIED)
+def index(request):
+    """Test view guarded by both a fixed ETag and a fixed last-modified time."""
+    return HttpResponse(FULL_RESPONSE)
+
+@condition(last_modified_func=lambda r: LAST_MODIFIED)
+def last_modified(request):
+    """Test view guarded only by a fixed last-modified time."""
+    return HttpResponse(FULL_RESPONSE)
+
+@condition(etag_func=lambda r: ETAG)
+def etag(request):
+    """Test view guarded only by a fixed ETag."""
+    return HttpResponse(FULL_RESPONSE)

+ 5 - 2
tests/urls.py

@@ -20,11 +20,11 @@ urlpatterns = patterns('',
 
     # test urlconf for middleware tests
     (r'^middleware/', include('regressiontests.middleware.urls')),
-    
+
     # admin view tests
     (r'^test_admin/', include('regressiontests.admin_views.urls')),
     (r'^generic_inline_admin/', include('regressiontests.generic_inline_admin.urls')),
-    
+
     # admin widget tests
     (r'widget_admin/', include('regressiontests.admin_widgets.urls')),
 
@@ -32,4 +32,7 @@ urlpatterns = patterns('',
 
     # test urlconf for syndication tests
     (r'^syndication/', include('regressiontests.syndication.urls')),
+
+    # conditional get views
+    (r'condition/', include('regressiontests.conditional_processing.urls')),
 )