Browse Source

Fixed #10190 -- Made HttpResponse charset customizable.

Thanks to Simon Charette, Aymeric Augustin, and Tim Graham
for reviews and contributions.
Unai Zalakain 11 years ago
parent
commit
5f2542f12a

+ 26 - 7
django/http/response.py

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import datetime
 import json
+import re
 import sys
 import time
 from email.header import Header
@@ -83,6 +84,9 @@ REASON_PHRASES = {
 }
 
 
+_charset_from_content_type_re = re.compile(r';\s*charset=(?P<charset>[^\s;]+)', re.I)
+
+
 class BadHeaderError(ValueError):
     pass
 
@@ -98,19 +102,15 @@ class HttpResponseBase(six.Iterator):
     status_code = 200
     reason_phrase = None        # Use default reason phrase for status code.
 
-    def __init__(self, content_type=None, status=None, reason=None):
+    def __init__(self, content_type=None, status=None, reason=None, charset=None):
         # _headers is a mapping of the lower-case name to the original case of
         # the header (required for working with legacy systems) and the header
         # value. Both the name of the header and its value are ASCII strings.
         self._headers = {}
-        self._charset = settings.DEFAULT_CHARSET
         self._closable_objects = []
         # This parameter is set by the handler. It's necessary to preserve the
         # historical behavior of request_finished.
         self._handler_class = None
-        if not content_type:
-            content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
-                    self._charset)
         self.cookies = SimpleCookie()
         if status is not None:
             self.status_code = status
@@ -119,8 +119,27 @@ class HttpResponseBase(six.Iterator):
         elif self.reason_phrase is None:
             self.reason_phrase = REASON_PHRASES.get(self.status_code,
                                                     'UNKNOWN STATUS CODE')
+        self._charset = charset
+        if content_type is None:
+            content_type = '%s; charset=%s' % (settings.DEFAULT_CONTENT_TYPE,
+                                               self.charset)
         self['Content-Type'] = content_type
 
+    @property
+    def charset(self):
+        if self._charset is not None:
+            return self._charset
+        content_type = self.get('Content-Type', '')
+        matched = _charset_from_content_type_re.search(content_type)
+        if matched:
+            # Extract the charset and strip its double quotes
+            return matched.group('charset').replace('"', '')
+        return settings.DEFAULT_CHARSET
+
+    @charset.setter
+    def charset(self, value):
+        self._charset = value
+
     def serialize_headers(self):
         """HTTP headers as a bytestring."""
         def to_bytes(val, encoding):
@@ -278,10 +297,10 @@ class HttpResponseBase(six.Iterator):
         if isinstance(value, bytes):
             return bytes(value)
         if isinstance(value, six.text_type):
-            return bytes(value.encode(self._charset))
+            return bytes(value.encode(self.charset))
 
         # Handle non-string types (#16494)
-        return force_bytes(value, self._charset)
+        return force_bytes(value, self.charset)
 
     # These methods partially implement the file-like object interface.
     # See http://docs.python.org/lib/bltin-file-objects.html

+ 5 - 4
django/template/response.py

@@ -10,7 +10,8 @@ class ContentNotRenderedError(Exception):
 class SimpleTemplateResponse(HttpResponse):
     rendering_attrs = ['template_name', 'context_data', '_post_render_callbacks']
 
-    def __init__(self, template, context=None, content_type=None, status=None):
+    def __init__(self, template, context=None, content_type=None, status=None,
+                 charset=None):
         # It would seem obvious to call these next two members 'template' and
         # 'context', but those names are reserved as part of the test Client
         # API. To avoid the name collision, we use tricky-to-debug problems
@@ -22,7 +23,7 @@ class SimpleTemplateResponse(HttpResponse):
         # content argument doesn't make sense here because it will be replaced
         # with rendered template so we always pass empty string in order to
         # prevent errors and provide shorter signature.
-        super(SimpleTemplateResponse, self).__init__('', content_type, status)
+        super(SimpleTemplateResponse, self).__init__('', content_type, status, charset)
 
         # _is_rendered tracks whether the template and context has been baked
         # into a final response.
@@ -136,7 +137,7 @@ class TemplateResponse(SimpleTemplateResponse):
     rendering_attrs = SimpleTemplateResponse.rendering_attrs + ['_request', '_current_app']
 
     def __init__(self, request, template, context=None, content_type=None,
-            status=None, current_app=None):
+            status=None, current_app=None, charset=None):
         # self.request gets over-written by django.test.client.Client - and
         # unlike context_data and template_name the _request should not
         # be considered part of the public API.
@@ -145,7 +146,7 @@ class TemplateResponse(SimpleTemplateResponse):
         # having to avoid needing to create the RequestContext directly
         self._current_app = current_app
         super(TemplateResponse, self).__init__(
-            template, context, content_type, status)
+            template, context, content_type, status, charset)
 
     def resolve_context(self, context):
         """Convert context data into a full RequestContext object

+ 2 - 2
django/test/testcases.py

@@ -327,8 +327,8 @@ class SimpleTestCase(unittest.TestCase):
         else:
             content = response.content
         if not isinstance(text, bytes) or html:
-            text = force_text(text, encoding=response._charset)
-            content = content.decode(response._charset)
+            text = force_text(text, encoding=response.charset)
+            content = content.decode(response.charset)
             text_repr = "'%s'" % text
         else:
             text_repr = repr(text)

+ 18 - 1
docs/ref/request-response.txt

@@ -627,6 +627,15 @@ Attributes
     A bytestring representing the content, encoded from a Unicode
     object if necessary.
 
+.. attribute:: HttpResponse.charset
+
+    .. versionadded:: 1.8
+
+    A string denoting the charset in which the response will be encoded. If not
+    given at ``HttpResponse`` instantiation time, it will be extracted from
+    ``content_type``, and if that is unsuccessful, the
+    :setting:`DEFAULT_CHARSET` setting will be used.
+
 .. attribute:: HttpResponse.status_code
 
     The `HTTP status code`_ for the response.
@@ -645,7 +654,7 @@ Attributes
 Methods
 -------
 
-.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None)
+.. method:: HttpResponse.__init__(content='', content_type=None, status=200, reason=None, charset=None)
 
     Instantiates an ``HttpResponse`` object with the given page content and
     content type.
@@ -666,6 +675,14 @@ Methods
     ``reason`` is the HTTP response phrase. If not provided, a default phrase
     will be used.
 
+    ``charset`` is the charset in which the response will be encoded. If not
+    given it will be extracted from ``content_type``, and if that
+    is unsuccessful, the :setting:`DEFAULT_CHARSET` setting will be used.
+
+    .. versionadded:: 1.8
+
+        The ``charset`` parameter was added.
+
 .. method:: HttpResponse.__setitem__(header, value)
 
     Sets the given header name to the given value. Both ``header`` and

+ 19 - 2
docs/ref/template-response.txt

@@ -56,7 +56,7 @@ Attributes
 Methods
 -------
 
-.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None)
+.. method:: SimpleTemplateResponse.__init__(template, context=None, content_type=None, status=None, charset=None)
 
     Instantiates a
     :class:`~django.template.response.SimpleTemplateResponse` object
@@ -80,6 +80,15 @@ Methods
         ``content_type`` is specified, then its value is used. Otherwise,
         :setting:`DEFAULT_CONTENT_TYPE` is used.
 
+    ``charset``
+        The charset in which the response will be encoded. If not given it will
+        be extracted from ``content_type``, and if that is unsuccessful, the
+        :setting:`DEFAULT_CHARSET` setting will be used.
+
+    .. versionadded:: 1.8
+
+        The ``charset`` parameter was added.
+
 .. method:: SimpleTemplateResponse.resolve_context(context)
 
     Converts context data into a context instance that can be used for
@@ -140,7 +149,7 @@ TemplateResponse objects
 Methods
 -------
 
-.. method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None)
+.. method:: TemplateResponse.__init__(request, template, context=None, content_type=None, status=None, current_app=None, charset=None)
 
     Instantiates a ``TemplateResponse`` object with the given
     template, context, MIME type and HTTP status.
@@ -173,6 +182,14 @@ Methods
         :ref:`namespaced URL resolution strategy <topics-http-reversing-url-namespaces>`
         for more information.
 
+    ``charset``
+        The charset in which the response will be encoded. If not given it will
+        be extracted from ``content_type``, and if that is unsuccessful, the
+        :setting:`DEFAULT_CHARSET` setting will be used.
+
+    .. versionadded:: 1.8
+
+        The ``charset`` parameter was added.
 
 The rendering process
 =====================

+ 3 - 0
docs/releases/1.8.txt

@@ -278,6 +278,9 @@ Requests and Responses
   This brings this class into line with the documentation and with
   ``WSGIRequest``.
 
+* The :attr:`HttpResponse.charset <django.http.HttpResponse.charset>` attribute
+  was added.
+
 Tests
 ^^^^^
 

+ 52 - 2
tests/responses/tests.py

@@ -1,8 +1,16 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+from django.conf import settings
 from django.http import HttpResponse
-import unittest
+from django.test import SimpleTestCase
 
+UTF8 = 'utf-8'
+ISO88591 = 'iso-8859-1'
 
-class HttpResponseTests(unittest.TestCase):
+
+class HttpResponseTests(SimpleTestCase):
 
     def test_status_code(self):
         resp = HttpResponse(status=418)
@@ -14,3 +22,45 @@ class HttpResponseTests(unittest.TestCase):
         resp = HttpResponse(status=814, reason=reason)
         self.assertEqual(resp.status_code, 814)
         self.assertEqual(resp.reason_phrase, reason)
+
+    def test_charset_detection(self):
+        """ HttpResponse should parse charset from content_type."""
+        response = HttpResponse('ok')
+        self.assertEqual(response.charset, settings.DEFAULT_CHARSET)
+
+        response = HttpResponse(charset=ISO88591)
+        self.assertEqual(response.charset, ISO88591)
+        self.assertEqual(response['Content-Type'], 'text/html; charset=%s' % ISO88591)
+
+        response = HttpResponse(content_type='text/plain; charset=%s' % UTF8, charset=ISO88591)
+        self.assertEqual(response.charset, ISO88591)
+
+        response = HttpResponse(content_type='text/plain; charset=%s' % ISO88591)
+        self.assertEqual(response.charset, ISO88591)
+
+        response = HttpResponse(content_type='text/plain; charset="%s"' % ISO88591)
+        self.assertEqual(response.charset, ISO88591)
+
+        response = HttpResponse(content_type='text/plain; charset=')
+        self.assertEqual(response.charset, settings.DEFAULT_CHARSET)
+
+        response = HttpResponse(content_type='text/plain')
+        self.assertEqual(response.charset, settings.DEFAULT_CHARSET)
+
+    def test_response_content_charset(self):
+        """HttpResponse should encode based on charset."""
+        content = "Café :)"
+        utf8_content = content.encode(UTF8)
+        iso_content = content.encode(ISO88591)
+
+        response = HttpResponse(utf8_content)
+        self.assertContains(response, utf8_content)
+
+        response = HttpResponse(iso_content, content_type='text/plain; charset=%s' % ISO88591)
+        self.assertContains(response, iso_content)
+
+        response = HttpResponse(iso_content)
+        self.assertContains(response, iso_content)
+
+        response = HttpResponse(iso_content, content_type='text/plain')
+        self.assertContains(response, iso_content)