Prechádzať zdrojové kódy

Fixed #20197 -- Made XML serializer fail loudly when outputting unserializable chars

Thanks Tim Graham for the review.
Claude Paroz 9 rokov pred
rodič
commit
9368f51e12

+ 8 - 2
django/core/serializers/xml_serializer.py

@@ -14,7 +14,9 @@ from django.conf import settings
 from django.core.serializers import base
 from django.db import DEFAULT_DB_ALIAS, models
 from django.utils.encoding import smart_text
-from django.utils.xmlutils import SimplerXMLGenerator
+from django.utils.xmlutils import (
+    SimplerXMLGenerator, UnserializableContentError,
+)
 
 
 class Serializer(base.Serializer):
@@ -78,7 +80,11 @@ class Serializer(base.Serializer):
 
         # Get a "string version" of the object's data.
         if getattr(obj, field.name) is not None:
-            self.xml.characters(field.value_to_string(obj))
+            try:
+                self.xml.characters(field.value_to_string(obj))
+            except UnserializableContentError:
+                raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
+                    obj.__class__.__name__, field.name, obj._get_pk_val()))
         else:
             self.xml.addQuickElement("None")
 

+ 12 - 0
django/utils/xmlutils.py

@@ -2,9 +2,14 @@
 Utilities for XML generation/parsing.
 """
 
+import re
 from xml.sax.saxutils import XMLGenerator
 
 
+class UnserializableContentError(ValueError):
+    pass
+
+
 class SimplerXMLGenerator(XMLGenerator):
     def addQuickElement(self, name, contents=None, attrs=None):
         "Convenience method for adding an element with no children"
@@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator):
         if contents is not None:
             self.characters(contents)
         self.endElement(name)
+
+    def characters(self, content):
+        if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content):
+            # Fail loudly when content has control chars (unsupported in XML 1.0)
+            # See http://www.w3.org/International/questions/qa-controls
+            raise UnserializableContentError("Control characters are not supported in XML 1.0")
+        XMLGenerator.characters(self, content)

+ 4 - 0
docs/releases/1.9.txt

@@ -720,6 +720,10 @@ Miscellaneous
 * Private function ``django.utils.functional.total_ordering()`` has been
   removed. It contained a workaround for a ``functools.total_ordering()`` bug
   in Python versions older than 2.7.3.
+* XML serialization (either through :djadmin:`dumpdata` or the syndication
+  framework) used to output any characters it received. Now if the content to
+  be serialized contains any control characters not allowed in the XML 1.0
+  standard, the serialization will fail with a :exc:`ValueError`.
 
 .. _deprecated-features-1.9:
 

+ 10 - 0
docs/topics/serialization.txt

@@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model::
 
 This example links the given user with the permission models with PKs 46 and 47.
 
+.. admonition:: Control characters
+
+    .. versionchanged:: 1.9
+
+    If the content to be serialized contains control characters that are not
+    accepted in the XML 1.0 standard, the serialization will fail with a
+    :exc:`ValueError` exception. See also the W3C's explanation of `HTML,
+    XHTML, XML and Control Codes
+    <http://www.w3.org/International/questions/qa-controls>`_.
+
 .. _serialization-formats-json:
 
 JSON

+ 15 - 0
tests/serializers/tests.py

@@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
                 ret_list.append("".join(temp))
         return ret_list
 
+    def test_control_char_failure(self):
+        """
+        Serializing control characters with XML should fail as those characters
+        are not supported in the XML 1.0 standard (except HT, LF, CR).
+        """
+        self.a1.headline = "This contains \u0001 control \u0011 chars"
+        msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
+        with self.assertRaisesMessage(ValueError, msg):
+            serializers.serialize(self.serializer_name, [self.a1])
+        self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
+        self.assertIn(
+            "HT \t, LF \n, and CR \r are allowed",
+            serializers.serialize(self.serializer_name, [self.a1])
+        )
+
 
 class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase):
     serializer_name = "xml"