瀏覽代碼

Fixed #27007 -- Handled non-UTF-8 bytes objects for text/* attachments.

The fallback logic which allows non-UTF-8 encoded files to be passed to
attach_file() even when a `text/*` mime type has been specified is
moved to attach(). Both functions now fall back to a content type of
`application/octet-stream`.

A side effect is that a file's content is decoded in memory instead of
opening it in text mode and reading it into a string.

The mimetype-guessing logic in _create_attachment() has become
obsolete, because the code moved from attach_file() to attach()
already handles it.
Michael Schwarz 8 年之前
父節點
當前提交
72d541b61c
共有 5 個文件被更改,包括 72 次插入和 28 次删除
  1. 1 0
      AUTHORS
  2. 29 28
      django/core/mail/message.py
  3. 4 0
      docs/releases/1.11.txt
  4. 13 0
      docs/topics/email.txt
  5. 25 0
      tests/mail/tests.py

+ 1 - 0
AUTHORS

@@ -519,6 +519,7 @@ answer newbie questions, and generally made Django that much better:
     michael.mcewan@gmail.com
     Michael Placentra II <someone@michaelplacentra2.net>
     Michael Radziej <mir@noris.de>
+    Michael Schwarz <michi.schwarz@gmail.com>
     Michael Thornhill <michael.thornhill@gmail.com>
     Michal Chruszcz <troll@pld-linux.org>
     michal@plovarna.cz

+ 29 - 28
django/core/mail/message.py

@@ -356,6 +356,11 @@ class EmailMessage(object):
 
         If the first parameter is a MIMEBase subclass it is inserted directly
         into the resulting message attachments.
+
+        For a text/* mimetype (guessed or specified), when a bytes object is
+        specified as content, it will be decoded as UTF-8. If that fails,
+        the mimetype will be set to DEFAULT_ATTACHMENT_MIME_TYPE and the
+        content will be left undecoded.
         """
         if isinstance(filename, MIMEBase):
             assert content is None
@@ -363,6 +368,22 @@ class EmailMessage(object):
             self.attachments.append(filename)
         else:
             assert content is not None
+
+            if not mimetype:
+                mimetype, _ = mimetypes.guess_type(filename)
+                if not mimetype:
+                    mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
+            basetype, subtype = mimetype.split('/', 1)
+
+            if basetype == 'text':
+                if isinstance(content, six.binary_type):
+                    try:
+                        content = content.decode('utf-8')
+                    except UnicodeDecodeError:
+                        # If mimetype suggests the content is text but it's
+                        # actually binary, decode() raises UnicodeDecodeError.
+                        mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
+
             self.attachments.append((filename, content, mimetype))
 
     def attach_file(self, path, mimetype=None):
@@ -370,33 +391,17 @@ class EmailMessage(object):
         Attaches a file from the filesystem.
 
         The mimetype will be set to the DEFAULT_ATTACHMENT_MIME_TYPE if it is
-        not specified and cannot be guessed or (PY3 only) if it suggests
-        text/* for a binary file.
+        not specified and cannot be guessed.
+
+        For a text/* mimetype (guessed or specified), the file's content
+        will be decoded as UTF-8. If that fails, the mimetype will be set to
+        DEFAULT_ATTACHMENT_MIME_TYPE and the content will be left undecoded.
         """
         filename = os.path.basename(path)
-        if not mimetype:
-            mimetype, _ = mimetypes.guess_type(filename)
-            if not mimetype:
-                mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
-        basetype, subtype = mimetype.split('/', 1)
-        read_mode = 'r' if basetype == 'text' else 'rb'
-        content = None
-
-        with open(path, read_mode) as f:
-            try:
-                content = f.read()
-            except UnicodeDecodeError:
-                # If mimetype suggests the file is text but it's actually
-                # binary, read() will raise a UnicodeDecodeError on Python 3.
-                pass
-
-        # If the previous read in text mode failed, try binary mode.
-        if content is None:
-            with open(path, 'rb') as f:
-                content = f.read()
-                mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
 
-        self.attach(filename, content, mimetype)
+        with open(path, 'rb') as file:
+            content = file.read()
+            self.attach(filename, content, mimetype)
 
     def _create_message(self, msg):
         return self._create_attachments(msg)
@@ -450,10 +455,6 @@ class EmailMessage(object):
         Converts the filename, content, mimetype triple into a MIME attachment
         object.
         """
-        if mimetype is None:
-            mimetype, _ = mimetypes.guess_type(filename)
-            if mimetype is None:
-                mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
         attachment = self._create_mime_attachment(content, mimetype)
         if filename:
             try:

+ 4 - 0
docs/releases/1.11.txt

@@ -162,6 +162,10 @@ Email
 * Added the :setting:`EMAIL_USE_LOCALTIME` setting to allow sending SMTP date
   headers in the local time zone rather than in UTC.
 
+* ``EmailMessage.attach()`` and ``attach_file()`` now fall back to MIME type
+  ``application/octet-stream`` when binary content that can't be decoded as
+  UTF-8 is specified for a ``text/*`` attachment.
+
 File Storage
 ~~~~~~~~~~~~
 

+ 13 - 0
docs/topics/email.txt

@@ -345,6 +345,11 @@ The class has the following methods:
     If you specify a ``mimetype`` of ``message/rfc822``, it will also accept
     :class:`django.core.mail.EmailMessage` and :py:class:`email.message.Message`.
 
+    For a ``mimetype`` starting with ``text/``, content is expected to be a
+    string. Binary data will be decoded using UTF-8, and if that fails, the
+    MIME type will be changed to ``application/octet-stream`` and the data will
+    be attached unchanged.
+
     In addition, ``message/rfc822`` attachments will no longer be
     base64-encoded in violation of :rfc:`2046#section-5.2.1`, which can cause
     issues with displaying the attachments in `Evolution`__ and `Thunderbird`__.
@@ -359,6 +364,14 @@ The class has the following methods:
 
     message.attach_file('/images/weather_map.png')
 
+  For MIME types starting with ``text/``, binary data is handled as in
+  ``attach()``.
+
+.. versionchanged:: 1.11
+
+    Added the fallback to MIME type ``application/octet-stream`` when binary
+    data for a ``text/*`` attachment cannot be decoded.
+
 Sending alternative content types
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

+ 25 - 0
tests/mail/tests.py

@@ -422,6 +422,31 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
         self.assertEqual(content, b'file content')
         self.assertEqual(mimetype, 'text/plain')
 
+    def test_attach_utf8_text_as_bytes(self):
+        """
+        Non-ASCII characters encoded as valid UTF-8 are correctly transported
+        and decoded.
+        """
+        msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
+        msg.attach('file.txt', b'\xc3\xa4')  # UTF-8 encoded a umlaut.
+        filename, content, mimetype = self.get_decoded_attachments(msg)[0]
+        self.assertEqual(filename, 'file.txt')
+        self.assertEqual(content, b'\xc3\xa4')
+        self.assertEqual(mimetype, 'text/plain')
+
+    def test_attach_non_utf8_text_as_bytes(self):
+        """
+        Binary data that can't be decoded as UTF-8 causes the MIME type to
+        be overridden; the data itself is passed through undecoded.
+        """
+        msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
+        msg.attach('file.txt', b'\xff')  # Invalid UTF-8.
+        filename, content, mimetype = self.get_decoded_attachments(msg)[0]
+        self.assertEqual(filename, 'file.txt')
+        # Content should be passed through unmodified.
+        self.assertEqual(content, b'\xff')
+        self.assertEqual(mimetype, 'application/octet-stream')
+
     def test_dummy_backend(self):
         """
         Make sure that dummy backends returns correct number of sent messages