Sfoglia il codice sorgente

Fixed #25986 -- Fixed crash sending email with non-ASCII in local part of the address.

On Python 3, sending emails failed for addresses containing non-ASCII
characters due to the usage of the legacy Python email.utils.formataddr()
function. This is fixed by using the proper Address object on Python 3.
Sergei Maertens 9 anni fa
parent
commit
ec009ef1d8
4 file modificati con 104 aggiunte e 13 eliminazioni
  1. 1 0
      AUTHORS
  2. 3 3
      django/core/mail/backends/smtp.py
  3. 53 9
      django/core/mail/message.py
  4. 47 1
      tests/mail/tests.py

+ 1 - 0
AUTHORS

@@ -660,6 +660,7 @@ answer newbie questions, and generally made Django that much better:
     Sengtha Chay <sengtha@e-khmer.com>
     Senko Rašić <senko.rasic@dobarkod.hr>
     serbaut@gmail.com
+    Sergei Maertens <sergeimaertens@gmail.com>
     Sergey Fedoseev <fedoseev.sergey@gmail.com>
     Sergey Kolosov <m17.admin@gmail.com>
     Seth Hill <sethrh@gmail.com>

+ 3 - 3
django/core/mail/backends/smtp.py

@@ -115,9 +115,9 @@ class EmailBackend(BaseEmailBackend):
         """A helper method that does the actual sending."""
         if not email_message.recipients():
             return False
-        from_email = sanitize_address(email_message.from_email, email_message.encoding)
-        recipients = [sanitize_address(addr, email_message.encoding)
-                      for addr in email_message.recipients()]
+        encoding = email_message.encoding or settings.DEFAULT_CHARSET
+        from_email = sanitize_address(email_message.from_email, encoding)
+        recipients = [sanitize_address(addr, encoding) for addr in email_message.recipients()]
         message = email_message.message()
         try:
             self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n'))

+ 53 - 9
django/core/mail/message.py

@@ -13,7 +13,7 @@ from email.mime.base import MIMEBase
 from email.mime.message import MIMEMessage
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from email.utils import formataddr, formatdate, getaddresses, parseaddr
+from email.utils import formatdate, getaddresses, parseaddr
 from io import BytesIO
 
 from django.conf import settings
@@ -103,22 +103,66 @@ def forbid_multi_line_headers(name, val, encoding):
     return str(name), val
 
 
+def split_addr(addr, encoding):
+    """
+    Split the address into local part and domain, properly encoded.
+
+    When non-ascii characters are present in the local part, it must be
+    MIME-word encoded. The domain name must be idna-encoded if it contains
+    non-ascii characters.
+    """
+    if '@' in addr:
+        localpart, domain = addr.split('@', 1)
+        # Try to get the simplest encoding - ascii if possible so that
+        # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
+        # makes unit testing a bit easier and more readable.
+        try:
+            localpart.encode('ascii')
+        except UnicodeEncodeError:
+            localpart = Header(localpart, encoding).encode()
+        domain = domain.encode('idna').decode('ascii')
+    else:
+        localpart = Header(addr, encoding).encode()
+        domain = ''
+    return (localpart, domain)
+
+
 def sanitize_address(addr, encoding):
+    """
+    Format a pair of (name, address) or an email address string.
+    """
     if not isinstance(addr, tuple):
         addr = parseaddr(force_text(addr))
     nm, addr = addr
+    localpart, domain = None, None
     nm = Header(nm, encoding).encode()
     try:
         addr.encode('ascii')
-    except UnicodeEncodeError:  # IDN
-        if '@' in addr:
-            localpart, domain = addr.split('@', 1)
-            localpart = str(Header(localpart, encoding))
-            domain = domain.encode('idna').decode('ascii')
+    except UnicodeEncodeError:  # IDN or non-ascii in the local part
+        localpart, domain = split_addr(addr, encoding)
+
+    if six.PY2:
+        # On Python 2, use the stdlib since `email.headerregistry` doesn't exist.
+        from email.utils import formataddr
+        if localpart and domain:
             addr = '@'.join([localpart, domain])
-        else:
-            addr = Header(addr, encoding).encode()
-    return formataddr((nm, addr))
+        return formataddr((nm, addr))
+
+    # On Python 3, an `email.headerregistry.Address` object is used since
+    # email.utils.formataddr() naively encodes the name as ascii (see #25986).
+    from email.headerregistry import Address
+    from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
+
+    if localpart and domain:
+        address = Address(nm, username=localpart, domain=domain)
+        return str(address)
+
+    try:
+        address = Address(nm, addr_spec=addr)
+    except (InvalidHeaderDefect, NonASCIILocalPartDefect):
+        localpart, domain = split_addr(addr, encoding)
+        address = Address(nm, username=localpart, domain=domain)
+    return str(address)
 
 
 class MIMEMixin():

+ 47 - 1
tests/mail/tests.py

@@ -9,6 +9,7 @@ import smtpd
 import sys
 import tempfile
 import threading
+from email.header import Header
 from email.mime.text import MIMEText
 from smtplib import SMTP, SMTPException
 from ssl import SSLError
@@ -19,7 +20,7 @@ from django.core.mail import (
     send_mail, send_mass_mail,
 )
 from django.core.mail.backends import console, dummy, filebased, locmem, smtp
-from django.core.mail.message import BadHeaderError
+from django.core.mail.message import BadHeaderError, sanitize_address
 from django.test import SimpleTestCase, override_settings
 from django.utils._os import upath
 from django.utils.encoding import force_bytes, force_text
@@ -567,6 +568,42 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
         # Verify that the child message header is not base64 encoded
         self.assertIn(str('Child Subject'), parent_s)
 
+    def test_sanitize_address(self):
+        """
+        Email addresses are properly sanitized.
+        """
+        # Simple ASCII address - string form
+        self.assertEqual(sanitize_address('to@example.com', 'ascii'), 'to@example.com')
+        self.assertEqual(sanitize_address('to@example.com', 'utf-8'), 'to@example.com')
+        # Bytestrings are transformed to normal strings.
+        self.assertEqual(sanitize_address(b'to@example.com', 'utf-8'), 'to@example.com')
+
+        # Simple ASCII address - tuple form
+        self.assertEqual(
+            sanitize_address(('A name', 'to@example.com'), 'ascii'),
+            'A name <to@example.com>'
+        )
+        if PY3:
+            self.assertEqual(
+                sanitize_address(('A name', 'to@example.com'), 'utf-8'),
+                '=?utf-8?q?A_name?= <to@example.com>'
+            )
+        else:
+            self.assertEqual(
+                sanitize_address(('A name', 'to@example.com'), 'utf-8'),
+                'A name <to@example.com>'
+            )
+
+        # Unicode characters are supported in RFC-6532.
+        self.assertEqual(
+            sanitize_address('tó@example.com', 'utf-8'),
+            '=?utf-8?b?dMOz?=@example.com'
+        )
+        self.assertEqual(
+            sanitize_address(('Tó Example', 'tó@example.com'), 'utf-8'),
+            '=?utf-8?q?T=C3=B3_Example?= <=?utf-8?b?dMOz?=@example.com>'
+        )
+
 
 class PythonGlobalState(SimpleTestCase):
     """
@@ -1026,6 +1063,15 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread):
             data = data.encode('utf-8')
         m = message_from_bytes(data)
         maddr = parseaddr(m.get('from'))[1]
+
+        if mailfrom != maddr:
+            # According to the spec, mailfrom does not necessarily match the
+            # From header - on Python 3 this is the case where the local part
+            # isn't encoded, so try to correct that.
+            lp, domain = mailfrom.split('@', 1)
+            lp = Header(lp, 'utf-8').encode()
+            mailfrom = '@'.join([lp, domain])
+
         if mailfrom != maddr:
             return "553 '%s' != '%s'" % (mailfrom, maddr)
         with self.sink_lock: