Parcourir la source

Fixed #30512 -- Used email.headerregistry.parser for parsing emails in sanitize_address().

Joachim Jablon il y a 6 ans
Parent
commit
2628ea9515
2 fichiers modifiés avec 53 ajouts et 45 suppressions
  1. 33 45
      django/core/mail/message.py
  2. 20 0
      tests/mail/tests.py

+ 33 - 45
django/core/mail/message.py

@@ -2,15 +2,15 @@ import mimetypes
 from email import (
     charset as Charset, encoders as Encoders, generator, message_from_string,
 )
-from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
+from email.errors import HeaderParseError
 from email.header import Header
-from email.headerregistry import Address
+from email.headerregistry import Address, parser
 from email.message import Message
 from email.mime.base import MIMEBase
 from email.mime.message import MIMEMessage
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from email.utils import formatdate, getaddresses, make_msgid, parseaddr
+from email.utils import formatdate, getaddresses, make_msgid
 from io import BytesIO, StringIO
 from pathlib import Path
 
@@ -71,56 +71,44 @@ def forbid_multi_line_headers(name, val, encoding):
     return name, val
 
 
-def split_addr(addr, encoding):
-    """
-    Split the address into local part and domain and encode them.
-
-    When non-ascii characters are present in the local part, it must be
-    MIME-word encoded. The domain name must be idna-encoded if it contains
-    non-ascii characters.
-    """
-    if '@' in addr:
-        localpart, domain = addr.split('@', 1)
-        # Try to get the simplest encoding - ascii if possible so that
-        # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
-        # makes unit testing a bit easier and more readable.
-        try:
-            localpart.encode('ascii')
-        except UnicodeEncodeError:
-            localpart = Header(localpart, encoding).encode()
-        domain = domain.encode('idna').decode('ascii')
-    else:
-        localpart = Header(addr, encoding).encode()
-        domain = ''
-    return (localpart, domain)
-
-
 def sanitize_address(addr, encoding):
     """
     Format a pair of (name, address) or an email address string.
     """
+    address = None
     if not isinstance(addr, tuple):
-        addr = parseaddr(addr)
-    nm, addr = addr
-    localpart, domain = None, None
+        addr = force_str(addr)
+        try:
+            token, rest = parser.get_mailbox(addr)
+        except (HeaderParseError, ValueError, IndexError):
+            raise ValueError('Invalid address "%s"' % addr)
+        else:
+            if rest:
+                # The entire email address must be parsed.
+                raise ValueError(
+                    'Invalid adddress; only %s could be parsed from "%s"'
+                    % (token, addr)
+                )
+            nm = token.display_name or ''
+            localpart = token.local_part
+            domain = token.domain or ''
+    else:
+        nm, address = addr
+        localpart, domain = address.rsplit('@', 1)
+
     nm = Header(nm, encoding).encode()
+    # Avoid UTF-8 encode, if it's possible.
     try:
-        addr.encode('ascii')
-    except UnicodeEncodeError:  # IDN or non-ascii in the local part
-        localpart, domain = split_addr(addr, encoding)
-
-    # An `email.headerregistry.Address` object is used since
-    # email.utils.formataddr() naively encodes the name as ascii (see #25986).
-    if localpart and domain:
-        address = Address(nm, username=localpart, domain=domain)
-        return str(address)
-
+        localpart.encode('ascii')
+    except UnicodeEncodeError:
+        localpart = Header(localpart, encoding).encode()
     try:
-        address = Address(nm, addr_spec=addr)
-    except (InvalidHeaderDefect, NonASCIILocalPartDefect):
-        localpart, domain = split_addr(addr, encoding)
-        address = Address(nm, username=localpart, domain=domain)
-    return str(address)
+        domain.encode('ascii')
+    except UnicodeEncodeError:
+        domain = domain.encode('idna').decode('ascii')
+
+    parsed_address = Address(nm, username=localpart, domain=domain)
+    return str(parsed_address)
 
 
 class MIMEMixin:

+ 20 - 0
tests/mail/tests.py

@@ -748,10 +748,30 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
                 'utf-8',
                 '=?utf-8?q?to=40other=2Ecom?= <to@example.com>',
             ),
+            (
+                ('To Example', 'to@other.com@example.com'),
+                'utf-8',
+                '=?utf-8?q?To_Example?= <"to@other.com"@example.com>',
+            ),
         ):
             with self.subTest(email_address=email_address, encoding=encoding):
                 self.assertEqual(sanitize_address(email_address, encoding), expected_result)
 
+    def test_sanitize_address_invalid(self):
+        for email_address in (
+            # Invalid address with two @ signs.
+            'to@other.com@example.com',
+            # Invalid address without the quotes.
+            'to@other.com <to@example.com>',
+            # Other invalid addresses.
+            '@',
+            'to@',
+            '@example.com',
+        ):
+            with self.subTest(email_address=email_address):
+                with self.assertRaises(ValueError):
+                    sanitize_address(email_address, encoding='utf-8')
+
 
 @requires_tz_support
 class MailTimeZoneTests(SimpleTestCase):