From 0abca3e10d13eb925e1a2f92627cf7f15ac67c01 Mon Sep 17 00:00:00 2001
From: Mike Edmunds <medmunds@gmail.com>
Date: Wed, 31 Jul 2024 17:12:14 -0700
Subject: [PATCH 01/10] Stop incorrectly RFC 2047 encoding non-ASCII email
 addresses

Email generators had been incorrectly flattening non-ASCII email
addresses to RFC 2047 encoded-word format, leaving them undeliverable.
(RFC 2047 prohibits use of encoded-word in an addr-spec.)
This change raises a ValueError when attempting to flatten an
EmailMessage with a non-ASCII addr-spec and a policy with utf8=False.
(Exception: If the non-ASCII address originated from parsing a message,
it will be flattened as originally parsed, without error.)

Non-ASCII email addresses are supported when using a policy with
utf8=True (such as email.policy.SMTPUTF8) under RFCs 6531 and 6532.

Non-ASCII email address domains (but not localparts) can also be used
with non-SMTPUTF8 policies by encoding the domain as an IDNA A-label.
(The email package does not perform this encoding, because it cannot
know whether the caller wants IDNA 2003, IDNA 2008, or some other
variant such as UTS #46.)
---
 Doc/library/email.policy.rst          | 10 ++++-
 Lib/email/_header_value_parser.py     | 11 +++++
 Lib/test/test_email/test_generator.py | 58 ++++++++++++++++++++++++++-
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
index 314767d0802a08..d5be4d6625176a 100644
--- a/Doc/library/email.policy.rst
+++ b/Doc/library/email.policy.rst
@@ -406,11 +406,17 @@ added matters.  To illustrate::
    .. attribute:: utf8
 
       If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
-      headers by encoding them as "encoded words".  If ``True``, follow
-      :rfc:`6532` and use ``utf-8`` encoding for headers.  Messages
+      headers by encoding them as :rfc:`2047` "encoded words".  If ``True``,
+      follow :rfc:`6532` and use ``utf-8`` encoding for headers.  Messages
       formatted in this way may be passed to SMTP servers that support
       the ``SMTPUTF8`` extension (:rfc:`6531`).
 
+      .. versionchanged:: 3.13
+         If ``False``, the generator will raise a ``ValueError`` if any email
+         address contains non-ASCII characters. To send to a non-ASCII domain
+         with ``utf8=False``, encode the domain using the third-party
+         :pypi:`idna` module or :mod:`encodings.idna`. No RFC allows a non-ASCII
+         username ("localpart") in an email address with ``utf8=False``.
 
    .. attribute:: refold_source
 
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index ec2215a5e5f33c..ff75b9acd81fd8 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2829,6 +2829,17 @@ def _refold_parse_tree(parse_tree, *, policy):
             _fold_mime_parameters(part, lines, maxlen, encoding)
             continue
 
+        if want_encoding and part.token_type == 'addr-spec':
+            # RFC2047 forbids encoded-word in any part of an addr-spec.
+            if charset == 'unknown-8bit':
+                # Non-ASCII addr-spec came from parsed message; leave unchanged.
+                want_encoding = False
+            else:
+                raise ValueError(
+                    "Non-ASCII address requires policy with utf8=True:"
+                    " '{}'".format(part)
+                )
+
         if want_encoding and not wrap_as_ew_blocked:
             if not part.as_ew_allowed:
                 want_encoding = False
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index c75a842c33578e..f6621e7bd96078 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -1,4 +1,5 @@
 import io
+import re
 import textwrap
 import unittest
 from email import message_from_string, message_from_bytes
@@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), self.typ(expected))
 
+    def test_non_ascii_addr_spec_raises(self):
+        # RFC2047 encoded-word is not permitted in any part of an addr-spec.
+        # (See also test_non_ascii_addr_spec_preserved below.)
+        g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
+        cases = [
+            'wők@example.com',
+            'wok@exàmple.com',
+            'wők@exàmple.com',
+            '"Name, for display" <wők@example.com>',
+            'Näyttönimi <wők@example.com>',
+        ]
+        for address in cases:
+            with self.subTest(address=address):
+                msg = EmailMessage()
+                msg['To'] = address
+                expected_error = re.escape(
+                    "Non-ASCII address requires policy with utf8=True:"
+                    " '{}'".format(msg['To'].addresses[0].addr_spec)
+                )
+                with self.assertRaisesRegex(ValueError, expected_error):
+                    g.flatten(msg)
+
 
 class TestGenerator(TestGeneratorBase, TestEmailBase):
 
@@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self):
 
     def test_smtputf8_policy(self):
         msg = EmailMessage()
-        msg['From'] = "Páolo <főo@bar.com>"
+        msg['From'] = "Páolo <főo@bàr.com>"
         msg['To'] = 'Dinsdale'
         msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
         msg.set_content("oh là là, know what I mean, know what I mean?")
         expected = textwrap.dedent("""\
-            From: Páolo <főo@bar.com>
+            From: Páolo <főo@bàr.com>
             To: Dinsdale
             Subject: Nudge nudge, wink, wink \u1F609
             Content-Type: text/plain; charset="utf-8"
@@ -472,6 +495,37 @@ def test_smtp_policy(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), expected)
 
+    def test_non_ascii_addr_spec_preserved(self):
+        # A defective non-ASCII addr-spec parsed from the original
+        # message is left unchanged when flattening.
+        # (See also test_non_ascii_addr_spec_raises above.)
+        source = (
+            'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
+        ).encode()
+        expected = (
+            b'To: j\xc3\xb6rg@example.com,\n'
+            b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
+            b'\n'
+        )
+        msg = message_from_bytes(source, policy=policy.default)
+        s = io.BytesIO()
+        g = BytesGenerator(s, policy=policy.default)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
+    def test_idna_encoding_preserved(self):
+        # Nothing tries to decode a pre-encoded IDNA domain.
+        msg = EmailMessage()
+        msg["To"] = Address(
+            username='jörg',
+            domain='☕.example'.encode('idna').decode()  # IDNA 2003
+        )
+        expected = 'To: jörg@xn--53h.example\n\n'.encode()
+        s = io.BytesIO()
+        g = BytesGenerator(s, policy=policy.default.clone(utf8=True))
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
 
 if __name__ == '__main__':
     unittest.main()

From faa40063315616479fdcdc6b095160719477d687 Mon Sep 17 00:00:00 2001
From: Mike Edmunds <medmunds@gmail.com>
Date: Wed, 31 Jul 2024 17:23:42 -0700
Subject: [PATCH 02/10] Blurbs

---
 .../next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst | 3 +++
 .../Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst     | 3 +++
 2 files changed, 6 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
 create mode 100644 Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst

diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
new file mode 100644
index 00000000000000..673bdd2309dcc6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
@@ -0,0 +1,3 @@
+Stop incorrectly using RFC 2047 "encoded words" for email addresses with
+non-ASCII characters when email.generator is called using a policy with
+``utf8=False``.
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
new file mode 100644
index 00000000000000..673bdd2309dcc6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
@@ -0,0 +1,3 @@
+Stop incorrectly using RFC 2047 "encoded words" for email addresses with
+non-ASCII characters when email.generator is called using a policy with
+``utf8=False``.

From bd6845dc6341704b0ea861f67170c620e47fa56a Mon Sep 17 00:00:00 2001
From: Mike Edmunds <medmunds@gmail.com>
Date: Tue, 1 Apr 2025 13:02:16 -0700
Subject: [PATCH 03/10] fixup! Stop incorrectly RFC 2047 encoding non-ASCII
 email addresses

- Incorporate PR review feedback
- Improve docs
---
 Doc/library/email.errors.rst                    |  9 +++++++++
 Doc/library/email.policy.rst                    | 17 +++++++++++------
 Lib/email/_header_value_parser.py               |  2 +-
 Lib/email/errors.py                             |  4 ++++
 Lib/test/test_email/test_generator.py           | 10 ++++++----
 ...024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst |  8 +++++---
 ...24-07-31-17-23-06.gh-issue-122476.TtUa-c.rst |  8 +++++---
 7 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index 689e7397cbcf1f..d9254039d882a1 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -59,6 +59,15 @@ The following exception classes are defined in the :mod:`email.errors` module:
    headers.
 
 
+.. exception:: InvalidMailboxError()
+
+   Raised when serializing a message with an address header that contains
+   a mailbox incompatible with the policy in use.
+   (See :attr:`email.policy.EmailPolicy.utf8`.)
+
+   .. versionadded:: 3.14
+
+
 .. exception:: MessageDefect()
 
    This is the base class for all defects found when parsing email messages.
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
index 3d5f29e21de151..a3e0065cfe469d 100644
--- a/Doc/library/email.policy.rst
+++ b/Doc/library/email.policy.rst
@@ -411,12 +411,17 @@ added matters.  To illustrate::
       formatted in this way may be passed to SMTP servers that support
       the ``SMTPUTF8`` extension (:rfc:`6531`).
 
-      .. versionchanged:: 3.13
-         If ``False``, the generator will raise a ``ValueError`` if any email
-         address contains non-ASCII characters. To send to a non-ASCII domain
-         with ``utf8=False``, encode the domain using the third-party
-         :pypi:`idna` module or :mod:`encodings.idna`. No RFC allows a non-ASCII
-         username ("localpart") in an email address with ``utf8=False``.
+      When ``False``, the generator will raise an
+      :exc:`~email.errors.InvalidMailboxError` if any address header includes
+      a mailbox ("addr-spec") with non-ASCII characters. To use a mailbox with
+      an internationalized domain name, first encode the domain using the
+      third-party :pypi:`idna` or :pypi:`uts46` module or with
+      :mod:`encodings.idna`. It is not possible to use a non-ASCII username
+      ("local-part") in a mailbox when ``utf8=False``.
+
+      .. versionchanged:: 3.14
+         Raises :exc:`~email.errors.InvalidMailboxError`. (Earlier versions
+         incorrectly applied :rfc:`2047` to non-ASCII addr-specs.)
 
    .. attribute:: refold_source
 
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index c0318f6c988d5a..bff9beb32aab6b 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2843,7 +2843,7 @@ def _refold_parse_tree(parse_tree, *, policy):
                 # Non-ASCII addr-spec came from parsed message; leave unchanged.
                 want_encoding = False
             else:
-                raise ValueError(
+                raise errors.InvalidMailboxError(
                     "Non-ASCII address requires policy with utf8=True:"
                     " '{}'".format(part)
                 )
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index 6bc744bd59c5bb..e5601132d024fe 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -33,6 +33,10 @@ class HeaderWriteError(MessageError):
     """Error while writing headers."""
 
 
+class InvalidMailboxError(MessageError, ValueError):
+    """A mailbox was not compatible with the policy in use."""
+
+
 # These are parsing defects which the parser was able to work around.
 class MessageDefect(ValueError):
     """Base class for a message defect."""
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index f6621e7bd96078..f28cbf1ebdcb3b 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -304,11 +304,13 @@ def test_non_ascii_addr_spec_raises(self):
             with self.subTest(address=address):
                 msg = EmailMessage()
                 msg['To'] = address
-                expected_error = re.escape(
-                    "Non-ASCII address requires policy with utf8=True:"
-                    " '{}'".format(msg['To'].addresses[0].addr_spec)
+                addr_spec = msg['To'].addresses[0].addr_spec
+                expected_error = (
+                    fr"(?i)(?=.*non-ascii)(?=.*utf8.*True)(?=.*{re.escape(addr_spec)})"
                 )
-                with self.assertRaisesRegex(ValueError, expected_error):
+                with self.assertRaisesRegex(
+                    email.errors.InvalidMailboxError, expected_error
+                ):
                     g.flatten(msg)
 
 
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
index 673bdd2309dcc6..fb1574fb4ef709 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
@@ -1,3 +1,5 @@
-Stop incorrectly using RFC 2047 "encoded words" for email addresses with
-non-ASCII characters when email.generator is called using a policy with
-``utf8=False``.
+The :mod:`email` module no longer incorrectly encodes non-ASCII characters
+in email addresses using :rfc:`2047` encoding. Under a policy with ``utf8=True``
+this means the addresses will be correctly passed through. Under a policy with
+``utf8=False``, attempting to serialize a message with non-ASCII email addresses
+will now result in an :exc:`~email.errors.InvalidMailboxError`.
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
index 673bdd2309dcc6..fb1574fb4ef709 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
@@ -1,3 +1,5 @@
-Stop incorrectly using RFC 2047 "encoded words" for email addresses with
-non-ASCII characters when email.generator is called using a policy with
-``utf8=False``.
+The :mod:`email` module no longer incorrectly encodes non-ASCII characters
+in email addresses using :rfc:`2047` encoding. Under a policy with ``utf8=True``
+this means the addresses will be correctly passed through. Under a policy with
+``utf8=False``, attempting to serialize a message with non-ASCII email addresses
+will now result in an :exc:`~email.errors.InvalidMailboxError`.

From 43eaea1fb8aa279c9d1056048e7c2d49b8ed19ad Mon Sep 17 00:00:00 2001
From: Mike Edmunds <medmunds@gmail.com>
Date: Mon, 26 May 2025 16:19:02 -0700
Subject: [PATCH 04/10] fixup! Stop incorrectly RFC 2047 encoding non-ASCII
 email addresses

- Incorporate PR feedback
- Tailor blurbs to individual issues
---
 Lib/email/_header_value_parser.py                   |  4 ++--
 Lib/test/test_email/test_generator.py               |  2 +-
 .../2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst   | 13 ++++++++-----
 .../2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst  | 12 +++++++-----
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index f51b311d2b8f93..eb86b22860b2c2 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2846,8 +2846,8 @@ def _refold_parse_tree(parse_tree, *, policy):
                 want_encoding = False
             else:
                 raise errors.InvalidMailboxError(
-                    "Non-ASCII address requires policy with utf8=True:"
-                    " '{}'".format(part)
+                    f"Non-ASCII mailbox '{part}' is invalid"
+                    " under current policy setting (utf8=False)"
                 )
 
         if want_encoding and not wrap_as_ew_blocked:
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index f28cbf1ebdcb3b..5c36a5a54323fa 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -306,7 +306,7 @@ def test_non_ascii_addr_spec_raises(self):
                 msg['To'] = address
                 addr_spec = msg['To'].addresses[0].addr_spec
                 expected_error = (
-                    fr"(?i)(?=.*non-ascii)(?=.*utf8.*True)(?=.*{re.escape(addr_spec)})"
+                    fr"(?i)(?=.*non-ascii)(?=.*{re.escape(addr_spec)})(?=.*policy.*utf8)"
                 )
                 with self.assertRaisesRegex(
                     email.errors.InvalidMailboxError, expected_error
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
index fb1574fb4ef709..d4b07bf06b9236 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
@@ -1,5 +1,8 @@
-The :mod:`email` module no longer incorrectly encodes non-ASCII characters
-in email addresses using :rfc:`2047` encoding. Under a policy with ``utf8=True``
-this means the addresses will be correctly passed through. Under a policy with
-``utf8=False``, attempting to serialize a message with non-ASCII email addresses
-will now result in an :exc:`~email.errors.InvalidMailboxError`.
+The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
+a mailbox with non-ASCII characters in its domain. Under a policy with
+:attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
+such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
+Either apply an appropriate IDNA encoding to convert the domain to ASCII before
+serialization, or use :data:`email.policy.SMTPUTF8` (or another policy with
+``utf8=True``) to correctly pass through the internationalized domain name
+as Unicode characters.
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
index fb1574fb4ef709..6fca53c1b2a409 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
@@ -1,5 +1,7 @@
-The :mod:`email` module no longer incorrectly encodes non-ASCII characters
-in email addresses using :rfc:`2047` encoding. Under a policy with ``utf8=True``
-this means the addresses will be correctly passed through. Under a policy with
-``utf8=False``, attempting to serialize a message with non-ASCII email addresses
-will now result in an :exc:`~email.errors.InvalidMailboxError`.
+The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
+a mailbox with non-ASCII characters in its local-part. Under a policy with
+:attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
+such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
+There is no valid 7-bit encoding for an internationalized local-part. Use
+:data:`email.policy.SMTPUTF8` (or another policy with ``utf8=True``) to
+correctly pass through the local-part as Unicode characters.

From 5aafc33dff828cb8c81fb43946fe7fb4f18474ec Mon Sep 17 00:00:00 2001
From: "R. David Murray" <rdmurray@bitdance.com>
Date: Wed, 22 Apr 2026 15:30:50 -0400
Subject: [PATCH 05/10] Bump versionadded to 3.15

---
 Doc/library/email.errors.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index d9254039d882a1..0d9270c85165f4 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -1,4 +1,4 @@
-:mod:`!email.errors`: Exception and Defect classes
+/:mod:`!email.errors`: Exception and Defect classes
 --------------------------------------------------
 
 .. module:: email.errors
@@ -65,7 +65,7 @@ The following exception classes are defined in the :mod:`email.errors` module:
    a mailbox incompatible with the policy in use.
    (See :attr:`email.policy.EmailPolicy.utf8`.)
 
-   .. versionadded:: 3.14
+   .. versionadded:: 3.15
 
 
 .. exception:: MessageDefect()

From 3df70b636f501bbb7b413ad6970f8153a7daaaf4 Mon Sep 17 00:00:00 2001
From: "R. David Murray" <rdmurray@bitdance.com>
Date: Wed, 22 Apr 2026 16:15:56 -0400
Subject: [PATCH 06/10] fix inadvertent typo

---
 Doc/library/email.errors.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index 460a9e58fc1174..c65e7e9f96cc23 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -1,4 +1,4 @@
-/:mod:`!email.errors`: Exception and Defect classes
+:mod:`!email.errors`: Exception and Defect classes
 --------------------------------------------------
 
 .. module:: email.errors

From 8f3f6fd53512c89043ddf6d169ab42ef5670825f Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Sun, 26 Apr 2026 13:44:12 -0400
Subject: [PATCH 07/10] Remove incomplete fix and temporarily disable new
 tests.

---
 Lib/email/_header_value_parser.py     | 11 -----------
 Lib/test/test_email/test_generator.py |  6 ++++--
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index ee70c0b1119259..4c5394ab6353ac 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2885,17 +2885,6 @@ def _refold_parse_tree(parse_tree, *, policy):
             last_word_is_ew = False
             continue
 
-        if want_encoding and part.token_type == 'addr-spec':
-            # RFC2047 forbids encoded-word in any part of an addr-spec.
-            if charset == 'unknown-8bit':
-                # Non-ASCII addr-spec came from parsed message; leave unchanged.
-                want_encoding = False
-            else:
-                raise errors.InvalidMailboxError(
-                    f"Non-ASCII mailbox '{part}' is invalid"
-                    " under current policy setting (utf8=False)"
-                )
-
         if want_encoding and not wrap_as_ew_blocked:
             if not part.as_ew_allowed:
                 want_encoding = False
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index bb45f1c9a4e6ef..fb9d9be3e20df0 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -296,7 +296,8 @@ def test_keep_long_encoded_newlines(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), self.typ(expected))
 
-    def test_non_ascii_addr_spec_raises(self):
+    # XXX renable after fix.
+    def xest_non_ascii_addr_spec_raises(self):
         # RFC2047 encoded-word is not permitted in any part of an addr-spec.
         # (See also test_non_ascii_addr_spec_preserved below.)
         g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
@@ -579,7 +580,8 @@ def test_smtp_policy(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), expected)
 
-    def test_non_ascii_addr_spec_preserved(self):
+    # XXX renable after fix.
+    def xest_non_ascii_addr_spec_preserved(self):
         # A defective non-ASCII addr-spec parsed from the original
         # message is left unchanged when flattening.
         # (See also test_non_ascii_addr_spec_raises above.)

From f47e029a134be21e0e576873ad9702974b3dbc2e Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Sun, 26 Apr 2026 13:45:00 -0400
Subject: [PATCH 08/10] Move mime-parameter folding to top of loop.

The mime parameter folder doesn't make use of the encoding check
done by the code that is now below it; it does its own.  So it
makes more sense to take that branch first.  This will simplify
subsequent changes.
---
 Lib/email/_header_value_parser.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 4c5394ab6353ac..2d234cdfffea7f 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2858,6 +2858,11 @@ def _refold_parse_tree(parse_tree, *, policy):
         if part is end_ew_not_allowed:
             wrap_as_ew_blocked -= 1
             continue
+        if part.token_type == 'mime-parameters':
+            # Mime parameter folding (using RFC2231) is extra special.
+            _fold_mime_parameters(part, lines, maxlen, encoding)
+            last_word_is_ew = False
+            continue
         tstr = str(part)
         if not want_encoding:
             if part.token_type in ('ptext', 'vtext'):
@@ -2879,12 +2884,6 @@ def _refold_parse_tree(parse_tree, *, policy):
                 charset = 'utf-8'
             want_encoding = True
 
-        if part.token_type == 'mime-parameters':
-            # Mime parameter folding (using RFC2231) is extra special.
-            _fold_mime_parameters(part, lines, maxlen, encoding)
-            last_word_is_ew = False
-            continue
-
         if want_encoding and not wrap_as_ew_blocked:
             if not part.as_ew_allowed:
                 want_encoding = False

From 73c7b6b60036ee28a7195c1bcccf07c621b11f9c Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Sun, 26 Apr 2026 14:49:51 -0400
Subject: [PATCH 09/10] Fix the bug, update the doc changes.

This is a more complete fix, covering any syntax part where encoded
words are not permitted, and the doc changes are adjusted accordingly.
There is also no need for a new exception, since HeaderWriteError
already exists.

The fix itself is to use a separate code loop to fold parts that
may not have encoded words, guaranteeing that we do not do incorrect
encoding.  This opens a door to simplifying the main folding loop,
but that is a much bigger refactoring job better left for another time.
---
 Doc/library/email.errors.rst                  |  9 ---
 Doc/library/email.policy.rst                  | 19 +++--
 Lib/email/_header_value_parser.py             | 80 ++++++++++++++++---
 Lib/email/errors.py                           |  4 -
 .../test_email/test__header_value_parser.py   |  8 +-
 Lib/test/test_email/test_generator.py         | 42 ++++++----
 ...4-07-31-17-22-10.gh-issue-83938.TtUa-c.rst |  2 +-
 ...-07-31-17-23-06.gh-issue-122476.TtUa-c.rst |  2 +-
 8 files changed, 112 insertions(+), 54 deletions(-)

diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index c65e7e9f96cc23..2f7c9140cfcbe5 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -59,15 +59,6 @@ The following exception classes are defined in the :mod:`!email.errors` module:
    headers.
 
 
-.. exception:: InvalidMailboxError()
-
-   Raised when serializing a message with an address header that contains
-   a mailbox incompatible with the policy in use.
-   (See :attr:`email.policy.EmailPolicy.utf8`.)
-
-   .. versionadded:: 3.15
-
-
 .. exception:: MessageDefect()
 
    This is the base class for all defects found when parsing email messages.
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
index b6ebfbd782c30a..8983b406edecb5 100644
--- a/Doc/library/email.policy.rst
+++ b/Doc/library/email.policy.rst
@@ -409,16 +409,19 @@ added matters.  To illustrate::
       the ``SMTPUTF8`` extension (:rfc:`6531`).
 
       When ``False``, the generator will raise an
-      :exc:`~email.errors.InvalidMailboxError` if any address header includes
-      a mailbox ("addr-spec") with non-ASCII characters. To use a mailbox with
-      an internationalized domain name, first encode the domain using the
-      third-party :pypi:`idna` or :pypi:`uts46` module or with
-      :mod:`encodings.idna`. It is not possible to use a non-ASCII username
-      ("local-part") in a mailbox when ``utf8=False``.
+      :exc:`~email.errors.HeaderWriteError` if any header includes non-ASCII
+      characters in a context where :rfc:`2047` does not permit encoded words.
+      This particularly applies to mailboxes ("addr-spec") with non-ASCII
+      characters, which can be created via :class:`~email.headerregistry.Address`.
+      To use a mailbox with a non-ASCII domain name with ``utf8=False``, first
+      encode the domain using the third-party :pypi:`idna` or :pypi:`uts46`
+      module or with :mod:`encodings.idna`. It is not possible to use a
+      non-ASCII username ("local-part") in a mailbox when ``utf8=False``.
 
-      .. versionchanged:: 3.14
+      .. versionchanged:: 3.15
-         Raises :exc:`~email.errors.InvalidMailboxError`. (Earlier versions
-         incorrectly applied :rfc:`2047` to non-ASCII addr-specs.)
+         Can trigger the raising of :exc:`~email.errors.HeaderWriteError`.
+         (Earlier versions incorrectly applied :rfc:`2047` in certain contexts,
+         most notably in addr-specs.)
 
    .. attribute:: refold_source
 
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 2d234cdfffea7f..43216b0af84326 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -157,10 +157,7 @@ def all_defects(self):
     def startswith_fws(self):
         return self[0].startswith_fws()
 
-    @property
-    def as_ew_allowed(self):
-        """True if all top level tokens of this part may be RFC2047 encoded."""
-        return all(part.as_ew_allowed for part in self)
+    as_ew_allowed = True
 
     @property
     def comments(self):
@@ -429,6 +426,7 @@ def addr_spec(self):
 class AngleAddr(TokenList):
 
     token_type = 'angle-addr'
+    as_ew_allowed = False
 
     @property
     def local_part(self):
@@ -847,26 +845,22 @@ def params(self):
 
 class ContentType(ParameterizedHeaderValue):
     token_type = 'content-type'
-    as_ew_allowed = False
     maintype = 'text'
     subtype = 'plain'
 
 
 class ContentDisposition(ParameterizedHeaderValue):
     token_type = 'content-disposition'
-    as_ew_allowed = False
     content_disposition = None
 
 
 class ContentTransferEncoding(TokenList):
     token_type = 'content-transfer-encoding'
-    as_ew_allowed = False
     cte = '7bit'
 
 
 class HeaderLabel(TokenList):
     token_type = 'header-label'
-    as_ew_allowed = False
 
 
 class MsgID(TokenList):
@@ -2838,13 +2832,68 @@ def _steal_trailing_WSP_if_exists(lines):
 
 
 def _refold_parse_tree(parse_tree, *, policy):
-    """Return string of contents of parse_tree folded according to RFC rules.
-
-    """
     # max_line_length 0/None means no limit, ie: infinitely long.
     maxlen = policy.max_line_length or sys.maxsize
     encoding = 'utf-8' if policy.utf8 else 'us-ascii'
     lines = ['']  # Folded lines to be output
+    if parse_tree.as_ew_allowed:
+        _refold_with_ew(parse_tree, lines, maxlen, encoding, policy=policy)
+    else:
+        _refold_without_ew(parse_tree, lines, maxlen, encoding, policy=policy)
+    return policy.linesep.join(lines) + policy.linesep
+
+def _refold_without_ew(parse_tree, lines, maxlen, encoding, *, policy):
+    parts = list(parse_tree)
+    while parts:
+        part = parts.pop(0)
+        tstr = str(part)
+        try:
+            tstr.encode(encoding)
+        except UnicodeEncodeError:
+            if any(isinstance(x, errors.UndecodableBytesDefect)
+                   for x in part.all_defects):
+                # There is garbage data from parsing a message in binary mode,
+                # just pass it through.  Not good, but the best we can do.
+                pass
+            elif policy.utf8:
+                # If this happens, it's a programmer error.
+                raise
+            else:
+                raise errors.HeaderWriteError(
+                    f"Non-ASCII {part.token_type} '{part}' is invalid"
+                    " under current policy setting (utf8=False)"
+                )
+        if len(tstr) <= maxlen - len(lines[-1]):
+            lines[-1] += tstr
+            continue
+        # This part is too long to fit.  The RFC wants us to break at
+        # "major syntactic breaks", so if we consider this part to be
+        # one, check if it will fit on the next line by itself.
+        if (part.syntactic_break and
+                len(tstr) + 1 <= maxlen):
+            newline = _steal_trailing_WSP_if_exists(lines)
+            if newline or part.startswith_fws():
+                lines.append(newline + tstr)
+                continue
+        if not hasattr(part, 'encode'):
+            # It's not a terminal, try folding the subparts.
+            newparts = list(part)
+            parts = newparts + parts
+            continue
+        # We can't figure out how to wrap it, so give up.
+        newline = _steal_trailing_WSP_if_exists(lines)
+        if newline or part.startswith_fws():
+            lines.append(newline + tstr)
+        else:
+            # We can't fold it onto the next line either...
+            lines[-1] += tstr
+    return
+
+
+def _refold_with_ew(parse_tree, lines, maxlen, encoding, *, policy):
+    """Fold parse_tree into lines in place, using encoded words as needed.
+
+    """
     last_word_is_ew = False
     last_ew = None  # if there is an encoded word in the last line of lines,
                     # points to the encoded word's first character
@@ -2885,7 +2934,10 @@ def _refold_parse_tree(parse_tree, *, policy):
             want_encoding = True
 
         if want_encoding and not wrap_as_ew_blocked:
-            if not part.as_ew_allowed:
+            if any(
+                    not x.as_ew_allowed for x in part
+                    if hasattr(x, 'as_ew_allowed')
+                ):
                 want_encoding = False
                 last_ew = None
                 if part.syntactic_break:
@@ -2966,6 +3018,8 @@ def _refold_parse_tree(parse_tree, *, policy):
                     [ValueTerminal(make_quoted_pairs(p), 'ptext')
                      for p in newparts] +
                     [ValueTerminal('"', 'ptext')])
+                _refold_without_ew(newparts, lines, maxlen, encoding, policy=policy)
+                continue
             if part.token_type == 'comment':
                 newparts = (
                     [ValueTerminal('(', 'ptext')] +
@@ -2993,7 +3047,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             lines[-1] += tstr
         last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP))
 
-    return policy.linesep.join(lines) + policy.linesep
+    return
 
 def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew):
     """Fold string to_encode into lines as encoded word, combining if allowed.
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index e5601132d024fe..6bc744bd59c5bb 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -33,10 +33,6 @@ class HeaderWriteError(MessageError):
     """Error while writing headers."""
 
 
-class InvalidMailboxError(MessageError, ValueError):
-    """A mailbox was not compatible with the policy in use."""
-
-
 # These are parsing defects which the parser was able to work around.
 class MessageDefect(ValueError):
     """Base class for a message defect."""
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index e28fe3892015b9..f8f5c41b4474c8 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3364,10 +3364,12 @@ def test_fold_unfoldable_element_stealing_whitespace(self):
         self._test(token, expected, policy=policy)
 
     def test_encoded_word_with_undecodable_bytes(self):
-        self._test(parser.get_address_list(
-            ' =?utf-8?Q?=E5=AE=A2=E6=88=B6=E6=AD=A3=E8=A6=8F=E4=BA=A4=E7?='
+        self._test(
+            parser.get_address_list(
+                ' =?utf-8?Q?=E5=AE=A2=E6=88=B6=E6=AD=A3=E8=A6=8F=E4=BA=A4=E7?='
+                ' <xyz@abc.com>'
                 )[0],
-            ' =?unknown-8bit?b?5a6i5oi25q2j6KaP5Lqk5w==?=\n',
+            ' =?unknown-8bit?b?5a6i5oi25q2j6KaP5Lqk5w==?= <xyz@abc.com>\n',
             )
 
 
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index fb9d9be3e20df0..f34e1f214a362b 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -296,30 +296,43 @@ def test_keep_long_encoded_newlines(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), self.typ(expected))
 
-    # XXX renable after fix.
-    def xest_non_ascii_addr_spec_raises(self):
-        # RFC2047 encoded-word is not permitted in any part of an addr-spec.
-        # (See also test_non_ascii_addr_spec_preserved below.)
+    def test_non_ascii_addr_spec_raises(self):
+        # non-ascii is not permitted in any part of an addr-spec.  If the
+        # programmer generated it, it's an error.  (See also
+        # test_non_ascii_addr_spec_preserved below.)
         g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
+        # XXX The particular part detected here isn't part of a behavioral
+        # spec and may change in the future.
         cases = [
-            'wők@example.com',
-            'wok@exàmple.com',
-            'wők@exàmple.com',
-            '"Name, for display" <wők@example.com>',
-            'Näyttönimi <wők@example.com>',
+            ('wők@example.com', 'wők', 'local-part'),
+            ('wok@exàmple.com', 'exàmple.com', 'domain'),
+            ('wők@exàmple.com', 'wők', 'local-part'),
+            (
+                '"Name, for display" <wők@example.com>',
+                'wők@example.com',
+                'addr-spec',
+                ),
+            (
+                'Näyttönimi <wők@example.com>',
+                'wők@example.com',
+                'addr-spec',
+                ),
         ]
-        for address in cases:
+        for address, badtoken, partname in cases:
             with self.subTest(address=address):
                 msg = EmailMessage()
                 msg['To'] = address
-                addr_spec = msg['To'].addresses[0].addr_spec
                 expected_error = (
-                    fr"(?i)(?=.*non-ascii)(?=.*{re.escape(addr_spec)})(?=.*policy.*utf8)"
+                    fr"(?i)(?=.*non-ascii)"
+                    fr"(?=.*{re.escape(badtoken)})"
+                    fr"(?=.*{partname})"
+                    fr"(?=.*policy.*utf8)"
                 )
                 with self.assertRaisesRegex(
-                    email.errors.InvalidMailboxError, expected_error
+                    email.errors.HeaderWriteError, expected_error
                 ):
                     g.flatten(msg)
+
     def _test_boundary_detection(self, linesep):
         # Generate a boundary token in the same way as _make_boundary
         token = random.randrange(sys.maxsize)
@@ -580,8 +593,7 @@ def test_smtp_policy(self):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), expected)
 
-    # XXX renable after fix.
-    def xest_non_ascii_addr_spec_preserved(self):
+    def test_non_ascii_addr_spec_preserved(self):
         # A defective non-ASCII addr-spec parsed from the original
         # message is left unchanged when flattening.
         # (See also test_non_ascii_addr_spec_raises above.)
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
index d4b07bf06b9236..7082c72f685b05 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-22-10.gh-issue-83938.TtUa-c.rst
@@ -1,7 +1,7 @@
 The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
 a mailbox with non-ASCII characters in its domain. Under a policy with
 :attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
-such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
+such a message will now raise an :exc:`~email.errors.HeaderWriteError`.
 Either apply an appropriate IDNA encoding to convert the domain to ASCII before
 serialization, or use :data:`email.policy.SMTPUTF8` (or another policy with
 ``utf8=True``) to correctly pass through the internationalized domain name
diff --git a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
index 6fca53c1b2a409..29c076d3a746c6 100644
--- a/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-31-17-23-06.gh-issue-122476.TtUa-c.rst
@@ -1,7 +1,7 @@
 The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
 a mailbox with non-ASCII characters in its local-part. Under a policy with
 :attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
-such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
+such a message will now raise an :exc:`~email.errors.HeaderWriteError`.
 There is no valid 7-bit encoding for an internationalized local-part. Use
 :data:`email.policy.SMTPUTF8` (or another policy with ``utf8=True``) to
 correctly pass through the local-part as Unicode characters.

From 0a259f4800c80db4d72f4a30ce5f266e87d4866c Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Sun, 26 Apr 2026 15:17:23 -0400
Subject: [PATCH 10/10] Add some tests where the local part is folded.

Behavior when folding in parts versus rendering on one line takes
different code paths, so make sure both work.
---
 Lib/test/test_email/test_generator.py | 28 ++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index f34e1f214a362b..8d912738029f78 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -300,7 +300,8 @@ def test_non_ascii_addr_spec_raises(self):
         # non-ascii is not permitted in any part of an addr-spec.  If the
         # programmer generated it, it's an error.  (See also
         # test_non_ascii_addr_spec_preserved below.)
-        g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
+        p = self.policy.clone(utf8=False, max_line_length=20)
+        g = self.genclass(self.ioclass(), policy=p)
         # XXX The particular part detected here isn't part of a behavioral
         # spec and may change in the future.
         cases = [
@@ -317,6 +318,12 @@ def test_non_ascii_addr_spec_raises(self):
                 'wők@example.com',
                 'addr-spec',
                 ),
+            (
+                '"a lőng quoted string as the local part"@example.com',
+                'a lőng quoted string as the local part',
+                'local-part',
+                ),
+
         ]
         for address, badtoken, partname in cases:
             with self.subTest(address=address):
@@ -333,6 +340,25 @@ def test_non_ascii_addr_spec_raises(self):
                 ):
                     g.flatten(msg)
 
+    def test_local_part_quoted_string_wrapped_correctly(self):
+        msg = self.msgmaker(self.typ(textwrap.dedent("""\
+            To: <"a long local part in a quoted string"@example.com>
+            Subject: test
+
+            None
+            """)), policy=self.policy.clone(max_line_length=20))
+        expected = textwrap.dedent("""\
+            To: <"a long local part in a
+             quoted string"@example.com>
+            Subject: test
+
+            None
+            """)
+        s = self.ioclass()
+        g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), self.typ(expected))
+
     def _test_boundary_detection(self, linesep):
         # Generate a boundary token in the same way as _make_boundary
         token = random.randrange(sys.maxsize)