Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e292283

Browse files
committed
Merge #14291: if a header has non-ascii unicode, default to CTE using utf-8
In Python 2, if a unicode string was assigned as the value of a header, email would automatically CTE encode it using the UTF-8 charset. This capability was lost in the Python 3 translation, and this patch restores it. Patch by Ali Ikinci, assisted by R. David Murray. I also added a fix for the mailbox test that was depending (with a comment noting that depending on it was a bad idea) on non-ASCII causing message_from_string to raise an error. It now uses support.patch to induce an error during message serialization.
2 parents b20a019 + 7441a7a commit e292283

5 files changed

Lines changed: 33 additions & 7 deletions

File tree

Lib/email/header.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,12 @@ def append(self, s, charset=None, errors='strict'):
283283
# character set, otherwise an early error is thrown.
284284
output_charset = charset.output_codec or 'us-ascii'
285285
if output_charset != _charset.UNKNOWN8BIT:
286-
s.encode(output_charset, errors)
286+
try:
287+
s.encode(output_charset, errors)
288+
except UnicodeEncodeError:
289+
if output_charset!='us-ascii':
290+
raise
291+
charset = UTF8
287292
self._chunks.append((s, charset))
288293

289294
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):

Lib/test/test_email/test_email.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,19 @@ def test_embeded_header_via_string_rejected(self):
604604
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
605605
self.assertRaises(errors.HeaderParseError, msg.as_string)
606606

607+
def test_unicode_header_defaults_to_utf8_encoding(self):
608+
# Issue 14291
609+
m = MIMEText('abc\n')
610+
m['Subject'] = 'É test'
611+
self.assertEqual(str(m),textwrap.dedent("""\
612+
Content-Type: text/plain; charset="us-ascii"
613+
MIME-Version: 1.0
614+
Content-Transfer-Encoding: 7bit
615+
Subject: =?utf-8?q?=C3=89_test?=
616+
617+
abc
618+
"""))
619+
607620
# Test the email.encoders module
608621
class TestEncoders(unittest.TestCase):
609622

@@ -1045,9 +1058,13 @@ def test_long_8bit_header_no_charset(self):
10451058
'f\xfcr Offshore-Windkraftprojekte '
10461059
10471060
msg['Reply-To'] = header_string
1048-
self.assertRaises(UnicodeEncodeError, msg.as_string)
1061+
eq(msg.as_string(maxheaderlen=78), """\
1062+
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1063+
=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1064+
1065+
""")
10491066
msg = Message()
1050-
msg['Reply-To'] = Header(header_string, 'utf-8',
1067+
msg['Reply-To'] = Header(header_string,
10511068
header_name='Reply-To')
10521069
eq(msg.as_string(maxheaderlen=78), """\
10531070
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=

Lib/test/test_mailbox.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,10 @@ def test_add_nonascii_string_header_raises(self):
111111
self.assertMailboxEmpty()
112112

113113
def test_add_that_raises_leaves_mailbox_empty(self):
114-
# XXX This test will start failing when Message learns to handle
115-
# non-ASCII string headers, and a different internal failure will
116-
# need to be found or manufactured.
117-
with self.assertRaises(ValueError):
114+
def raiser(*args, **kw):
115+
raise Exception("a fake error")
116+
support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
117+
with self.assertRaises(Exception):
118118
self._box.add(email.message_from_string("From: Alphöso"))
119119
self.assertEqual(len(self._box), 0)
120120
self._box.close()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ Gerhard Häring
470470
Fredrik Håård
471471
Catalin Iacob
472472
Mihai Ibanescu
473+
Ali Ikinci
473474
Lars Immisch
474475
Bobby Impollonia
475476
Meador Inge

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ Core and Builtins
2424
Library
2525
-------
2626

27+
- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
28+
instead of raising an error. This fixes a regression relative to 2.7.
29+
2730
- Issue #989712: Support using Tk without a mainloop.
2831

2932
- Issue #5219: Prevent event handler cascade in IDLE.

0 commit comments

Comments
 (0)