Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d5c4c74

Browse files
committed
#19063: partially fix set_payload handling of non-ASCII string input.
This is a backward-compatible partial fix. The complete fix requires raising an error instead of accepting the invalid input, so the real fix is only suitable for 3.4.
1 parent 31a6554 commit d5c4c74

4 files changed

Lines changed: 63 additions & 11 deletions

File tree

Lib/email/charset.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,8 @@ def body_encode(self, string):
386386
string using the ascii codec produces the correct string version
387387
of the content.
388388
"""
389-
# 7bit/8bit encodings return the string unchanged (module conversions)
389+
if not string:
390+
return string
390391
if self.body_encoding is BASE64:
391392
if isinstance(string, str):
392393
string = string.encode(self.output_charset)
@@ -398,13 +399,9 @@ def body_encode(self, string):
398399
# character set, then, we must turn it into pseudo bytes via the
399400
# latin1 charset, which will encode any byte as a single code point
400401
# between 0 and 255, which is what body_encode is expecting.
401-
#
402-
# Note that this clause doesn't handle the case of a _payload that
403-
# is already bytes. It never did, and the semantics of _payload
404-
# being bytes has never been nailed down, so fixing that is a
405-
# longer term TODO.
406402
if isinstance(string, str):
407-
string = string.encode(self.output_charset).decode('latin1')
403+
string = string.encode(self.output_charset)
404+
string = string.decode('latin1')
408405
return email.quoprimime.body_encode(string)
409406
else:
410407
if isinstance(string, str):

Lib/email/message.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,19 @@ def set_payload(self, payload, charset=None):
275275
Optional charset sets the message's default character set. See
276276
set_charset() for details.
277277
"""
278-
if isinstance(payload, bytes):
279-
payload = payload.decode('ascii', 'surrogateescape')
280-
self._payload = payload
278+
if hasattr(payload, 'encode'):
279+
if charset is None:
280+
# We should check for ASCII-only here, but we can't do that
281+
# for backward compatibility reasons. Fixed in 3.4.
282+
self._payload = payload
283+
return
284+
if not isinstance(charset, Charset):
285+
charset = Charset(charset)
286+
payload = payload.encode(charset.output_charset)
287+
if hasattr(payload, 'decode'):
288+
self._payload = payload.decode('ascii', 'surrogateescape')
289+
else:
290+
self._payload = payload
281291
if charset is not None:
282292
self.set_charset(charset)
283293

@@ -316,7 +326,15 @@ def set_charset(self, charset):
316326
try:
317327
cte(self)
318328
except TypeError:
319-
self._payload = charset.body_encode(self._payload)
329+
# This if is for backward compatibility and will be removed
330+
# in 3.4 when the ascii check is added to set_payload.
331+
payload = self._payload
332+
if payload:
333+
try:
334+
payload = payload.encode('ascii', 'surrogateescape')
335+
except UnicodeError:
336+
payload = payload.encode(charset.output_charset)
337+
self._payload = charset.body_encode(payload)
320338
self.add_header('Content-Transfer-Encoding', cte)
321339

322340
def get_charset(self):

Lib/test/test_email/test_email.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,38 @@ def test_set_payload_with_charset(self):
9292
msg.set_payload('This is a string payload', charset)
9393
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
9494

95+
def test_set_payload_with_8bit_data_and_charset(self):
96+
data = b'\xd0\x90\xd0\x91\xd0\x92'
97+
charset = Charset('utf-8')
98+
msg = Message()
99+
msg.set_payload(data, charset)
100+
self.assertEqual(msg['content-transfer-encoding'], 'base64')
101+
self.assertEqual(msg.get_payload(decode=True), data)
102+
self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
103+
104+
def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
105+
data = b'\xd0\x90\xd0\x91\xd0\x92'
106+
charset = Charset('utf-8')
107+
charset.body_encoding = None # Disable base64 encoding
108+
msg = Message()
109+
msg.set_payload(data.decode('utf-8'), charset)
110+
self.assertEqual(msg['content-transfer-encoding'], '8bit')
111+
self.assertEqual(msg.get_payload(decode=True), data)
112+
113+
def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
114+
data = b'\xd0\x90\xd0\x91\xd0\x92'
115+
charset = Charset('utf-8')
116+
charset.body_encoding = None # Disable base64 encoding
117+
msg = Message()
118+
msg.set_payload(data, charset)
119+
self.assertEqual(msg['content-transfer-encoding'], '8bit')
120+
self.assertEqual(msg.get_payload(decode=True), data)
121+
122+
def test_set_payload_to_list(self):
123+
msg = Message()
124+
msg.set_payload([])
125+
self.assertEqual(msg.get_payload(), [])
126+
95127
def test_get_charsets(self):
96128
eq = self.assertEqual
97129

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ Core and Builtins
2323
Library
2424
-------
2525

26+
- Issue #19063: if a Charset's body_encoding was set to None, the email
27+
package would generate a message claiming the Content-Transfer-Encoding
28+
was 7bit, and produce garbage output for the content. This now works.
29+
A couple of other set_payload mishandlings of non-ASCII are also fixed.
30+
2631
- Issue #17200: telnetlib's read_until and expect timeout was broken by the
2732
fix to Issue #14635 in Python 3.3.0 to be interpreted as milliseconds
2833
instead of seconds when the platform supports select.poll (ie: everywhere).

0 commit comments

Comments
 (0)