Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 00ae435

Browse files
committed
#18324: set_payload now correctly handles binary input.
This also backs out the previous fixes for #14360, #1717, and #16564. Those bugs were actually caused by the fact that set_payload didn't decode to str, thus rendering the model inconsistent. This fix does mean the data processed by the encoder functions goes through an extra encode/decode cycle, but it means the model is always consistent. Future API updates will provide a better way to encode payloads, which will bypass this minor de-optimization. Tests by Vajrasky Kok.
1 parent 0b16912 commit 00ae435

4 files changed

Lines changed: 45 additions & 16 deletions

File tree

Lib/email/encoders.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def encode_base64(msg):
2828
2929
Also, add an appropriate Content-Transfer-Encoding header.
3030
"""
31-
orig = msg.get_payload()
31+
orig = msg.get_payload(decode=True)
3232
encdata = str(_bencode(orig), 'ascii')
3333
msg.set_payload(encdata)
3434
msg['Content-Transfer-Encoding'] = 'base64'
@@ -40,20 +40,16 @@ def encode_quopri(msg):
4040
4141
Also, add an appropriate Content-Transfer-Encoding header.
4242
"""
43-
orig = msg.get_payload()
44-
if isinstance(orig, str):
45-
# If it is a string, the model data may have binary data encoded in via
46-
# surrogateescape. Convert back to bytes so we can CTE encode it.
47-
orig = orig.encode('ascii', 'surrogateescape')
43+
orig = msg.get_payload(decode=True)
4844
encdata = _qencode(orig)
49-
msg.set_payload(encdata.decode('ascii', 'surrogateescape'))
45+
msg.set_payload(encdata)
5046
msg['Content-Transfer-Encoding'] = 'quoted-printable'
5147

5248

5349

5450
def encode_7or8bit(msg):
5551
"""Set the Content-Transfer-Encoding header to 7bit or 8bit."""
56-
orig = msg.get_payload()
52+
orig = msg.get_payload(decode=True)
5753
if orig is None:
5854
# There's no payload. For backwards compatibility we use 7bit
5955
msg['Content-Transfer-Encoding'] = '7bit'
@@ -75,16 +71,8 @@ def encode_7or8bit(msg):
7571
msg['Content-Transfer-Encoding'] = '8bit'
7672
else:
7773
msg['Content-Transfer-Encoding'] = '7bit'
78-
if not isinstance(orig, str):
79-
msg.set_payload(orig.decode('ascii', 'surrogateescape'))
8074

8175

8276

8377
def encode_noop(msg):
8478
"""Do nothing."""
85-
# Well, not quite *nothing*: in Python3 we have to turn bytes into a string
86-
# in our internal surrogateescaped form in order to keep the model
87-
# consistent.
88-
orig = msg.get_payload()
89-
if not isinstance(orig, str):
90-
msg.set_payload(orig.decode('ascii', 'surrogateescape'))

Lib/email/message.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ def set_payload(self, payload, charset=None):
275275
Optional charset sets the message's default character set. See
276276
set_charset() for details.
277277
"""
278+
if isinstance(payload, bytes):
279+
payload = payload.decode('ascii', 'surrogateescape')
278280
self._payload = payload
279281
if charset is not None:
280282
self.set_charset(charset)

Lib/test/test_email/test_email.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,42 @@ def test_nonascii_add_header_with_tspecial(self):
593593
"attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
594594
msg['Content-Disposition'])
595595

596+
def test_binary_quopri_payload(self):
597+
for charset in ('latin-1', 'ascii'):
598+
msg = Message()
599+
msg['content-type'] = 'text/plain; charset=%s' % charset
600+
msg['content-transfer-encoding'] = 'quoted-printable'
601+
msg.set_payload(b'foo=e6=96=87bar')
602+
self.assertEqual(
603+
msg.get_payload(decode=True),
604+
b'foo\xe6\x96\x87bar',
605+
'get_payload returns wrong result with charset %s.' % charset)
606+
607+
def test_binary_base64_payload(self):
608+
for charset in ('latin-1', 'ascii'):
609+
msg = Message()
610+
msg['content-type'] = 'text/plain; charset=%s' % charset
611+
msg['content-transfer-encoding'] = 'base64'
612+
msg.set_payload(b'Zm9v5paHYmFy')
613+
self.assertEqual(
614+
msg.get_payload(decode=True),
615+
b'foo\xe6\x96\x87bar',
616+
'get_payload returns wrong result with charset %s.' % charset)
617+
618+
def test_binary_uuencode_payload(self):
619+
for charset in ('latin-1', 'ascii'):
620+
for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
621+
msg = Message()
622+
msg['content-type'] = 'text/plain; charset=%s' % charset
623+
msg['content-transfer-encoding'] = encoding
624+
msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
625+
self.assertEqual(
626+
msg.get_payload(decode=True),
627+
b'foo\xe6\x96\x87bar',
628+
str(('get_payload returns wrong result ',
629+
'with charset {0} and encoding {1}.')).\
630+
format(charset, encoding))
631+
596632
def test_add_header_with_name_only_param(self):
597633
msg = Message()
598634
msg.add_header('Content-Disposition', 'inline', foo_bar=None)

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ Core and Builtins
6666
Library
6767
-------
6868

69+
- Issue #18324: set_payload now correctly handles binary input. This also
70+
supersedes the previous fixes for #14360, #1717, and #16564.
71+
6972
- Issue #17119: Fixed integer overflows when processing large strings and tuples
7073
in the tkinter module.
7174

0 commit comments

Comments
 (0)