diff --git a/Lib/email/header.py b/Lib/email/header.py
index 4ab0032bc66123..76987daef6d5b2 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -61,7 +61,7 @@
 def decode_header(header):
     """Decode a message header value without converting charset.
 
-    Returns a list of (string, charset) pairs containing each of the decoded
+    Returns a list of (bytes, charset) pairs containing each of the decoded
     parts of the header. Charset is None for non-encoded parts of the header,
     otherwise a lower-case string containing the name of the character set
     specified in the encoded string.
@@ -78,7 +78,9 @@ def decode_header(header):
                     for string, charset in header._chunks]
     # If no encoding, just return the header with no charset.
     if not ecre.search(header):
-        return [(header, None)]
+        # Encode to bytes so the first member of every returned pair is
+        # bytes, as the docstring states.
+        return [(bytes(header, 'raw-unicode-escape'), None)]
     # First step is to parse all the encoded parts into triplets of the form
     # (encoded_string, encoding, charset). For unencoded strings, the last
     # two parts will be None.
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index a3ccbbbabfb328..d89bd87aaf118f 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -2432,6 +2432,20 @@ def test_multiline_header(self):
         self.assertEqual(str(make_header(decode_header(s))),
                          '"Müller T" ')
 
+    def test_unencoded_ascii(self):
+        # issue 22833
+        s = 'header without encoded words'
+        self.assertEqual(decode_header(s),
+            [(b'header without encoded words', None)])
+
+    def test_unencoded_non_ascii(self):
+        # issue 22833
+        # The expected value holds U+00E8 as the single byte 0xE8, not as
+        # its two-byte UTF-8 encoding.
+        s = 'header with unexpected non ASCII caract\xe8res'
+        self.assertEqual(decode_header(s),
+            [(b'header with unexpected non ASCII caract\xe8res', None)])
+
 
 # Test the MIMEMessage class
 class TestMIMEMessage(TestEmailBase):
diff --git a/Misc/NEWS.d/next/Library/2022-01-11-21-40-14.bpo-22833.WB-JWw.rst b/Misc/NEWS.d/next/Library/2022-01-11-21-40-14.bpo-22833.WB-JWw.rst
new file mode 100644
index 00000000000000..5ca8dc7da62399
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-01-11-21-40-14.bpo-22833.WB-JWw.rst
@@ -0,0 +1,3 @@
+The :func:`email.header.decode_header` function now always provides :class:`bytes`,
+never :class:`str`, as the first member of the tuples it returns. Previously, it would
+return ``(str, None)`` when decoding a header consisting only of a single, unencoded part.