From c98f6b9d0f81e0c828627afa8f21b487d731e5d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Fri, 6 Mar 2026 12:30:31 -0300 Subject: [PATCH 1/6] Refactor CVE-2023-27043 patch to support Unicode characters --- Lib/email/test/test_email.py | 16 ++++++++++++++++ Lib/email/test/test_email_renamed.py | 16 ++++++++++++++++ Lib/email/utils.py | 7 ++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 801b31cc05985c..fa84ca8cc426a1 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2425,6 +2425,22 @@ def test_getaddresses_nasty(self): eq(Utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + + def test_getaddresses_nasty_unicode(self): + """Test parseaddr with unicode strings in Python 2""" + + test_cases = [ + u'user@example.com', + u'Test User ', + u'"Test User" ', + ] + + for addr in test_cases: + result = Utils.parseaddr(addr, strict=True) + self.assertNotEqual(result, ('', '')) + + result_non_strict = Utils.parseaddr(addr, strict=False) + self.assertEqual(result, result_non_strict) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index e3c9af9f7e2be1..2fe72d7be8a132 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -2286,6 +2286,22 @@ def test_getaddresses_nasty(self): eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + + def test_getaddresses_nasty_unicode(self): + """Test parseaddr with unicode strings in Python 2""" + + test_cases = [ + u'user@example.com', + u'Test User ', + u'"Test User" ', + ] + + for addr in test_cases: + result = utils.parseaddr(addr, strict=True) + self.assertNotEqual(result, ('', '')) + + result_non_strict = utils.parseaddr(addr, strict=False) + self.assertEqual(result, result_non_strict) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 56578ba800abb6..26c654a6601004 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -339,9 +339,14 @@ def parseaddr(addr, strict=True): if isinstance(addr, list): addr = addr[0] - if not isinstance(addr, str): + # FIX: Support both str and unicode in Python 2 + if not isinstance(addr, (str, unicode)): # Python 2 compatible return ('', '') + # Convert unicode to str for consistent processing + if isinstance(addr, unicode): + addr = addr.encode('utf-8') + addr = _pre_parse_validation([addr])[0] addrs = _post_parse_validation(_AddressList(addr).addresslist) From 4d5392639a095965f92899f8c794d762236d63b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Fri, 6 Mar 2026 12:34:45 -0300 Subject: [PATCH 2/6] remove comment --- Lib/email/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 26c654a6601004..5416f0320ef12d 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -340,7 +340,7 @@ def parseaddr(addr, strict=True): addr = addr[0] # FIX: Support both str and unicode in Python 2 - if not isinstance(addr, (str, unicode)): # Python 2 compatible + if not isinstance(addr, (str, unicode)): return ('', '') # Convert unicode to str for consistent processing From cfe4763917949cf0e4791ab8fab2d55219396ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Fri, 6 Mar 2026 13:14:13 -0300 Subject: [PATCH 3/6] 2.7.18.13 Release --- Include/patchlevel.h | 2 +- Misc/NEWS.d/2.7.18.13.rst | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/2.7.18.13.rst diff --git a/Include/patchlevel.h b/Include/patchlevel.h index 95c1ac9f755477..4b830a2abea6a3 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -27,7 +27,7 @@ #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "2.7.18.12" +#define PY_VERSION "2.7.18.13" /*--end constants--*/ /* Subversion Revision number of this file (not of the repository). Empty diff --git a/Misc/NEWS.d/2.7.18.13.rst b/Misc/NEWS.d/2.7.18.13.rst new file mode 100644 index 00000000000000..df28689cd5ff1a --- /dev/null +++ b/Misc/NEWS.d/2.7.18.13.rst @@ -0,0 +1,7 @@ +.. bpo: ? +.. date: 2026-03-06 +.. nonce: +.. release date: 2026-03-06 +.. section: Core and Builtins + +Refactor CVE-2023-27043 patch to support Unicode characters From 24ac66a09505e50e4c7f0c0e61b62eef6f20d51d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Fri, 13 Mar 2026 18:00:48 -0300 Subject: [PATCH 4/6] Add better tests for CVE 2023-27043 --- Lib/email/test/test_email.py | 46 ++++++++++++++++---------- Lib/email/test/test_email_renamed.py | 48 ++++++++++++++++++---------- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index fa84ca8cc426a1..f143689a937c1b 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2320,6 +2320,22 @@ def test_parseaddr_multiple_domains(self): ('', '') ) + def test_parseaddr_unicode(self): + """Test parseaddr with unicode strings""" + + test_cases = [ + u'user@example.com', + u'Test User ', + u'"Test User" ', + ] + + for addr in test_cases: + result = Utils.parseaddr(addr, strict=True) + self.assertNotEqual(result, ('', '')) + + result_non_strict = Utils.parseaddr(addr, strict=False) + self.assertEqual(result, result_non_strict) + def test_noquote_dump(self): self.assertEqual( Utils.formataddr(('A Silly Person', 'person@dom.ain')), @@ -2425,22 +2441,6 @@ def test_getaddresses_nasty(self): eq(Utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) - - def test_getaddresses_nasty_unicode(self): - """Test parseaddr with unicode strings in Python 2""" - - test_cases = [ - u'user@example.com', - u'Test User ', - u'"Test User" ', - ] - - for addr in test_cases: - result = Utils.parseaddr(addr, strict=True) - self.assertNotEqual(result, ('', '')) - - result_non_strict = Utils.parseaddr(addr, strict=False) - self.assertEqual(result, result_non_strict) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" @@ -2448,6 +2448,20 @@ def test_getaddresses_embedded_comment(self): addrs = Utils.getaddresses(['User ((nested comment)) ']) eq(addrs[0][1], 'foo@bar.com') + def test_getaddresses_unicode(self): + """Test getaddresses with unicode strings in Python 2""" + + test_cases = [ + ([u'user@example.com'], [('', 'user@example.com')]), + ([u'Test User '], [('Test User', 'user@example.com')]), + ([u'"Test User" '], [('Test User', 'user@example.com')]), + ([u'user1@example.com', u'user2@example.com'], [('', 'user1@example.com'), ('', 'user2@example.com')]), + ] + + for addrs, expected in test_cases: + result = Utils.getaddresses(addrs) + self.assertEqual(result, expected) + def test_make_msgid_collisions(self): # Test make_msgid uniqueness, even with multiple threads class MsgidsThread(Thread): diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 2fe72d7be8a132..9778f863daa13f 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -2199,6 +2199,22 @@ def test_parseaddr_empty(self): self.assertEqual(utils.parseaddr('<>'), ('', '')) self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') + def test_parseaddr_unicode(self): + """Test parseaddr with unicode strings""" + + test_cases = [ + u'user@example.com', + u'Test User ', + u'"Test User" ', + ] + + for addr in test_cases: + result = utils.parseaddr(addr, strict=True) + self.assertNotEqual(result, ('', '')) + + result_non_strict = utils.parseaddr(addr, strict=False) + self.assertEqual(result, result_non_strict) + def test_noquote_dump(self): self.assertEqual( utils.formataddr(('A Silly Person', 'person@dom.ain')), @@ -2286,22 +2302,6 @@ def test_getaddresses_nasty(self): eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) - - def test_getaddresses_nasty_unicode(self): - """Test parseaddr with unicode strings in Python 2""" - - test_cases = [ - u'user@example.com', - u'Test User ', - u'"Test User" ', - ] - - for addr in test_cases: - result = utils.parseaddr(addr, strict=True) - self.assertNotEqual(result, ('', '')) - - result_non_strict = utils.parseaddr(addr, strict=False) - self.assertEqual(result, result_non_strict) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" @@ -2309,7 +2309,21 @@ def test_getaddresses_embedded_comment(self): addrs = utils.getaddresses(['User ((nested comment)) ']) eq(addrs[0][1], 'foo@bar.com') - def test_utils_quote_unquote(self): + def test_getaddresses_unicode(self): + """Test getaddresses with unicode strings in Python 2""" + + test_cases = [ + ([u'user@example.com'], [('', 'user@example.com')]), + ([u'Test User '], [('Test User', 'user@example.com')]), + ([u'"Test User" '], [('Test User', 'user@example.com')]), + ([u'user1@example.com', u'user2@example.com'], [('', 'user1@example.com'), ('', 'user2@example.com')]), + ] + + for addrs, expected in test_cases: + result = utils.getaddresses(addrs) + self.assertEqual(result, expected) + + def test__quote_unquote(self): eq = self.assertEqual msg = Message() msg.add_header('content-disposition', 'attachment', From 6f55a242140cd438e8b024cc03de5cec27b05c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Fri, 13 Mar 2026 18:54:42 -0300 Subject: [PATCH 5/6] Patch `getaddresses` to support unicode strings --- Lib/email/utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 5416f0320ef12d..d9094fbc967155 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -162,7 +162,15 @@ def getaddresses(fieldvalues, strict=True): a = _AddressList(all) return a.addresslist - fieldvalues = [str(v) for v in fieldvalues] + converted_values = [] + for v in fieldvalues: + if isinstance(v, unicode): + v = v.encode('utf-8') + elif not isinstance(v, str): + v = str(v) + converted_values.append(v) + + fieldvalues = converted_values fieldvalues = _pre_parse_validation(fieldvalues) addr = COMMASPACE.join(fieldvalues) a = _AddressList(addr) From 3df688e96f258a1e89e019e1890e6ce32eae79e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20P=C3=A1ssaro?= Date: Wed, 18 Mar 2026 12:21:38 -0300 Subject: [PATCH 6/6] Make parseaddr and getaddresses return Unicode when given Unicode input --- Lib/email/test/test_email.py | 31 ++++++++++------- Lib/email/test/test_email_renamed.py | 31 ++++++++++------- Lib/email/utils.py | 50 +++++++++++++++++++++++----- 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index f143689a937c1b..024caa06bb375b 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2322,16 +2322,19 @@ def test_parseaddr_multiple_domains(self): def test_parseaddr_unicode(self): """Test parseaddr with unicode strings""" - test_cases = [ - u'user@example.com', - u'Test User ', - u'"Test User" ', + (u'user@example.com', ('', u'user@example.com')), + (u'Test User ', (u'Test User', u'user@example.com')), + (u'"Test User" ', (u'Test User', u'user@example.com')), ] - for addr in test_cases: + for addr, expected in test_cases: result = Utils.parseaddr(addr, strict=True) - self.assertNotEqual(result, ('', '')) + self.assertEqual(result, expected) + if result[0]: + self.assertIsInstance(result[0], unicode) + if result[1]: + self.assertIsInstance(result[1], unicode) result_non_strict = Utils.parseaddr(addr, strict=False) self.assertEqual(result, result_non_strict) @@ -2449,18 +2452,22 @@ def test_getaddresses_embedded_comment(self): eq(addrs[0][1], 'foo@bar.com') def test_getaddresses_unicode(self): - """Test getaddresses with unicode strings in Python 2""" - + """Test getaddresses with unicode strings""" test_cases = [ - ([u'user@example.com'], [('', 'user@example.com')]), - ([u'Test User '], [('Test User', 'user@example.com')]), - ([u'"Test User" '], [('Test User', 'user@example.com')]), - ([u'user1@example.com', u'user2@example.com'], [('', 'user1@example.com'), ('', 'user2@example.com')]), + ([u'user@example.com'], [('', u'user@example.com')]), + ([u'Test User '], [(u'Test User', u'user@example.com')]), + ([u'"Test User" '], [(u'Test User', u'user@example.com')]), + ([u'user1@example.com', u'user2@example.com'], [('', u'user1@example.com'), ('', u'user2@example.com')]), ] for addrs, expected in test_cases: result = Utils.getaddresses(addrs) self.assertEqual(result, expected) + for realname, email in result: + if realname: + self.assertIsInstance(realname, unicode) + if email: + self.assertIsInstance(email, unicode) def test_make_msgid_collisions(self): # Test make_msgid uniqueness, even with multiple threads diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 9778f863daa13f..f8d6d930c8afb3 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -2201,16 +2201,19 @@ def test_parseaddr_empty(self): def test_parseaddr_unicode(self): """Test parseaddr with unicode strings""" - test_cases = [ - u'user@example.com', - u'Test User ', - u'"Test User" ', + (u'user@example.com', ('', u'user@example.com')), + (u'Test User ', (u'Test User', u'user@example.com')), + (u'"Test User" ', (u'Test User', u'user@example.com')), ] - for addr in test_cases: + for addr, expected in test_cases: result = utils.parseaddr(addr, strict=True) - self.assertNotEqual(result, ('', '')) + self.assertEqual(result, expected) + if result[0]: + self.assertIsInstance(result[0], unicode) + if result[1]: + self.assertIsInstance(result[1], unicode) result_non_strict = utils.parseaddr(addr, strict=False) self.assertEqual(result, result_non_strict) @@ -2310,18 +2313,22 @@ def test_getaddresses_embedded_comment(self): eq(addrs[0][1], 'foo@bar.com') def test_getaddresses_unicode(self): - """Test getaddresses with unicode strings in Python 2""" - + """Test getaddresses with unicode strings""" test_cases = [ - ([u'user@example.com'], [('', 'user@example.com')]), - ([u'Test User '], [('Test User', 'user@example.com')]), - ([u'"Test User" '], [('Test User', 'user@example.com')]), - ([u'user1@example.com', u'user2@example.com'], [('', 'user1@example.com'), ('', 'user2@example.com')]), + ([u'user@example.com'], [('', u'user@example.com')]), + ([u'Test User '], [(u'Test User', u'user@example.com')]), + ([u'"Test User" '], [(u'Test User', u'user@example.com')]), + ([u'user1@example.com', u'user2@example.com'], [('', u'user1@example.com'), ('', u'user2@example.com')]), ] for addrs, expected in test_cases: result = utils.getaddresses(addrs) self.assertEqual(result, expected) + for realname, email in result: + if realname: + self.assertIsInstance(realname, unicode) + if email: + self.assertIsInstance(email, unicode) def test__quote_unquote(self): eq = self.assertEqual diff --git a/Lib/email/utils.py b/Lib/email/utils.py index d9094fbc967155..11097ad603f92b 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -162,16 +162,19 @@ def getaddresses(fieldvalues, strict=True): a = _AddressList(all) return a.addresslist + unicode_flags = [] converted_values = [] for v in fieldvalues: - if isinstance(v, unicode): + is_unicode = isinstance(v, unicode) + unicode_flags.append(is_unicode) + + if is_unicode: v = v.encode('utf-8') elif not isinstance(v, str): v = str(v) converted_values.append(v) - fieldvalues = converted_values - fieldvalues = _pre_parse_validation(fieldvalues) + fieldvalues = _pre_parse_validation(converted_values) addr = COMMASPACE.join(fieldvalues) a = _AddressList(addr) result = _post_parse_validation(a.addresslist) @@ -188,7 +191,29 @@ def getaddresses(fieldvalues, strict=True): if len(result) != n: return [('', '')] - return result + final_result = [] + result_idx = 0 + + for i, was_unicode in enumerate(unicode_flags): + if result_idx >= len(result): + break + + realname, email = result[result_idx] + + if was_unicode: + if realname: + realname = realname.decode('utf-8') + if email: + email = email.decode('utf-8') + + final_result.append((realname, email)) + result_idx += 1 + + while result_idx < len(result): + final_result.append(result[result_idx]) + result_idx += 1 + + return final_result def _check_parenthesis(addr): @@ -347,12 +372,12 @@ def parseaddr(addr, strict=True): if isinstance(addr, list): addr = addr[0] - # FIX: Support both str and unicode in Python 2 + is_unicode = isinstance(addr, unicode) + if not isinstance(addr, (str, unicode)): return ('', '') - # Convert unicode to str for consistent processing - if isinstance(addr, unicode): + if is_unicode: addr = addr.encode('utf-8') addr = _pre_parse_validation([addr])[0] @@ -361,8 +386,17 @@ def parseaddr(addr, strict=True): if not addrs or len(addrs) > 1: return ('', '') - return addrs[0] + result = addrs[0] + if is_unicode: + realname, email = result + if realname: + realname = realname.decode('utf-8') + if email: + email = email.decode('utf-8') + return (realname, email) + + return result # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.