From 2bba99abb3c779a51f9962b9339ead4eeda6b67e Mon Sep 17 00:00:00 2001 From: Licht-T Date: Sun, 20 May 2018 00:03:43 +0900 Subject: [PATCH 1/6] BUG: Fix inconsistent behavior of non-ascii handling in EmailPolicy.fold --- Lib/email/policy.py | 8 ++++++-- Lib/email/utils.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 5131311ac5ef76..890358e2cfc0ed 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -4,7 +4,7 @@ import re from email._policybase import Policy, Compat32, compat32, _extend_docstrings -from email.utils import _has_surrogates +from email.utils import _has_non_ascii, _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry from email.contentmanager import raw_data_manager from email.message import EmailMessage @@ -209,8 +209,12 @@ def _fold(self, name, value, refold_binary=False): self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if (_has_non_ascii(value) and not self.utf8) \ + or refold \ + or (refold_binary and _has_surrogates(value)): return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 858f620e25bfb0..0e33d5c5d34fff 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -48,6 +48,14 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') +def _has_non_ascii(s): + """Return True if s contains non-ascii character.""" + try: + s.encode('ascii') + return False + except UnicodeEncodeError: + return True + def _has_surrogates(s): """Return True if s contains surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 From 09c0f097a0cb2aef982e7c20fb382309dbb0752b Mon Sep 17 00:00:00 2001 From: Licht-T Date: Sun, 20 May 2018 00:04:23 +0900 Subject: [PATCH 2/6] TST: Add test for non-ascii handling in EmailPolicy.fold --- Lib/test/test_email/test_policy.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 8fecb8a5fcd5a8..077e1ccbbc3013 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -134,6 +134,35 @@ def test_policy_addition(self): for attr, value in expected.items(): self.assertEqual(getattr(added, attr), value) + def test_fold_utf8(self): + expected_ascii = 'Subject: =?utf-8?q?=C3=A1?=\n' + expected_utf8 = 'Subject: á\n' + + msg = email.message.EmailMessage() + s = 'á' + msg['Subject'] = s + + p_ascii = email.policy.default.clone() + p_utf8 = email.policy.default.clone(utf8=True) + + self.assertEqual( + p_ascii.fold('Subject', msg['Subject']), + expected_ascii + ) + self.assertEqual( + p_utf8.fold('Subject', msg['Subject']), + expected_utf8 + ) + + self.assertEqual( + p_ascii.fold('Subject', s), + expected_ascii + ) + self.assertEqual( + p_utf8.fold('Subject', s), + expected_utf8 + ) + def test_register_defect(self): class Dummy: def __init__(self): From 1b132e1db99ea355d8f4bdfa14dc2b6dc4a93203 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Jan 2024 16:24:46 +0200 Subject: [PATCH 3/6] Compactify tests. --- Lib/test/test_email/test_policy.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 5b1f6c56a5be2a..c6b9c80efe1b54 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -146,23 +146,11 @@ def test_fold_utf8(self): p_ascii = email.policy.default.clone() p_utf8 = email.policy.default.clone(utf8=True) - self.assertEqual( - p_ascii.fold('Subject', msg['Subject']), - expected_ascii - ) - self.assertEqual( - p_utf8.fold('Subject', msg['Subject']), - expected_utf8 - ) + self.assertEqual(p_ascii.fold('Subject', msg['Subject']), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', msg['Subject']), expected_utf8) - self.assertEqual( - p_ascii.fold('Subject', s), - expected_ascii - ) - self.assertEqual( - p_utf8.fold('Subject', s), - expected_utf8 - ) + self.assertEqual(p_ascii.fold('Subject', s), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', s), expected_utf8) def test_fold_zero_max_line_length(self): expected = 'Subject: =?utf-8?q?=C3=A1?=\n' From f13304bf8ac980cb483d1dffb7a2f193185c558b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Jan 2024 16:40:07 +0200 Subject: [PATCH 4/6] Optimize code. --- Lib/email/policy.py | 11 +++++++---- Lib/email/utils.py | 7 ------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 16d0d7e9b22f74..8816c84ed175a7 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -5,7 +5,7 @@ import re import sys from email._policybase import Policy, Compat32, compat32, _extend_docstrings -from email.utils import _has_non_ascii, _has_surrogates +from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry from email.contentmanager import raw_data_manager from email.message import EmailMessage @@ -211,9 +211,12 @@ def _fold(self, name, value, refold_binary=False): (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if (_has_non_ascii(value) and not self.utf8) \ - or refold \ - or (refold_binary and _has_surrogates(value)): + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 02b2b0666e5fd8..103cef61a83538 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -43,13 +43,6 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') -def _has_non_ascii(s): - """Return True if s contains non-ascii character.""" - try: - s.encode('ascii') - return False - except UnicodeEncodeError: - return True def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" From f9d0a35559922c6eeb1c7e33275fdb05616297a6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Jan 2024 16:50:02 +0200 Subject: [PATCH 5/6] Add a NEWS entry. --- .../next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst new file mode 100644 index 00000000000000..f1c99c09d2dfe1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst @@ -0,0 +1,2 @@ +:meth:`email.policy.EmailPolicy.fold` now always encodes non-ASCII characters +in headers if :attr:`~email.policy.EmailPolicy.utf8` is false. From ede1fd3a5320419501906147dd25ca8671128fdd Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Jan 2024 16:56:26 +0200 Subject: [PATCH 6/6] Rename the NEWS file. --- ...6.NY_7TS.rst => 2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Library/{2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst => 2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst} (100%) diff --git a/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst similarity index 100% rename from Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-6986.NY_7TS.rst rename to Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst