From 54c64a74374aa6b15d3ccbb08b45c6a7e4e06c89 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Wed, 2 Apr 2025 23:49:01 +0200 Subject: [PATCH 1/3] Always escape non-printable characters --- Lib/test/test_tools/i18n_data/ascii-escapes.pot | 2 +- Tools/i18n/pygettext.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot index f8e0f53b256934..cc5a9f6ba619db 100644 --- a/Lib/test/test_tools/i18n_data/ascii-escapes.pot +++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot @@ -41,7 +41,7 @@ msgstr "" #. some characters in the 128-255 range #: escapes.py:20 -msgid "€   ÿ" +msgid "\302\200 \302\240 ÿ" msgstr "" #. some characters >= 256 encoded as 2, 3 and 4 bytes, respectively diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f0ee2ea386f18f..c161242c2e87dd 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -190,12 +190,10 @@ def make_escapes(pass_nonascii): # Allow non-ascii characters to pass through so that e.g. 'msgid # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. - mod = 128 escape = escape_ascii else: - mod = 256 escape = escape_nonascii - escapes = [r"\%03o" % i for i in range(mod)] + escapes = [r"\%03o" % i for i in range(256)] for i in range(32, 127): escapes[i] = chr(i) escapes[ord('\\')] = r'\\' @@ -206,7 +204,15 @@ def make_escapes(pass_nonascii): def escape_ascii(s, encoding): - return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s) + escaped = '' + for c in s: + if ord(c) < 128: + escaped += escapes[ord(c)] + elif c.isprintable(): + escaped += c + else: + escaped += escape_nonascii(c, encoding) + return escaped def escape_nonascii(s, encoding): From b9e37ebe18c06807457f66aba666b921694895cc Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sat, 5 Apr 2025 14:52:42 +0200 Subject: [PATCH 2/3] Add news entry --- .../Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst new file mode 100644 index 00000000000000..1235360f9c6fad --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst @@ -0,0 +1 @@ +Always escape non-printable Unicode characters in :program:`pygettext`. From 8c2150f984332692c14fcca84665367ad5b4af50 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 6 Apr 2025 10:58:49 +0200 Subject: [PATCH 3/3] Keep list comprehension --- Tools/i18n/pygettext.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index c161242c2e87dd..a4af1d2be82914 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -204,15 +204,9 @@ def make_escapes(pass_nonascii): def escape_ascii(s, encoding): - escaped = '' - for c in s: - if ord(c) < 128: - escaped += escapes[ord(c)] - elif c.isprintable(): - escaped += c - else: - escaped += escape_nonascii(c, encoding) - return escaped + return ''.join(escapes[ord(c)] if ord(c) < 128 else c + if c.isprintable() else escape_nonascii(c, encoding) + for c in s) def escape_nonascii(s, encoding):