From a40d52201639c82ec3f15e8795f8c83897deb9b5 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 10:40:24 +0200 Subject: [PATCH 1/7] Reject invalid unicode escapes --- Lib/json/decoder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index d69a45d6793069..e9486752c4f6f8 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -50,17 +50,17 @@ def __reduce__(self): } +HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', } -def _decode_uXXXX(s, pos): - esc = s[pos + 1:pos + 5] - if len(esc) == 4 and esc[1] not in 'xX': +def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): + if match := _m(s, end): try: - return int(esc, 16) + return int(match.group(), 16) except ValueError: pass msg = "Invalid \\uXXXX escape" From 8587c66dec6eca6a57ecda4630e70bfe043f9182 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 10:48:45 +0200 Subject: [PATCH 2/7] Add tests --- Lib/test/test_json/test_scanstring.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py index 2d3ee8a8bf0f92..cca556a3b95bab 100644 --- a/Lib/test/test_json/test_scanstring.py +++ b/Lib/test/test_json/test_scanstring.py @@ -116,6 +116,11 @@ def test_bad_escapes(self): '"\\u012z"', '"\\u0x12"', '"\\u0X12"', + '"\\u{0}"'.format("\uff10" * 4), + '"\\u 123"', + '"\\u-123"', + '"\\u+123"', + '"\\u1_23"', '"\\ud834\\"', '"\\ud834\\u"', '"\\ud834\\ud"', @@ -127,6 +132,11 @@ def test_bad_escapes(self): '"\\ud834\\udd2z"', '"\\ud834\\u0x20"', '"\\ud834\\u0X20"', + '"\\ud834\\u{0}"'.format("\uff10" * 4), + '"\\ud834\\u 123"', + '"\\ud834\\u-123"', + '"\\ud834\\u+123"', + '"\\ud834\\u1_23"', ] for s in bad_escapes: with self.assertRaises(self.JSONDecodeError, msg=s): From 8819807ab454ff4316313fe8f839209819bdd0ff Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 10:50:56 +0200 Subject: [PATCH 3/7] Wrong variable --- Lib/json/decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index e9486752c4f6f8..26e3840bef8b0a 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -58,7 +58,7 @@ def __reduce__(self): } def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): - if match := _m(s, end): + if match := _m(s, pos): try: return int(match.group(), 16) except ValueError: From f5c2be5f0b27cf9f958cbeb1c494a4a7a0698b7e Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 10:55:37 +0200 Subject: [PATCH 4/7] Remove walrus operator --- Lib/json/decoder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 26e3840bef8b0a..71e9376ed8e2c2 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -58,7 +58,8 @@ def __reduce__(self): } def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): - if match := _m(s, pos): + esc = _m(s, pos) + if esc is not None: try: return int(match.group(), 16) except ValueError: From fbebc0c6f73fb7aa286097fd95b2020d0b3a2fc6 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 08:58:12 +0000 Subject: [PATCH 5/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst new file mode 100644 index 00000000000000..74d76c7bddae7d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst @@ -0,0 +1 @@ +Reject invalid unicode escapes for Python implementation of :func:`json.loads`. From aa3358711c696b91475d6079a03c449d4f8762d0 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 11:07:51 +0200 Subject: [PATCH 6/7] Fix start and variable --- Lib/json/decoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 71e9376ed8e2c2..ff4bfcdcc407b9 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -58,10 +58,10 @@ def __reduce__(self): } def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): - esc = _m(s, pos) + esc = _m(s, pos + 1) if esc is not None: try: - return int(match.group(), 16) + return int(esc.group(), 16) except ValueError: pass msg = "Invalid \\uXXXX escape" From be74e21455c1068e607c34062bb8b564d40e8a46 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 11:48:38 +0200 Subject: [PATCH 7/7] Rename 2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst to 2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst --- .../2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core_and_Builtins => Library}/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst (100%) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst similarity index 100% rename from Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst rename to Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst