From 96963f1e977d3537a950f389fd694a3183050642 Mon Sep 17 00:00:00 2001
From: Ankit Goel
Date: Sun, 31 Mar 2024 00:27:09 +0000
Subject: [PATCH] gh-59598: Ignore leading whitespace in `JSONDecoder.raw_decode`

Whitespace is allowed before JSON objects according to RFC 4627.
---
 Doc/library/json.rst                               |  3 +++
 Lib/json/decoder.py                                | 10 +++++++---
 Lib/test/test_json/test_decode.py                  | 14 ++++++++++++++
 .../2024-04-22-07-05-05.gh-issue-59598.LyEKW3.rst  |  1 +
 4 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-04-22-07-05-05.gh-issue-59598.LyEKW3.rst

diff --git a/Doc/library/json.rst b/Doc/library/json.rst
index 26f85b5ddf8d82..1e8717b51a57f1 100644
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -402,6 +402,9 @@ Encoders and Decoders
       This can be used to decode a JSON document from a string that may have
       extraneous data at the end.
 
+      .. versionchanged:: 3.14
+         Now ignores any leading whitespace instead of raising an error.
+
 
 .. class:: JSONEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)
 
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index d69a45d6793069..ab39da1a2cc998 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -341,23 +341,27 @@ def decode(self, s, _w=WHITESPACE.match):
         containing a JSON document).
 
         """
-        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+        obj, end = self.raw_decode(s)
         end = _w(s, end).end()
         if end != len(s):
             raise JSONDecodeError("Extra data", s, end)
         return obj
 
-    def raw_decode(self, s, idx=0):
+    def raw_decode(self, s, idx=0, _w=WHITESPACE.match):
         """Decode a JSON document from ``s`` (a ``str`` beginning with
         a JSON document) and return a 2-tuple of the Python
         representation and the index in ``s`` where the document ended.
+        Whitespace at the beginning of the document will be ignored.
+
+        Optionally, ``idx`` can be used to specify an offset in ``s``
+        where the document begins.
 
         This can be used to decode a JSON document from a string that may
         have extraneous data at the end.
 
         """
         try:
-            obj, end = self.scan_once(s, idx)
+            obj, end = self.scan_once(s, idx=_w(s, idx).end())
         except StopIteration as err:
             raise JSONDecodeError("Expecting value", s, err.value) from None
         return obj, end
diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index 79fb239b35d3f2..ee757b79b25948 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -124,6 +124,20 @@ def test_limit_int(self):
             with self.assertRaises(ValueError):
                 self.loads('1' * (maxdigits + 1))
 
+class TestRawDecode:
+    def test_whitespace(self):
+        decoder = self.json.JSONDecoder()
+        self.assertEqual(decoder.raw_decode(' {}'), ({}, 3))
+        self.assertEqual(decoder.raw_decode('  []'), ([], 4))
+        self.assertEqual(decoder.raw_decode('   ""'), ('', 5))
+        s = ' { "key" : "value" , "k":"v" } \n' \
+            ' { "key": "value", "k" :"v"} '
+        val1, n1 = decoder.raw_decode(s)
+        val2, n2 = decoder.raw_decode(s[n1:])
+        self.assertEqual(val1, {"key":"value", "k":"v"})
+        self.assertEqual(val2, {"key":"value", "k":"v"})
 
 class TestPyDecode(TestDecode, PyTest): pass
 class TestCDecode(TestDecode, CTest): pass
+class TestPyRawDecode(TestRawDecode, PyTest): pass
+class TestCRawDecode(TestRawDecode, CTest): pass
diff --git a/Misc/NEWS.d/next/Library/2024-04-22-07-05-05.gh-issue-59598.LyEKW3.rst b/Misc/NEWS.d/next/Library/2024-04-22-07-05-05.gh-issue-59598.LyEKW3.rst
new file mode 100644
index 00000000000000..b7c14e1558a73f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-22-07-05-05.gh-issue-59598.LyEKW3.rst
@@ -0,0 +1 @@
+Ignore leading whitespace in :meth:`json.JSONDecoder.raw_decode`.