Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c4a35da

Browse files
Issue #28541: Improve test coverage for encoding detection in json library.
Original patch by Eric Appelt.
1 parent a0d9c68 commit c4a35da

2 files changed

Lines changed: 15 additions & 1 deletion

File tree

Lib/json/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ def detect_encoding(b):
257257
return 'utf-16-be' if b[1] else 'utf-32-be'
258258
if not b[1]:
259259
# XX 00 00 00 - utf-32-le
260-
# XX 00 XX XX - utf-16-le
260+
# XX 00 00 XX - utf-16-le
261+
# XX 00 XX -- - utf-16-le
261262
return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
262263
elif len(b) == 2:
263264
if not b[0]:

Lib/test/test_json/test_unicode.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,19 @@ def test_bytes_decode(self):
6565
self.assertEqual(self.loads(bom + encoded), data)
6666
self.assertEqual(self.loads(encoded), data)
6767
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
68+
# RFC-7159 and ECMA-404 extend JSON to allow documents that
69+
# consist of only a string, which can present a special case
70+
# not covered by the encoding detection patterns specified in
71+
# RFC-4627 for utf-16-le (XX 00 XX 00).
72+
self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
73+
'\u2600')
74+
# Encoding detection for small (<4) bytes objects
75+
# is implemented as a special case. RFC-7159 and ECMA-404
76+
# allow single codepoint JSON documents which are only two
77+
# bytes in utf-16 encodings w/o BOM.
78+
self.assertEqual(self.loads(b'5\x00'), 5)
79+
self.assertEqual(self.loads(b'\x007'), 7)
80+
self.assertEqual(self.loads(b'57'), 57)
6881

6982
def test_object_pairs_hook_with_unicode(self):
7083
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'

0 commit comments

Comments
 (0)