Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 55e092f

Browse files
Issue #19279: UTF-7 decoder no longer produces illegal strings.
2 parents f19a6ef + 35804e4 commit 55e092f

3 files changed

Lines changed: 34 additions & 0 deletions

File tree

Lib/test/test_codecs.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,36 @@ def test_partial(self):
820820
]
821821
)
822822

823+
def test_errors(self):
    # Each entry pairs a malformed UTF-7 byte string with the text the
    # 'replace' error handler is expected to yield for it (U+FFFD marks
    # the spot where the decoder gave up on the input).
    malformed_cases = (
        (b'a\xffb', 'a\ufffdb'),
        (b'a+IK', 'a\ufffd'),
        (b'a+IK-b', 'a\ufffdb'),
        (b'a+IK,b', 'a\ufffdb'),
        (b'a+IKx', 'a\u20ac\ufffd'),
        (b'a+IKx-b', 'a\u20ac\ufffdb'),
        (b'a+IKwgr', 'a\u20ac\ufffd'),
        (b'a+IKwgr-b', 'a\u20ac\ufffdb'),
        (b'a+IKwgr,', 'a\u20ac\ufffd'),
        (b'a+IKwgr,-b', 'a\u20ac\ufffd-b'),
        (b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'),
        (b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'),
        (b'a+/,+IKw-b', 'a\ufffd\u20acb'),
        (b'a+//,+IKw-b', 'a\ufffd\u20acb'),
        (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'),
        (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
    )
    for data, want in malformed_cases:
        # One sub-test per input so a failure names the offending bytes.
        with self.subTest(raw=data):
            # Strict decoding of the complete input must raise.
            with self.assertRaises(UnicodeDecodeError):
                codecs.utf_7_decode(data, 'strict', True)
            # The 'replace' handler must produce exactly the expected text.
            got = data.decode('utf-7', 'replace')
            self.assertEqual(got, want)
847+
848+
def test_nonbmp(self):
    # U+104A0 is a code point above U+FFFF. Both the single code point and
    # its explicit surrogate-pair spelling must encode to the same bytes,
    # and those bytes must decode back to the single code point.
    codec = self.encoding
    astral = '\U000104A0'
    wire = b'+2AHcoA-'
    self.assertEqual(astral.encode(codec), wire)
    self.assertEqual('\ud801\udca0'.encode(codec), wire)
    self.assertEqual(wire.decode(codec), astral)
852+
823853
class UTF16ExTest(unittest.TestCase):
824854

825855
def test_errors(self):

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Projected release date: 2013-10-20
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #19279: UTF-7 decoder no longer produces illegal strings.
14+
1315
- Issue #16612: Add "Argument Clinic", a compile-time preprocessor for
1416
C files to generate argument parsing code. (See PEP 436.)
1517

Objects/unicodeobject.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4341,6 +4341,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
43414341
Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
43424342
base64bits -= 16;
43434343
base64buffer &= (1 << base64bits) - 1; /* clear high bits */
4344+
assert(outCh <= 0xffff);
43444345
if (surrogate) {
43454346
/* expecting a second surrogate */
43464347
if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
@@ -4408,6 +4409,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
44084409
inShift = 1;
44094410
shiftOutStart = writer.pos;
44104411
base64bits = 0;
4412+
base64buffer = 0;
44114413
}
44124414
}
44134415
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */

0 commit comments

Comments
 (0)