Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 78edf75

Browse files
committed
Issue #13333: The UTF-7 decoder now accepts lone surrogates
(the encoder already accepts them).
2 parents 9a812cb + 5418ee0 commit 78edf75

3 files changed

Lines changed: 22 additions & 13 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,10 +1108,18 @@ def test_codecs_utf7(self):
11081108
for (x, y) in utfTests:
11091109
self.assertEqual(x.encode('utf-7'), y)
11101110

1111-
# Unpaired surrogates not supported
1112-
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
1113-
1114-
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd')
1111+
# Unpaired surrogates are passed through
1112+
self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
1113+
self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
1114+
self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
1115+
self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
1116+
self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
1117+
self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
1118+
self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
1119+
self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')
1120+
1121+
self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
1122+
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
11151123

11161124
# Issue #2242: crash on some Windows/MSVC versions
11171125
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
14+
already accepts them).
15+
1316
- Issue #13389: Full garbage collection passes now clear the freelists for
1417
list and dict objects. They already cleared other freelists in the
1518
interpreter.

Objects/unicodeobject.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3884,21 +3884,18 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
38843884
if (unicode_putchar(&unicode, &outpos, ch2) < 0)
38853885
goto onError;
38863886
surrogate = 0;
3887+
continue;
38873888
}
38883889
else {
3890+
if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
3891+
goto onError;
38893892
surrogate = 0;
3890-
errmsg = "second surrogate missing";
3891-
goto utf7Error;
38923893
}
38933894
}
3894-
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
3895+
if (outCh >= 0xD800 && outCh <= 0xDBFF) {
38953896
/* first surrogate */
38963897
surrogate = outCh;
38973898
}
3898-
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
3899-
errmsg = "unexpected second surrogate";
3900-
goto utf7Error;
3901-
}
39023899
else {
39033900
if (unicode_putchar(&unicode, &outpos, outCh) < 0)
39043901
goto onError;
@@ -3909,8 +3906,9 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
39093906
inShift = 0;
39103907
s++;
39113908
if (surrogate) {
3912-
errmsg = "second surrogate missing at end of shift sequence";
3913-
goto utf7Error;
3909+
if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
3910+
goto onError;
3911+
surrogate = 0;
39143912
}
39153913
if (base64bits > 0) { /* left-over bits */
39163914
if (base64bits >= 6) {

0 commit comments

Comments
 (0)