Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e3b4715

Browse files
author
Victor Stinner
committed
Write tests for invalid characters (U+00110000)
Test the following functions:
* codecs.raw_unicode_escape_decode()
* PyUnicode_FromWideChar()
* PyUnicode_FromUnicode()
* "unicode_internal" and "unicode_escape" decoders
1 parent db62389 commit e3b4715

2 files changed

Lines changed: 34 additions & 0 deletions

File tree

Lib/test/test_codecs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,16 @@ def test_bug1251300(self):
10341034
'deprecated', DeprecationWarning)):
10351035
self.assertRaises(UnicodeDecodeError, internal.decode,
10361036
"unicode_internal")
1037+
if sys.byteorder == "little":
1038+
invalid = b"\x00\x00\x11\x00"
1039+
else:
1040+
invalid = b"\x00\x11\x00\x00"
1041+
with support.check_warnings():
1042+
self.assertRaises(UnicodeDecodeError,
1043+
invalid.decode, "unicode_internal")
1044+
with support.check_warnings():
1045+
self.assertEqual(invalid.decode("unicode_internal", "replace"),
1046+
'\ufffd')
10371047

10381048
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
10391049
def test_decode_error_attributes(self):
@@ -1729,6 +1739,12 @@ def test_unicode_escape(self):
17291739
self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
17301740
self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
17311741

1742+
self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
1743+
self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
1744+
1745+
self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
1746+
self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
1747+
17321748
class SurrogateEscapeTest(unittest.TestCase):
17331749

17341750
def test_utf8(self):

Modules/_testcapimodule.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,7 @@ test_widechar(PyObject *self)
14091409
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
14101410
const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
14111411
size_t wtextlen = 1;
1412+
const wchar_t invalid[1] = {(wchar_t)0x110000u};
14121413
#else
14131414
const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
14141415
size_t wtextlen = 2;
@@ -1444,6 +1445,23 @@ test_widechar(PyObject *self)
14441445

14451446
Py_DECREF(wide);
14461447
Py_DECREF(utf8);
1448+
1449+
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
1450+
wide = PyUnicode_FromWideChar(invalid, 1);
1451+
if (wide == NULL)
1452+
PyErr_Clear();
1453+
else
1454+
return raiseTestError("test_widechar",
1455+
"PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
1456+
1457+
wide = PyUnicode_FromUnicode(invalid, 1);
1458+
if (wide == NULL)
1459+
PyErr_Clear();
1460+
else
1461+
return raiseTestError("test_widechar",
1462+
"PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail");
1463+
#endif
1464+
14471465
Py_RETURN_NONE;
14481466
}
14491467

0 commit comments

Comments
 (0)