Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 98d156b

Browse files
Increased coverage of standard codec error handlers.
1 parent 39430da commit 98d156b

1 file changed

Lines changed: 158 additions & 76 deletions

File tree

Lib/test/test_codeccallbacks.py

Lines changed: 158 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,6 @@
66
import unittest
77
import warnings
88

9-
try:
10-
import ctypes
11-
except ImportError:
12-
ctypes = None
13-
SIZEOF_WCHAR_T = -1
14-
else:
15-
SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar)
16-
179
class PosReturn:
1810
# this can be used for configurable callbacks
1911

@@ -212,14 +204,12 @@ def test_decodeunicodeinternal(self):
212204
b"\x00\x00\x00\x00\x00".decode,
213205
"unicode-internal",
214206
)
215-
if SIZEOF_WCHAR_T == 4:
216-
def handler_unicodeinternal(exc):
217-
if not isinstance(exc, UnicodeDecodeError):
218-
raise TypeError("don't know how to handle %r" % exc)
219-
return ("\x01", 1)
220-
221-
with test.support.check_warnings(('unicode_internal codec has been '
222-
'deprecated', DeprecationWarning)):
207+
if len('\0'.encode('unicode-internal')) == 4:
208+
def handler_unicodeinternal(exc):
209+
if not isinstance(exc, UnicodeDecodeError):
210+
raise TypeError("don't know how to handle %r" % exc)
211+
return ("\x01", 1)
212+
223213
self.assertEqual(
224214
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
225215
"\u0000"
@@ -364,12 +354,11 @@ def test_unicodeencodeerror(self):
364354
["ascii", "\uffffx", 0, 1, "ouch"],
365355
"'ascii' codec can't encode character '\\uffff' in position 0: ouch"
366356
)
367-
if SIZEOF_WCHAR_T == 4:
368-
self.check_exceptionobjectargs(
369-
UnicodeEncodeError,
370-
["ascii", "\U00010000x", 0, 1, "ouch"],
371-
"'ascii' codec can't encode character '\\U00010000' in position 0: ouch"
372-
)
357+
self.check_exceptionobjectargs(
358+
UnicodeEncodeError,
359+
["ascii", "\U00010000x", 0, 1, "ouch"],
360+
"'ascii' codec can't encode character '\\U00010000' in position 0: ouch"
361+
)
373362

374363
def test_unicodedecodeerror(self):
375364
self.check_exceptionobjectargs(
@@ -399,12 +388,11 @@ def test_unicodetranslateerror(self):
399388
["g\uffffrk", 1, 2, "ouch"],
400389
"can't translate character '\\uffff' in position 1: ouch"
401390
)
402-
if SIZEOF_WCHAR_T == 4:
403-
self.check_exceptionobjectargs(
404-
UnicodeTranslateError,
405-
["g\U00010000rk", 1, 2, "ouch"],
406-
"can't translate character '\\U00010000' in position 1: ouch"
407-
)
391+
self.check_exceptionobjectargs(
392+
UnicodeTranslateError,
393+
["g\U00010000rk", 1, 2, "ouch"],
394+
"can't translate character '\\U00010000' in position 1: ouch"
395+
)
408396
self.check_exceptionobjectargs(
409397
UnicodeTranslateError,
410398
["g\xfcrk", 1, 3, "ouch"],
@@ -431,6 +419,16 @@ def test_badandgoodstrictexceptions(self):
431419
codecs.strict_errors,
432420
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
433421
)
422+
self.assertRaises(
423+
UnicodeDecodeError,
424+
codecs.strict_errors,
425+
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
426+
)
427+
self.assertRaises(
428+
UnicodeTranslateError,
429+
codecs.strict_errors,
430+
UnicodeTranslateError("\u3042", 0, 1, "ouch")
431+
)
434432

435433
def test_badandgoodignoreexceptions(self):
436434
# "ignore" complains about a non-exception passed in
@@ -527,13 +525,15 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
527525
UnicodeTranslateError("\u3042", 0, 1, "ouch")
528526
)
529527
# Use the correct exception
530-
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
528+
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 99999, 100000,
529+
999999, 1000000)
530+
cs += (0xd800, 0xdfff)
531531
s = "".join(chr(c) for c in cs)
532532
self.assertEqual(
533533
codecs.xmlcharrefreplace_errors(
534534
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
535535
),
536-
("".join("&#%d;" % ord(c) for c in s), len(s))
536+
("".join("&#%d;" % c for c in cs), len(s))
537537
)
538538

539539
def test_badandgoodbackslashreplaceexceptions(self):
@@ -561,55 +561,138 @@ def test_badandgoodbackslashreplaceexceptions(self):
561561
UnicodeTranslateError("\u3042", 0, 1, "ouch")
562562
)
563563
# Use the correct exception
564-
self.assertEqual(
565-
codecs.backslashreplace_errors(
566-
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
567-
("\\u3042", 1)
564+
tests = [
565+
("\u3042", "\\u3042"),
566+
("\n", "\\x0a"),
567+
("a", "\\x61"),
568+
("\x00", "\\x00"),
569+
("\xff", "\\xff"),
570+
("\u0100", "\\u0100"),
571+
("\uffff", "\\uffff"),
572+
("\U00010000", "\\U00010000"),
573+
("\U0010ffff", "\\U0010ffff"),
574+
# Lone surrogates
575+
("\ud800", "\\ud800"),
576+
("\udfff", "\\udfff"),
577+
("\ud800\udfff", "\\ud800\\udfff"),
578+
]
579+
for s, r in tests:
580+
with self.subTest(str=s):
581+
self.assertEqual(
582+
codecs.backslashreplace_errors(
583+
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")),
584+
(r, len(s))
585+
)
586+
587+
def test_badandgoodsurrogateescapeexceptions(self):
588+
surrogateescape_errors = codecs.lookup_error('surrogateescape')
589+
# "surrogateescape" complains about a non-exception passed in
590+
self.assertRaises(
591+
TypeError,
592+
surrogateescape_errors,
593+
42
568594
)
569-
self.assertEqual(
570-
codecs.backslashreplace_errors(
571-
UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
572-
("\\x00", 1)
595+
# "surrogateescape" complains about the wrong exception types
596+
self.assertRaises(
597+
TypeError,
598+
surrogateescape_errors,
599+
UnicodeError("ouch")
573600
)
574-
self.assertEqual(
575-
codecs.backslashreplace_errors(
576-
UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
577-
("\\xff", 1)
601+
# "surrogateescape" can not be used for translating
602+
self.assertRaises(
603+
TypeError,
604+
surrogateescape_errors,
605+
UnicodeTranslateError("\udc80", 0, 1, "ouch")
578606
)
607+
# Use the correct exception
608+
for s in ("a", "\udc7f", "\udd00"):
609+
with self.subTest(str=s):
610+
self.assertRaises(
611+
UnicodeEncodeError,
612+
surrogateescape_errors,
613+
UnicodeEncodeError("ascii", s, 0, 1, "ouch")
614+
)
579615
self.assertEqual(
580-
codecs.backslashreplace_errors(
581-
UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
582-
("\\u0100", 1)
616+
surrogateescape_errors(
617+
UnicodeEncodeError("ascii", "\udc80", 0, 1, "ouch")),
618+
(b"\x80", 1)
619+
)
620+
self.assertRaises(
621+
UnicodeDecodeError,
622+
surrogateescape_errors,
623+
UnicodeDecodeError("ascii", bytearray(b"a"), 0, 1, "ouch")
583624
)
584625
self.assertEqual(
585-
codecs.backslashreplace_errors(
586-
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
587-
("\\uffff", 1)
588-
)
589-
if SIZEOF_WCHAR_T > 0:
590-
self.assertEqual(
591-
codecs.backslashreplace_errors(
592-
UnicodeEncodeError("ascii", "\U00010000",
593-
0, 1, "ouch")),
594-
("\\U00010000", 1)
595-
)
596-
self.assertEqual(
597-
codecs.backslashreplace_errors(
598-
UnicodeEncodeError("ascii", "\U0010ffff",
599-
0, 1, "ouch")),
600-
("\\U0010ffff", 1)
601-
)
602-
# Lone surrogates (regardless of unicode width)
603-
self.assertEqual(
604-
codecs.backslashreplace_errors(
605-
UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
606-
("\\ud800", 1)
607-
)
608-
self.assertEqual(
609-
codecs.backslashreplace_errors(
610-
UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
611-
("\\udfff", 1)
612-
)
626+
surrogateescape_errors(
627+
UnicodeDecodeError("ascii", bytearray(b"\x80"), 0, 1, "ouch")),
628+
("\udc80", 1)
629+
)
630+
631+
def test_badandgoodsurrogatepassexceptions(self):
632+
surrogatepass_errors = codecs.lookup_error('surrogatepass')
633+
# "surrogatepass" complains about a non-exception passed in
634+
self.assertRaises(
635+
TypeError,
636+
surrogatepass_errors,
637+
42
638+
)
639+
# "surrogatepass" complains about the wrong exception types
640+
self.assertRaises(
641+
TypeError,
642+
surrogatepass_errors,
643+
UnicodeError("ouch")
644+
)
645+
# "surrogatepass" can not be used for translating
646+
self.assertRaises(
647+
TypeError,
648+
surrogatepass_errors,
649+
UnicodeTranslateError("\ud800", 0, 1, "ouch")
650+
)
651+
# Use the correct exception
652+
for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"):
653+
with self.subTest(encoding=enc):
654+
self.assertRaises(
655+
UnicodeEncodeError,
656+
surrogatepass_errors,
657+
UnicodeEncodeError(enc, "a", 0, 1, "ouch")
658+
)
659+
self.assertRaises(
660+
UnicodeDecodeError,
661+
surrogatepass_errors,
662+
UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch")
663+
)
664+
tests = [
665+
("ascii", "\ud800", b'\xed\xa0\x80', 3),
666+
("utf-8", "\ud800", b'\xed\xa0\x80', 3),
667+
("utf-16le", "\ud800", b'\x00\xd8', 2),
668+
("utf-16be", "\ud800", b'\xd8\x00', 2),
669+
("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4),
670+
("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4),
671+
("ascii", "\udfff", b'\xed\xbf\xbf', 3),
672+
("utf-8", "\udfff", b'\xed\xbf\xbf', 3),
673+
("utf-16le", "\udfff", b'\xff\xdf', 2),
674+
("utf-16be", "\udfff", b'\xdf\xff', 2),
675+
("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4),
676+
("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4),
677+
("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
678+
("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
679+
("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2),
680+
("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2),
681+
("utf-32le", "\ud800\udfff", b'\x00\xd8\x00\x00\xff\xdf\x00\x00', 4),
682+
("utf-32be", "\ud800\udfff", b'\x00\x00\xd8\x00\x00\x00\xdf\xff', 4),
683+
]
684+
for enc, s, b, n in tests:
685+
with self.subTest(encoding=enc, str=s, bytes=b):
686+
self.assertEqual(
687+
surrogatepass_errors(
688+
UnicodeEncodeError(enc, s, 0, len(s), "ouch")),
689+
(b, len(s))
690+
)
691+
self.assertEqual(
692+
surrogatepass_errors(
693+
UnicodeDecodeError(enc, bytearray(b[:n]), 0, n, "ouch")),
694+
(s[:1], n)
695+
)
613696

614697
def test_badhandlerresults(self):
615698
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
@@ -688,9 +771,8 @@ def test_xmlcharrefvalues(self):
688771
# enhance coverage of:
689772
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
690773
# and inline implementations
691-
v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
692-
if SIZEOF_WCHAR_T == 4:
693-
v += (100000, 500000, 1000000)
774+
v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000,
775+
500000, 1000000)
694776
s = "".join([chr(x) for x in v])
695777
codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
696778
for enc in ("ascii", "iso-8859-15"):

0 commit comments

Comments
 (0)