66import unittest
77import warnings
88
9- try :
10- import ctypes
11- except ImportError :
12- ctypes = None
13- SIZEOF_WCHAR_T = - 1
14- else :
15- SIZEOF_WCHAR_T = ctypes .sizeof (ctypes .c_wchar )
16-
179class PosReturn :
1810 # this can be used for configurable callbacks
1911
@@ -212,14 +204,12 @@ def test_decodeunicodeinternal(self):
212204 b"\x00 \x00 \x00 \x00 \x00 " .decode ,
213205 "unicode-internal" ,
214206 )
215- if SIZEOF_WCHAR_T == 4 :
216- def handler_unicodeinternal (exc ):
217- if not isinstance (exc , UnicodeDecodeError ):
218- raise TypeError ("don't know how to handle %r" % exc )
219- return ("\x01 " , 1 )
220-
221- with test .support .check_warnings (('unicode_internal codec has been '
222- 'deprecated' , DeprecationWarning )):
207+ if len ('\0 ' .encode ('unicode-internal' )) == 4 :
208+ def handler_unicodeinternal (exc ):
209+ if not isinstance (exc , UnicodeDecodeError ):
210+ raise TypeError ("don't know how to handle %r" % exc )
211+ return ("\x01 " , 1 )
212+
223213 self .assertEqual (
224214 b"\x00 \x00 \x00 \x00 \x00 " .decode ("unicode-internal" , "ignore" ),
225215 "\u0000 "
@@ -364,12 +354,11 @@ def test_unicodeencodeerror(self):
364354 ["ascii" , "\uffff x" , 0 , 1 , "ouch" ],
365355 "'ascii' codec can't encode character '\\ uffff' in position 0: ouch"
366356 )
367- if SIZEOF_WCHAR_T == 4 :
368- self .check_exceptionobjectargs (
369- UnicodeEncodeError ,
370- ["ascii" , "\U00010000 x" , 0 , 1 , "ouch" ],
371- "'ascii' codec can't encode character '\\ U00010000' in position 0: ouch"
372- )
357+ self .check_exceptionobjectargs (
358+ UnicodeEncodeError ,
359+ ["ascii" , "\U00010000 x" , 0 , 1 , "ouch" ],
360+ "'ascii' codec can't encode character '\\ U00010000' in position 0: ouch"
361+ )
373362
374363 def test_unicodedecodeerror (self ):
375364 self .check_exceptionobjectargs (
@@ -399,12 +388,11 @@ def test_unicodetranslateerror(self):
399388 ["g\uffff rk" , 1 , 2 , "ouch" ],
400389 "can't translate character '\\ uffff' in position 1: ouch"
401390 )
402- if SIZEOF_WCHAR_T == 4 :
403- self .check_exceptionobjectargs (
404- UnicodeTranslateError ,
405- ["g\U00010000 rk" , 1 , 2 , "ouch" ],
406- "can't translate character '\\ U00010000' in position 1: ouch"
407- )
391+ self .check_exceptionobjectargs (
392+ UnicodeTranslateError ,
393+ ["g\U00010000 rk" , 1 , 2 , "ouch" ],
394+ "can't translate character '\\ U00010000' in position 1: ouch"
395+ )
408396 self .check_exceptionobjectargs (
409397 UnicodeTranslateError ,
410398 ["g\xfc rk" , 1 , 3 , "ouch" ],
@@ -431,6 +419,16 @@ def test_badandgoodstrictexceptions(self):
431419 codecs .strict_errors ,
432420 UnicodeEncodeError ("ascii" , "\u3042 " , 0 , 1 , "ouch" )
433421 )
422+ self .assertRaises (
423+ UnicodeDecodeError ,
424+ codecs .strict_errors ,
425+ UnicodeDecodeError ("ascii" , bytearray (b"\xff " ), 0 , 1 , "ouch" )
426+ )
427+ self .assertRaises (
428+ UnicodeTranslateError ,
429+ codecs .strict_errors ,
430+ UnicodeTranslateError ("\u3042 " , 0 , 1 , "ouch" )
431+ )
434432
435433 def test_badandgoodignoreexceptions (self ):
436434 # "ignore" complains about a non-exception passed in
@@ -527,13 +525,15 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
527525 UnicodeTranslateError ("\u3042 " , 0 , 1 , "ouch" )
528526 )
529527 # Use the correct exception
530- cs = (0 , 1 , 9 , 10 , 99 , 100 , 999 , 1000 , 9999 , 10000 , 0x3042 )
528+ cs = (0 , 1 , 9 , 10 , 99 , 100 , 999 , 1000 , 9999 , 10000 , 99999 , 100000 ,
529+ 999999 , 1000000 )
530+ cs += (0xd800 , 0xdfff )
531531 s = "" .join (chr (c ) for c in cs )
532532 self .assertEqual (
533533 codecs .xmlcharrefreplace_errors (
534534 UnicodeEncodeError ("ascii" , s , 0 , len (s ), "ouch" )
535535 ),
536- ("" .join ("&#%d;" % ord ( c ) for c in s ), len (s ))
536+ ("" .join ("&#%d;" % c for c in cs ), len (s ))
537537 )
538538
539539 def test_badandgoodbackslashreplaceexceptions (self ):
@@ -561,55 +561,138 @@ def test_badandgoodbackslashreplaceexceptions(self):
561561 UnicodeTranslateError ("\u3042 " , 0 , 1 , "ouch" )
562562 )
563563 # Use the correct exception
564- self .assertEqual (
565- codecs .backslashreplace_errors (
566- UnicodeEncodeError ("ascii" , "\u3042 " , 0 , 1 , "ouch" )),
567- ("\\ u3042" , 1 )
564+ tests = [
565+ ("\u3042 " , "\\ u3042" ),
566+ ("\n " , "\\ x0a" ),
567+ ("a" , "\\ x61" ),
568+ ("\x00 " , "\\ x00" ),
569+ ("\xff " , "\\ xff" ),
570+ ("\u0100 " , "\\ u0100" ),
571+ ("\uffff " , "\\ uffff" ),
572+ ("\U00010000 " , "\\ U00010000" ),
573+ ("\U0010ffff " , "\\ U0010ffff" ),
574+ # Lone surrogates
575+ ("\ud800 " , "\\ ud800" ),
576+ ("\udfff " , "\\ udfff" ),
577+ ("\ud800 \udfff " , "\\ ud800\\ udfff" ),
578+ ]
579+ for s , r in tests :
580+ with self .subTest (str = s ):
581+ self .assertEqual (
582+ codecs .backslashreplace_errors (
583+ UnicodeEncodeError ("ascii" , s , 0 , len (s ), "ouch" )),
584+ (r , len (s ))
585+ )
586+
587+ def test_badandgoodsurrogateescapeexceptions (self ):
588+ surrogateescape_errors = codecs .lookup_error ('surrogateescape' )
589+ # "surrogateescape" complains about a non-exception passed in
590+ self .assertRaises (
591+ TypeError ,
592+ surrogateescape_errors ,
593+ 42
568594 )
569- self .assertEqual (
570- codecs .backslashreplace_errors (
571- UnicodeEncodeError ("ascii" , "\x00 " , 0 , 1 , "ouch" )),
572- ("\\ x00" , 1 )
595+ # "surrogateescape" complains about the wrong exception types
596+ self .assertRaises (
597+ TypeError ,
598+ surrogateescape_errors ,
599+ UnicodeError ("ouch" )
573600 )
574- self .assertEqual (
575- codecs .backslashreplace_errors (
576- UnicodeEncodeError ("ascii" , "\xff " , 0 , 1 , "ouch" )),
577- ("\\ xff" , 1 )
601+ # "surrogateescape" can not be used for translating
602+ self .assertRaises (
603+ TypeError ,
604+ surrogateescape_errors ,
605+ UnicodeTranslateError ("\udc80 " , 0 , 1 , "ouch" )
578606 )
607+ # Use the correct exception
608+ for s in ("a" , "\udc7f " , "\udd00 " ):
609+ with self .subTest (str = s ):
610+ self .assertRaises (
611+ UnicodeEncodeError ,
612+ surrogateescape_errors ,
613+ UnicodeEncodeError ("ascii" , s , 0 , 1 , "ouch" )
614+ )
579615 self .assertEqual (
580- codecs .backslashreplace_errors (
581- UnicodeEncodeError ("ascii" , "\u0100 " , 0 , 1 , "ouch" )),
582- ("\\ u0100" , 1 )
616+ surrogateescape_errors (
617+ UnicodeEncodeError ("ascii" , "\udc80 " , 0 , 1 , "ouch" )),
618+ (b"\x80 " , 1 )
619+ )
620+ self .assertRaises (
621+ UnicodeDecodeError ,
622+ surrogateescape_errors ,
623+ UnicodeDecodeError ("ascii" , bytearray (b"a" ), 0 , 1 , "ouch" )
583624 )
584625 self .assertEqual (
585- codecs .backslashreplace_errors (
586- UnicodeEncodeError ("ascii" , "\uffff " , 0 , 1 , "ouch" )),
587- ("\\ uffff" , 1 )
588- )
589- if SIZEOF_WCHAR_T > 0 :
590- self .assertEqual (
591- codecs .backslashreplace_errors (
592- UnicodeEncodeError ("ascii" , "\U00010000 " ,
593- 0 , 1 , "ouch" )),
594- ("\\ U00010000" , 1 )
595- )
596- self .assertEqual (
597- codecs .backslashreplace_errors (
598- UnicodeEncodeError ("ascii" , "\U0010ffff " ,
599- 0 , 1 , "ouch" )),
600- ("\\ U0010ffff" , 1 )
601- )
602- # Lone surrogates (regardless of unicode width)
603- self .assertEqual (
604- codecs .backslashreplace_errors (
605- UnicodeEncodeError ("ascii" , "\ud800 " , 0 , 1 , "ouch" )),
606- ("\\ ud800" , 1 )
607- )
608- self .assertEqual (
609- codecs .backslashreplace_errors (
610- UnicodeEncodeError ("ascii" , "\udfff " , 0 , 1 , "ouch" )),
611- ("\\ udfff" , 1 )
612- )
626+ surrogateescape_errors (
627+ UnicodeDecodeError ("ascii" , bytearray (b"\x80 " ), 0 , 1 , "ouch" )),
628+ ("\udc80 " , 1 )
629+ )
630+
631+ def test_badandgoodsurrogatepassexceptions (self ):
632+ surrogatepass_errors = codecs .lookup_error ('surrogatepass' )
633+ # "surrogatepass" complains about a non-exception passed in
634+ self .assertRaises (
635+ TypeError ,
636+ surrogatepass_errors ,
637+ 42
638+ )
639+ # "surrogatepass" complains about the wrong exception types
640+ self .assertRaises (
641+ TypeError ,
642+ surrogatepass_errors ,
643+ UnicodeError ("ouch" )
644+ )
645+ # "surrogatepass" can not be used for translating
646+ self .assertRaises (
647+ TypeError ,
648+ surrogatepass_errors ,
649+ UnicodeTranslateError ("\ud800 " , 0 , 1 , "ouch" )
650+ )
651+ # Use the correct exception
652+ for enc in ("utf-8" , "utf-16le" , "utf-16be" , "utf-32le" , "utf-32be" ):
653+ with self .subTest (encoding = enc ):
654+ self .assertRaises (
655+ UnicodeEncodeError ,
656+ surrogatepass_errors ,
657+ UnicodeEncodeError (enc , "a" , 0 , 1 , "ouch" )
658+ )
659+ self .assertRaises (
660+ UnicodeDecodeError ,
661+ surrogatepass_errors ,
662+ UnicodeDecodeError (enc , "a" .encode (enc ), 0 , 1 , "ouch" )
663+ )
664+ tests = [
665+ ("ascii" , "\ud800 " , b'\xed \xa0 \x80 ' , 3 ),
666+ ("utf-8" , "\ud800 " , b'\xed \xa0 \x80 ' , 3 ),
667+ ("utf-16le" , "\ud800 " , b'\x00 \xd8 ' , 2 ),
668+ ("utf-16be" , "\ud800 " , b'\xd8 \x00 ' , 2 ),
669+ ("utf-32le" , "\ud800 " , b'\x00 \xd8 \x00 \x00 ' , 4 ),
670+ ("utf-32be" , "\ud800 " , b'\x00 \x00 \xd8 \x00 ' , 4 ),
671+ ("ascii" , "\udfff " , b'\xed \xbf \xbf ' , 3 ),
672+ ("utf-8" , "\udfff " , b'\xed \xbf \xbf ' , 3 ),
673+ ("utf-16le" , "\udfff " , b'\xff \xdf ' , 2 ),
674+ ("utf-16be" , "\udfff " , b'\xdf \xff ' , 2 ),
675+ ("utf-32le" , "\udfff " , b'\xff \xdf \x00 \x00 ' , 4 ),
676+ ("utf-32be" , "\udfff " , b'\x00 \x00 \xdf \xff ' , 4 ),
677+ ("ascii" , "\ud800 \udfff " , b'\xed \xa0 \x80 \xed \xbf \xbf ' , 3 ),
678+ ("utf-8" , "\ud800 \udfff " , b'\xed \xa0 \x80 \xed \xbf \xbf ' , 3 ),
679+ ("utf-16le" , "\ud800 \udfff " , b'\x00 \xd8 \xff \xdf ' , 2 ),
680+ ("utf-16be" , "\ud800 \udfff " , b'\xd8 \x00 \xdf \xff ' , 2 ),
681+ ("utf-32le" , "\ud800 \udfff " , b'\x00 \xd8 \x00 \x00 \xff \xdf \x00 \x00 ' , 4 ),
682+ ("utf-32be" , "\ud800 \udfff " , b'\x00 \x00 \xd8 \x00 \x00 \x00 \xdf \xff ' , 4 ),
683+ ]
684+ for enc , s , b , n in tests :
685+ with self .subTest (encoding = enc , str = s , bytes = b ):
686+ self .assertEqual (
687+ surrogatepass_errors (
688+ UnicodeEncodeError (enc , s , 0 , len (s ), "ouch" )),
689+ (b , len (s ))
690+ )
691+ self .assertEqual (
692+ surrogatepass_errors (
693+ UnicodeDecodeError (enc , bytearray (b [:n ]), 0 , n , "ouch" )),
694+ (s [:1 ], n )
695+ )
613696
614697 def test_badhandlerresults (self ):
615698 results = ( 42 , "foo" , (1 ,2 ,3 ), ("foo" , 1 , 3 ), ("foo" , None ), ("foo" ,), ("foo" , 1 , 3 ), ("foo" , None ), ("foo" ,) )
@@ -688,9 +771,8 @@ def test_xmlcharrefvalues(self):
688771 # enhance coverage of:
689772 # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
690773 # and inline implementations
691- v = (1 , 5 , 10 , 50 , 100 , 500 , 1000 , 5000 , 10000 , 50000 )
692- if SIZEOF_WCHAR_T == 4 :
693- v += (100000 , 500000 , 1000000 )
774+ v = (1 , 5 , 10 , 50 , 100 , 500 , 1000 , 5000 , 10000 , 50000 , 100000 ,
775+ 500000 , 1000000 )
694776 s = "" .join ([chr (x ) for x in v ])
695777 codecs .register_error ("test.xmlcharrefreplace" , codecs .xmlcharrefreplace_errors )
696778 for enc in ("ascii" , "iso-8859-15" ):
0 commit comments