2323 verify (repr (u"'\" " ) == """u'\\ '"'""" )
2424 verify (repr (u"'" ) == '''u"'"''' )
2525 verify (repr (u'"' ) == """u'"'""" )
26- verify (repr (u'' .join (map (unichr , range (256 )))) ==
27- "u'\\ x00\\ x01\\ x02\\ x03\\ x04\\ x05\\ x06\\ x07\\ x08\\ t\\ n\\ x0b\\ x0c\\ r"
28- "\\ x0e\\ x0f\\ x10\\ x11\\ x12\\ x13\\ x14\\ x15\\ x16\\ x17\\ x18\\ x19\\ x1a"
29- "\\ x1b\\ x1c\\ x1d\\ x1e\\ x1f !\" #$%&\\ '()*+,-./0123456789:;<=>?@ABCDEFGHI"
30- "JKLMNOPQRSTUVWXYZ[\\ \\ ]^_`abcdefghijklmnopqrstuvwxyz{|}~\\ x7f"
31- "\\ x80\\ x81\\ x82\\ x83\\ x84\\ x85\\ x86\\ x87\\ x88\\ x89\\ x8a\\ x8b\\ x8c\\ x8d"
32- "\\ x8e\\ x8f\\ x90\\ x91\\ x92\\ x93\\ x94\\ x95\\ x96\\ x97\\ x98\\ x99\\ x9a\\ x9b"
33- "\\ x9c\\ x9d\\ x9e\\ x9f\\ xa0\\ xa1\\ xa2\\ xa3\\ xa4\\ xa5\\ xa6\\ xa7\\ xa8\\ xa9"
34- "\\ xaa\\ xab\\ xac\\ xad\\ xae\\ xaf\\ xb0\\ xb1\\ xb2\\ xb3\\ xb4\\ xb5\\ xb6\\ xb7"
35- "\\ xb8\\ xb9\\ xba\\ xbb\\ xbc\\ xbd\\ xbe\\ xbf\\ xc0\\ xc1\\ xc2\\ xc3\\ xc4\\ xc5"
36- "\\ xc6\\ xc7\\ xc8\\ xc9\\ xca\\ xcb\\ xcc\\ xcd\\ xce\\ xcf\\ xd0\\ xd1\\ xd2\\ xd3"
37- "\\ xd4\\ xd5\\ xd6\\ xd7\\ xd8\\ xd9\\ xda\\ xdb\\ xdc\\ xdd\\ xde\\ xdf\\ xe0\\ xe1"
38- "\\ xe2\\ xe3\\ xe4\\ xe5\\ xe6\\ xe7\\ xe8\\ xe9\\ xea\\ xeb\\ xec\\ xed\\ xee\\ xef"
39- "\\ xf0\\ xf1\\ xf2\\ xf3\\ xf4\\ xf5\\ xf6\\ xf7\\ xf8\\ xf9\\ xfa\\ xfb\\ xfc\\ xfd"
40- "\\ xfe\\ xff'" )
26+ latin1repr = (
27+ "u'\\ x00\\ x01\\ x02\\ x03\\ x04\\ x05\\ x06\\ x07\\ x08\\ t\\ n\\ x0b\\ x0c\\ r"
28+ "\\ x0e\\ x0f\\ x10\\ x11\\ x12\\ x13\\ x14\\ x15\\ x16\\ x17\\ x18\\ x19\\ x1a"
29+ "\\ x1b\\ x1c\\ x1d\\ x1e\\ x1f !\" #$%&\\ '()*+,-./0123456789:;<=>?@ABCDEFGHI"
30+ "JKLMNOPQRSTUVWXYZ[\\ \\ ]^_`abcdefghijklmnopqrstuvwxyz{|}~\\ x7f"
31+ "\\ x80\\ x81\\ x82\\ x83\\ x84\\ x85\\ x86\\ x87\\ x88\\ x89\\ x8a\\ x8b\\ x8c\\ x8d"
32+ "\\ x8e\\ x8f\\ x90\\ x91\\ x92\\ x93\\ x94\\ x95\\ x96\\ x97\\ x98\\ x99\\ x9a\\ x9b"
33+ "\\ x9c\\ x9d\\ x9e\\ x9f\\ xa0\\ xa1\\ xa2\\ xa3\\ xa4\\ xa5\\ xa6\\ xa7\\ xa8\\ xa9"
34+ "\\ xaa\\ xab\\ xac\\ xad\\ xae\\ xaf\\ xb0\\ xb1\\ xb2\\ xb3\\ xb4\\ xb5\\ xb6\\ xb7"
35+ "\\ xb8\\ xb9\\ xba\\ xbb\\ xbc\\ xbd\\ xbe\\ xbf\\ xc0\\ xc1\\ xc2\\ xc3\\ xc4\\ xc5"
36+ "\\ xc6\\ xc7\\ xc8\\ xc9\\ xca\\ xcb\\ xcc\\ xcd\\ xce\\ xcf\\ xd0\\ xd1\\ xd2\\ xd3"
37+ "\\ xd4\\ xd5\\ xd6\\ xd7\\ xd8\\ xd9\\ xda\\ xdb\\ xdc\\ xdd\\ xde\\ xdf\\ xe0\\ xe1"
38+ "\\ xe2\\ xe3\\ xe4\\ xe5\\ xe6\\ xe7\\ xe8\\ xe9\\ xea\\ xeb\\ xec\\ xed\\ xee\\ xef"
39+ "\\ xf0\\ xf1\\ xf2\\ xf3\\ xf4\\ xf5\\ xf6\\ xf7\\ xf8\\ xf9\\ xfa\\ xfb\\ xfc\\ xfd"
40+ "\\ xfe\\ xff'" )
41+ testrepr = repr (u'' .join (map (unichr , range (256 ))))
42+ verify (testrepr == latin1repr )
4143
4244def test (method , input , output , * args ):
4345 if verbose :
@@ -495,6 +497,7 @@ def __str__(self):
495497verify (unicode ('+3ADYAA-' , 'utf-7' , 'replace' ) == u'\ufffd ' )
496498
497499# UTF-8 specific encoding tests:
500+ verify (u'' .encode ('utf-8' ) == '' )
498501verify (u'\u20ac ' .encode ('utf-8' ) == '\xe2 \x82 \xac ' )
499502verify (u'\ud800 \udc02 ' .encode ('utf-8' ) == '\xf0 \x90 \x80 \x82 ' )
500503verify (u'\ud84d \udc56 ' .encode ('utf-8' ) == '\xf0 \xa3 \x91 \x96 ' )
@@ -552,14 +555,7 @@ def __str__(self):
552555 'raw_unicode_escape' , 'unicode_escape' , 'unicode_internal' ):
553556 verify (unicode (u .encode (encoding ),encoding ) == u )
554557
555- # Roundtrip safety for non-BMP (just a few chars)
556- u = u'\U00010001 \U00020002 \U00030003 \U00040004 \U00050005 '
557- for encoding in ('utf-8' ,
558- 'utf-16' , 'utf-16-le' , 'utf-16-be' ,
559- #'raw_unicode_escape',
560- 'unicode_escape' , 'unicode_internal' ):
561- verify (unicode (u .encode (encoding ),encoding ) == u )
562-
558+ # Roundtrip safety for BMP (just the first 256 chars)
563559u = u'' .join (map (unichr , range (256 )))
564560for encoding in (
565561 'latin-1' ,
@@ -571,6 +567,7 @@ def __str__(self):
571567 except ValueError ,why :
572568 print '*** codec for "%s" failed: %s' % (encoding , why )
573569
570+ # Roundtrip safety for BMP (just the first 128 chars)
574571u = u'' .join (map (unichr , range (128 )))
575572for encoding in (
576573 'ascii' ,
@@ -582,6 +579,19 @@ def __str__(self):
582579 except ValueError ,why :
583580 print '*** codec for "%s" failed: %s' % (encoding , why )
584581
582+ # Roundtrip safety for non-BMP (just a few chars)
583+ u = u'\U00010001 \U00020002 \U00030003 \U00040004 \U00050005 '
584+ for encoding in ('utf-8' ,
585+ 'utf-16' , 'utf-16-le' , 'utf-16-be' ,
586+ #'raw_unicode_escape',
587+ 'unicode_escape' , 'unicode_internal' ):
588+ verify (unicode (u .encode (encoding ),encoding ) == u )
589+
590+ # UTF-8 must be roundtrip safe for all UCS-2 code points
591+ u = u'' .join (map (unichr , range (0x10000 )))
592+ for encoding in ('utf-8' ,):
593+ verify (unicode (u .encode (encoding ),encoding ) == u )
594+
585595print 'done.'
586596
587597print 'Testing standard mapping codecs...' ,
0 commit comments