@@ -549,6 +549,182 @@ def test_readlines(self):
549549 f = self .reader (self .stream )
550550 self .assertEquals (f .readlines (), [u'\ud55c \n ' , u'\uae00 ' ])
551551
# Codec names exercised with unicode input.  The literal below is kept in
# lexicographic order; optional codecs are appended after it, depending on
# what the running interpreter provides.
all_unicode_encodings = [
    "ascii", "base64_codec", "big5", "big5hkscs", "charmap",
    "cp037", "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "cp424", "cp437", "cp500", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857",
    "cp860", "cp861", "cp862", "cp863", "cp864",
    "cp865", "cp866", "cp869", "cp874", "cp875",
    "cp932", "cp949", "cp950",
    "euc_jis_2004", "euc_jisx0213", "euc_jp", "euc_kr",
    "gb18030", "gb2312", "gbk",
    "hex_codec", "hp_roman8", "hz", "idna",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "iso8859_1", "iso8859_10", "iso8859_11", "iso8859_13",
    "iso8859_14", "iso8859_15", "iso8859_16",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5",
    "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9",
    "johab", "koi8_r", "koi8_u", "latin_1",
    "mac_cyrillic", "mac_greek", "mac_iceland",
    "mac_latin2", "mac_roman", "mac_turkish",
    "palmos", "ptcp154", "punycode",
    "raw_unicode_escape", "rot_13",
    "shift_jis", "shift_jis_2004", "shift_jisx0213",
    "tis_620",
    "unicode_escape", "unicode_internal",
    "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8",
]

# "mbcs" is only tested when the codecs module exposes mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings += ["mbcs"]

# Codecs that operate on str only, not on unicode.
all_string_encodings = ["quopri_codec", "string_escape", "uu_codec"]

# The "undefined" codec is deliberately left out of every list: it is not
# supposed to work, so there is nothing to round-trip.

# Codecs that do not work in stateful (stream reader/writer) mode.
broken_unicode_with_streams = [
    "base64_codec", "hex_codec", "punycode", "unicode_internal",
]

# bz2_codec is tested only when the bz2 module is importable; it is also
# listed as unusable with streams.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings += ["bz2_codec"]
    broken_unicode_with_streams += ["bz2_codec"]

# Same treatment for zlib_codec, which depends on the zlib module.
try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings += ["zlib_codec"]
    broken_unicode_with_streams += ["zlib_codec"]
692+
class BasicUnicodeTest(unittest.TestCase):
    """Round-trip a short unicode string through every listed codec."""

    def test_basics(self):
        # For each name in all_unicode_encodings: encode and decode the
        # sample statelessly, then (unless the codec is listed in
        # broken_unicode_with_streams) repeat the round trip through a
        # stream writer/reader pair, feeding one item at a time.
        text = u"abc123"  # every codec is expected to be able to encode this
        text_len = len(text)
        for encoding in all_unicode_encodings:
            encoded, consumed = codecs.getencoder(encoding)(text)
            # unicode_internal is exempt from the consumed-length check.
            if encoding != "unicode_internal":
                self.assertEqual(
                    consumed, text_len,
                    "%r != %r (encoding=%r)" % (consumed, text_len, encoding))
            decoded, consumed = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(
                decoded, text,
                "%r != %r (encoding=%r)" % (decoded, text, encoding))

            if encoding in broken_unicode_with_streams:
                continue

            # Stream writer: write one character, then drain whatever the
            # writer pushed into the queue.
            # NOTE(review): Queue is not defined in this block; presumably a
            # FIFO buffer helper from this test module -- confirm.
            sink = Queue()
            writer = codecs.getwriter(encoding)(sink)
            stream_encoded = ""
            for char in text:
                writer.write(char)
                stream_encoded += sink.read()

            # Stream reader: feed the encoded data back one item at a time
            # and collect whatever the reader is able to decode so far.
            source = Queue()
            reader = codecs.getreader(encoding)(source)
            stream_decoded = u""
            for unit in stream_encoded:
                source.write(unit)
                stream_decoded += reader.read()
            self.assertEqual(
                stream_decoded, text,
                "%r != %r (encoding=%r)" % (stream_decoded, text, encoding))
718+
class BasicStrTest(unittest.TestCase):
    """Round-trip a plain str through the codecs in all_string_encodings."""

    def test_basics(self):
        # Encode the sample with each str-only codec, check that the whole
        # input was reported as consumed, then decode and compare.
        text = "abc123"
        for encoding in all_string_encodings:
            encoded, consumed = codecs.getencoder(encoding)(text)
            self.assertEqual(consumed, len(text))
            decoded, _decoded_len = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(
                decoded, text,
                "%r != %r (encoding=%r)" % (decoded, text, encoding))
727+
552728def test_main ():
553729 test_support .run_unittest (
554730 UTF16Test ,
@@ -561,7 +737,9 @@ def test_main():
561737 NameprepTest ,
562738 CodecTest ,
563739 CodecsModuleTest ,
564- StreamReaderTest
740+ StreamReaderTest ,
741+ BasicUnicodeTest ,
742+ BasicStrTest
565743 )
566744
567745
0 commit comments