@@ -679,8 +679,9 @@ def process_word(self):
679679 @classmethod
680680 def lookupTestDecoder (cls , name ):
681681 if cls .codecEnabled and name == 'test_decoder' :
682+ latin1 = codecs .lookup ('latin-1' )
682683 return codecs .CodecInfo (
683- name = 'test_decoder' , encode = None , decode = None ,
684+ name = 'test_decoder' , encode = latin1 . encode , decode = None ,
684685 incrementalencoder = None ,
685686 streamreader = None , streamwriter = None ,
686687 incrementaldecoder = cls )
@@ -840,8 +841,11 @@ def testNewlines(self):
840841 [ '\r \n ' , [ "unix\n windows\r \n " , "os9\r last\n nonl" ] ],
841842 [ '\r ' , [ "unix\n windows\r " , "\n os9\r " , "last\n nonl" ] ],
842843 ]
843-
844- encodings = ('utf-8' , 'latin-1' )
844+ encodings = (
845+ 'utf-8' , 'latin-1' ,
846+ 'utf-16' , 'utf-16-le' , 'utf-16-be' ,
847+ 'utf-32' , 'utf-32-le' , 'utf-32-be' ,
848+ )
845849
846850 # Try a range of buffer sizes to test the case where \r is the last
847851 # character in TextIOWrapper._pending_line.
@@ -1195,56 +1199,84 @@ def test_issue2282(self):
11951199
11961200 self .assertEqual (buffer .seekable (), txt .seekable ())
11971201
1198- def test_newline_decoder (self ):
1199- import codecs
1200- decoder = codecs .getincrementaldecoder ("utf-8" )()
1201- decoder = io .IncrementalNewlineDecoder (decoder , translate = True )
1202+ def check_newline_decoder_utf8 (self , decoder ):
1203+ # UTF-8 specific tests for a newline decoder
1204+ def _check_decode (b , s , ** kwargs ):
1205+ # We exercise getstate() / setstate() as well as decode()
1206+ state = decoder .getstate ()
1207+ self .assertEquals (decoder .decode (b , ** kwargs ), s )
1208+ decoder .setstate (state )
1209+ self .assertEquals (decoder .decode (b , ** kwargs ), s )
12021210
1203- self . assertEquals ( decoder . decode ( b'\xe8 \xa2 \x88 ' ) , "\u8888 " )
1211+ _check_decode ( b'\xe8 \xa2 \x88 ' , "\u8888 " )
12041212
1205- self . assertEquals ( decoder . decode ( b'\xe8 ' ) , "" )
1206- self . assertEquals ( decoder . decode ( b'\xa2 ' ) , "" )
1207- self . assertEquals ( decoder . decode ( b'\x88 ' ) , "\u8888 " )
1213+ _check_decode ( b'\xe8 ' , "" )
1214+ _check_decode ( b'\xa2 ' , "" )
1215+ _check_decode ( b'\x88 ' , "\u8888 " )
12081216
1209- self .assertEquals (decoder .decode (b'\xe8 ' ), "" )
1210- self .assertRaises (UnicodeDecodeError , decoder .decode , b'' , final = True )
1217+ _check_decode (b'\xe8 ' , "" )
1218+ _check_decode (b'\xa2 ' , "" )
1219+ _check_decode (b'\x88 ' , "\u8888 " )
12111220
1212- decoder .setstate ((b'' , 0 ))
1213- self .assertEquals (decoder .decode (b'\n ' ), "\n " )
1214- self .assertEquals (decoder .decode (b'\r ' ), "" )
1215- self .assertEquals (decoder .decode (b'' , final = True ), "\n " )
1216- self .assertEquals (decoder .decode (b'\r ' , final = True ), "\n " )
1217-
1218- self .assertEquals (decoder .decode (b'\r ' ), "" )
1219- self .assertEquals (decoder .decode (b'a' ), "\n a" )
1220-
1221- self .assertEquals (decoder .decode (b'\r \r \n ' ), "\n \n " )
1222- self .assertEquals (decoder .decode (b'\r ' ), "" )
1223- self .assertEquals (decoder .decode (b'\r ' ), "\n " )
1224- self .assertEquals (decoder .decode (b'\n a' ), "\n a" )
1225-
1226- self .assertEquals (decoder .decode (b'\xe8 \xa2 \x88 \r \n ' ), "\u8888 \n " )
1227- self .assertEquals (decoder .decode (b'\xe8 \xa2 \x88 ' ), "\u8888 " )
1228- self .assertEquals (decoder .decode (b'\n ' ), "\n " )
1229- self .assertEquals (decoder .decode (b'\xe8 \xa2 \x88 \r ' ), "\u8888 " )
1230- self .assertEquals (decoder .decode (b'\n ' ), "\n " )
1221+ _check_decode (b'\xe8 ' , "" )
1222+ self .assertRaises (UnicodeDecodeError , decoder .decode , b'' , final = True )
12311223
1232- decoder = codecs .getincrementaldecoder ("utf-8" )()
1233- decoder = io .IncrementalNewlineDecoder (decoder , translate = True )
1224+ decoder .reset ()
1225+ _check_decode (b'\n ' , "\n " )
1226+ _check_decode (b'\r ' , "" )
1227+ _check_decode (b'' , "\n " , final = True )
1228+ _check_decode (b'\r ' , "\n " , final = True )
1229+
1230+ _check_decode (b'\r ' , "" )
1231+ _check_decode (b'a' , "\n a" )
1232+
1233+ _check_decode (b'\r \r \n ' , "\n \n " )
1234+ _check_decode (b'\r ' , "" )
1235+ _check_decode (b'\r ' , "\n " )
1236+ _check_decode (b'\n a' , "\n a" )
1237+
1238+ _check_decode (b'\xe8 \xa2 \x88 \r \n ' , "\u8888 \n " )
1239+ _check_decode (b'\xe8 \xa2 \x88 ' , "\u8888 " )
1240+ _check_decode (b'\n ' , "\n " )
1241+ _check_decode (b'\xe8 \xa2 \x88 \r ' , "\u8888 " )
1242+ _check_decode (b'\n ' , "\n " )
1243+
1244+ def check_newline_decoder (self , decoder , encoding ):
1245+ result = []
1246+ encoder = codecs .getincrementalencoder (encoding )()
1247+ def _decode_bytewise (s ):
1248+ for b in encoder .encode (s ):
1249+ result .append (decoder .decode (bytes ([b ])))
12341250 self .assertEquals (decoder .newlines , None )
1235- decoder . decode ( b "abc\n \r " )
1251+ _decode_bytewise ( "abc\n \r " )
12361252 self .assertEquals (decoder .newlines , '\n ' )
1237- decoder . decode ( b "\n abc" )
1253+ _decode_bytewise ( "\n abc" )
12381254 self .assertEquals (decoder .newlines , ('\n ' , '\r \n ' ))
1239- decoder . decode ( b "abc\r " )
1255+ _decode_bytewise ( "abc\r " )
12401256 self .assertEquals (decoder .newlines , ('\n ' , '\r \n ' ))
1241- decoder . decode ( b "abc" )
1257+ _decode_bytewise ( "abc" )
12421258 self .assertEquals (decoder .newlines , ('\r ' , '\n ' , '\r \n ' ))
1243- decoder .decode (b"abc\r " )
1259+ _decode_bytewise ("abc\r " )
1260+ self .assertEquals ("" .join (result ), "abc\n \n abcabc\n abcabc" )
12441261 decoder .reset ()
1245- self .assertEquals (decoder .decode (b "abc" ), "abc" )
1262+ self .assertEquals (decoder .decode ("abc" . encode ( encoding ) ), "abc" )
12461263 self .assertEquals (decoder .newlines , None )
12471264
1265+ def test_newline_decoder (self ):
1266+ encodings = (
1267+ 'utf-8' , 'latin-1' ,
1268+ 'utf-16' , 'utf-16-le' , 'utf-16-be' ,
1269+ 'utf-32' , 'utf-32-le' , 'utf-32-be' ,
1270+ )
1271+ for enc in encodings :
1272+ decoder = codecs .getincrementaldecoder (enc )()
1273+ decoder = io .IncrementalNewlineDecoder (decoder , translate = True )
1274+ self .check_newline_decoder (decoder , enc )
1275+ decoder = codecs .getincrementaldecoder ("utf-8" )()
1276+ decoder = io .IncrementalNewlineDecoder (decoder , translate = True )
1277+ self .check_newline_decoder_utf8 (decoder )
1278+
1279+
12481280# XXX Tests for open()
12491281
12501282class MiscIOTest (unittest .TestCase ):
0 commit comments