@@ -1814,6 +1814,51 @@ def test_pickle_py2_array_latin1_hack(self):
18141814 # Should not segfault:
18151815 assert_raises (Exception , pickle .loads , data , encoding = 'koi8-r' )
18161816
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that the scalar unpickling hack in Py3 that supports
    # encoding='latin1' works correctly.

    # Python2 output for pickle.dumps(...).  These are text-mode
    # (protocol 0) pickles, so every opcode has to start directly after
    # the preceding newline and the raw scalar bytes are written as
    # backslash-x escapes: no stray whitespace is allowed inside the
    # payload literals.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'f8'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\n"
                 "I0\ntp6\nbS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        # different 8-bit code point in KOI8-R vs latin1
        (np.bytes_(asbytes('\x9c')),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'S1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\n"
                 "tp6\nbS'\\x9c'\np7\ntp8\nRp9\n."),
         'different'),
    ]

    # The `encoding` argument of pickle.loads only exists on Python 3,
    # so there is nothing to check on Python 2.
    if sys.version_info[0] < 3:
        return

    for original, data, koi8r_validity in datas:
        result = pickle.loads(data, encoding='latin1')
        assert_equal(result, original)

        # Decoding under a non-latin1 encoding (e.g. KOI8-R) can
        # produce bad results, but should not segfault.
        if koi8r_validity == 'different':
            # Unicode code points happen to lie within latin1,
            # but are different in koi8-r, resulting in silently
            # bogus results.
            result = pickle.loads(data, encoding='koi8-r')
            assert_(result != original)
        elif koi8r_validity == 'invalid':
            # Unicode code points outside latin1, so this results
            # in an encoding exception.
            assert_raises(ValueError, pickle.loads, data,
                          encoding='koi8-r')
        else:
            # Guard against a typo in the table above.
            raise ValueError(koi8r_validity)
1861+
18171862 def test_structured_type_to_object (self ):
18181863 a_rec = np .array ([(0 , 1 ), (3 , 2 )], dtype = 'i4,i8' )
18191864 a_obj = np .empty ((2 ,), dtype = object )
0 commit comments