Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4cbced2

Browse files
committed
Merge pull request #4883 from pv/fix-scalar-unpickle-py3
ENH: core: add hack enabling unpickling Py2 pickled scalars on Py3 under encoding='latin1'
2 parents 4e32035 + fcce027 commit 4cbced2

2 files changed

Lines changed: 64 additions & 1 deletion

File tree

numpy/core/src/multiarray/multiarraymodule.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1839,7 +1839,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
18391839

18401840
static char *kwlist[] = {"dtype","obj", NULL};
18411841
PyArray_Descr *typecode;
1842-
PyObject *obj = NULL;
1842+
PyObject *obj = NULL, *tmpobj = NULL;
18431843
int alloc = 0;
18441844
void *dptr;
18451845
PyObject *ret;
@@ -1871,14 +1871,31 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
18711871
alloc = 1;
18721872
}
18731873
else {
1874+
#if defined(NPY_PY3K)
1875+
/* Backward compatibility with Python 2 Numpy pickles */
1876+
if (PyUnicode_Check(obj)) {
1877+
tmpobj = PyUnicode_AsLatin1String(obj);
1878+
obj = tmpobj;
1879+
if (tmpobj == NULL) {
1880+
/* More informative error message */
1881+
PyErr_SetString(PyExc_ValueError,
1882+
("Failed to encode Numpy scalar data string to latin1. "
1883+
"pickle.load(a, encoding='latin1') is assumed if unpickling."));
1884+
return NULL;
1885+
}
1886+
}
1887+
#endif
1888+
18741889
if (!PyString_Check(obj)) {
18751890
PyErr_SetString(PyExc_TypeError,
18761891
"initializing object must be a string");
1892+
Py_XDECREF(tmpobj);
18771893
return NULL;
18781894
}
18791895
if (PyString_GET_SIZE(obj) < typecode->elsize) {
18801896
PyErr_SetString(PyExc_ValueError,
18811897
"initialization string is too small");
1898+
Py_XDECREF(tmpobj);
18821899
return NULL;
18831900
}
18841901
dptr = PyString_AS_STRING(obj);
@@ -1890,6 +1907,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
18901907
if (alloc) {
18911908
PyArray_free(dptr);
18921909
}
1910+
Py_XDECREF(tmpobj);
18931911
return ret;
18941912
}
18951913

numpy/core/tests/test_regression.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,51 @@ def test_pickle_py2_array_latin1_hack(self):
18141814
# Should not segfault:
18151815
assert_raises(Exception, pickle.loads, data, encoding='koi8-r')
18161816

1817+
def test_pickle_py2_scalar_latin1_hack(self):
1818+
# Check that scalar unpickling hack in Py3 that supports
1819+
# encoding='latin1' works correctly.
1820+
1821+
# Python2 output for pickle.dumps(...)
1822+
datas = [
1823+
# (original, python2_pickle, koi8r_validity)
1824+
(np.unicode_('\u6bd2'),
1825+
asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
1826+
"(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
1827+
"tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
1828+
'invalid'),
1829+
1830+
(np.float64(9e123),
1831+
asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
1832+
"p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
1833+
"bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
1834+
'invalid'),
1835+
1836+
(np.bytes_(asbytes('\x9c')), # different 8-bit code point in KOI8-R vs latin1
1837+
asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
1838+
"I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
1839+
"tp8\nRp9\n."),
1840+
'different'),
1841+
]
1842+
if sys.version_info[0] >= 3:
1843+
for original, data, koi8r_validity in datas:
1844+
result = pickle.loads(data, encoding='latin1')
1845+
assert_equal(result, original)
1846+
1847+
# Decoding under a non-latin1 encoding (e.g. KOI8-R) can
1848+
# produce bad results, but should not segfault.
1849+
if koi8r_validity == 'different':
1850+
# Unicode code points happen to lie within latin1,
1851+
# but are different in koi8-r, resulting in silent
1852+
# bogus results
1853+
result = pickle.loads(data, encoding='koi8-r')
1854+
assert_(result != original)
1855+
elif koi8r_validity == 'invalid':
1856+
# Unicode code points outside latin1, so this results
1857+
# in an encoding exception
1858+
assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
1859+
else:
1860+
raise ValueError(koi8r_validity)
1861+
18171862
def test_structured_type_to_object(self):
18181863
a_rec = np.array([(0, 1), (3, 2)], dtype='i4,i8')
18191864
a_obj = np.empty((2,), dtype=object)

0 commit comments

Comments
 (0)