-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
DEP: issue deprecation warning when creating ragged array (NEP 34) #14794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
61bd4c2
fa2aa53
e6168f1
7bc36c2
58282d1
ca5ae64
576fc47
d4167c2
25df5f1
9b4d0ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Deprecate automatic ``dtype=object`` for ragged input | ||
----------------------------------------------------- | ||
Calling ``np.array([1, [1, 2, 3]])`` will issue a ``DeprecationWarning`` as | ||
per `NEP 34`_. Users should explicitly use ``dtype=object`` to avoid the | ||
warning. | ||
|
||
.. _`NEP 34`: https://numpy.org/neps/nep-0034.html |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -713,14 +713,20 @@ discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type) | |
return 0; | ||
} | ||
|
||
typedef enum { | ||
DISCOVERED_OK = 0, | ||
DISCOVERED_RAGGED = 1, | ||
DISCOVERED_OBJECT = 2 | ||
} discovered_t; | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/* | ||
* Take an arbitrary object and discover how many dimensions it | ||
* has, filling in the dimensions as we go. | ||
*/ | ||
static int | ||
discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, | ||
int stop_at_string, int stop_at_tuple, | ||
int *out_is_object) | ||
discovered_t *out_is_object) | ||
{ | ||
PyObject *e; | ||
npy_intp n, i; | ||
|
@@ -906,7 +912,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, | |
if (PyErr_ExceptionMatches(PyExc_KeyError)) { | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
PyErr_Clear(); | ||
*maxndim = 0; | ||
*out_is_object = 1; | ||
*out_is_object = DISCOVERED_OBJECT; | ||
return 0; | ||
} | ||
else { | ||
|
@@ -965,7 +971,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, | |
*maxndim = all_elems_maxndim + 1; | ||
if (!all_dimensions_match) { | ||
/* typically results in an array containing variable-length lists */ | ||
*out_is_object = 1; | ||
*out_is_object = DISCOVERED_RAGGED; | ||
} | ||
} | ||
|
||
|
@@ -1809,7 +1815,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op, | |
|
||
/* Try to treat op as a list of lists or array-like objects. */ | ||
if (!writeable && PySequence_Check(op)) { | ||
int check_it, stop_at_string, stop_at_tuple, is_object; | ||
int check_it, stop_at_string, stop_at_tuple; | ||
int type_num, type; | ||
|
||
/* | ||
|
@@ -1859,7 +1865,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op, | |
((*out_dtype)->names || (*out_dtype)->subarray)); | ||
|
||
*out_ndim = NPY_MAXDIMS; | ||
is_object = 0; | ||
discovered_t is_object = DISCOVERED_OK; | ||
if (discover_dimensions( | ||
op, out_ndim, out_dims, check_it, | ||
stop_at_string, stop_at_tuple, &is_object) < 0) { | ||
|
@@ -1876,7 +1882,17 @@ PyArray_GetArrayParamsFromObject(PyObject *op, | |
return 0; | ||
} | ||
/* If object arrays are forced */ | ||
if (is_object) { | ||
if (is_object != DISCOVERED_OK) { | ||
if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) { | ||
/* NumPy 1.18, 2019-11-01 */ | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (DEPRECATE("Creating an ndarray with automatic object " | ||
"dtype is deprecated, use dtype=object if you intended " | ||
"it, otherwise specify an exact dtype") < 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Style nit: the indentation could be deeper here, but please ignore if you want. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will try to sneak it into another PR. |
||
{ | ||
return -1; | ||
} | ||
} | ||
/* either DISCOVERED_OBJECT or there is a requested_dtype */ | ||
Py_DECREF(*out_dtype); | ||
*out_dtype = PyArray_DescrFromType(NPY_OBJECT); | ||
if (*out_dtype == NULL) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -447,7 +447,7 @@ def test_array(self): | |
assert_equal(r, np.ones((2, 6, 6))) | ||
|
||
d = np.ones((6, )) | ||
r = np.array([[d, d + 1], d + 2]) | ||
r = np.array([[d, d + 1], d + 2], dtype=object) | ||
assert_equal(len(r), 2) | ||
assert_equal(r[0], [d, d + 1]) | ||
assert_equal(r[1], d + 2) | ||
|
@@ -1073,34 +1073,60 @@ def test_array_too_big(self): | |
assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,), | ||
shape=(max_bytes//itemsize + 1,), dtype=dtype) | ||
|
||
def test_jagged_ndim_object(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are the real NEP 34 tests (from here down in this file): they test that the behaviour has not changed when using the |
||
def _ragged_creation(self, seq): | ||
# without dtype=object, the ragged input should emit a DeprecationWarning | ||
with assert_warns(DeprecationWarning): | ||
a = np.array(seq) | ||
b = np.array(seq, dtype=object) | ||
assert_equal(a, b) | ||
return b | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def test_ragged_ndim_object(self): | ||
# Lists of mismatching depths are treated as object arrays | ||
a = np.array([[1], 2, 3]) | ||
a = self._ragged_creation([[1], 2, 3]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
a = np.array([1, [2], 3]) | ||
a = self._ragged_creation([1, [2], 3]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
a = np.array([1, 2, [3]]) | ||
a = self._ragged_creation([1, 2, [3]]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
def test_jagged_shape_object(self): | ||
def test_ragged_shape_object(self): | ||
# The jagged dimension of a list is turned into an object array | ||
a = np.array([[1, 1], [2], [3]]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
a = np.array([[1], [2, 2], [3]]) | ||
a = self._ragged_creation([[1, 1], [2], [3]]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
a = np.array([[1], [2], [3, 3]]) | ||
a = self._ragged_creation([[1], [2, 2], [3]]) | ||
assert_equal(a.shape, (3,)) | ||
assert_equal(a.dtype, object) | ||
|
||
a = self._ragged_creation([[1], [2], [3, 3]]) | ||
assert a.shape == (3,) | ||
assert a.dtype == object | ||
|
||
def test_array_of_ragged_array(self): | ||
outer = np.array([None, None]) | ||
outer[0] = outer[1] = np.array([1, 2, 3]) | ||
assert np.array(outer).shape == (2,) | ||
assert np.array([outer]).shape == (1, 2) | ||
|
||
outer_ragged = np.array([None, None]) | ||
outer_ragged[0] = np.array([1, 2, 3]) | ||
outer_ragged[1] = np.array([1, 2, 3, 4]) | ||
# should both of these emit deprecation warnings? | ||
assert np.array(outer_ragged).shape == (2,) | ||
assert np.array([outer_ragged]).shape == (1, 2,) | ||
|
||
def test_deep_nonragged_object(self): | ||
# None of these should raise, even though they are missing dtype=object | ||
a = np.array([[[Decimal(1)]]]) | ||
a = np.array([1, Decimal(1)]) | ||
a = np.array([[1], [Decimal(1)]]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In principle could be nice to add that similar cases (also None) such as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean specifically ragged lists of objects? In the tests above this ragged arrays of integers are checked. |
||
|
||
class TestStructured(object): | ||
def test_subarray_field_access(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stray or deliberate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deliberate. The change note has a link as its last line, which confuses Sphinx when assembling the notes into the final text unless it is followed by a blank line.