Thanks to visit codestin.com
Credit goes to github.com

Skip to content

DEP: issue deprecation warning when creating ragged array (NEP 34) #14794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/release/upcoming_changes/14794.deprecation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Deprecate automatic ``dtype=object`` for ragged input
-----------------------------------------------------
Calling ``np.array([1, [1, 2, 3]])`` will issue a ``DeprecationWarning`` as
per `NEP 34`_. Users should explicitly use ``dtype=object`` to avoid the
warning.

.. _`NEP 34`: https://numpy.org/neps/nep-0034.html
1 change: 1 addition & 0 deletions doc/release/upcoming_changes/template.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
{% if definitions[category]['showcontent'] %}
{% for text, values in sections[section][category].items() %}
{{ text }}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stray or deliberate?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deliberate. The changenote has a link as its last line, which confuses Sphinx when assembling the notes into the final text unless it is followed by a blank line.

{{ get_indent(text) }}({{values|join(', ') }})

{% endfor %}
Expand Down
28 changes: 22 additions & 6 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -713,14 +713,20 @@ discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type)
return 0;
}

typedef enum {
DISCOVERED_OK = 0,
DISCOVERED_RAGGED = 1,
DISCOVERED_OBJECT = 2
} discovered_t;

/*
* Take an arbitrary object and discover how many dimensions it
* has, filling in the dimensions as we go.
*/
static int
discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
int stop_at_string, int stop_at_tuple,
int *out_is_object)
discovered_t *out_is_object)
{
PyObject *e;
npy_intp n, i;
Expand Down Expand Up @@ -906,7 +912,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
PyErr_Clear();
*maxndim = 0;
*out_is_object = 1;
*out_is_object = DISCOVERED_OBJECT;
return 0;
}
else {
Expand Down Expand Up @@ -965,7 +971,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
*maxndim = all_elems_maxndim + 1;
if (!all_dimensions_match) {
/* typically results in an array containing variable-length lists */
*out_is_object = 1;
*out_is_object = DISCOVERED_RAGGED;
}
}

Expand Down Expand Up @@ -1809,7 +1815,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op,

/* Try to treat op as a list of lists or array-like objects. */
if (!writeable && PySequence_Check(op)) {
int check_it, stop_at_string, stop_at_tuple, is_object;
int check_it, stop_at_string, stop_at_tuple;
int type_num, type;

/*
Expand Down Expand Up @@ -1859,7 +1865,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
((*out_dtype)->names || (*out_dtype)->subarray));

*out_ndim = NPY_MAXDIMS;
is_object = 0;
discovered_t is_object = DISCOVERED_OK;
if (discover_dimensions(
op, out_ndim, out_dims, check_it,
stop_at_string, stop_at_tuple, &is_object) < 0) {
Expand All @@ -1876,7 +1882,17 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
return 0;
}
/* If object arrays are forced */
if (is_object) {
if (is_object != DISCOVERED_OK) {
if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) {
/* NumPy 1.18, 2019-11-01 */
if (DEPRECATE("Creating an ndarray with automatic object "
"dtype is deprecated, use dtype=object if you intended "
"it, otherwise specify an exact dtype") < 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be a style nit: the indentation should be deeper, but please ignore if you want.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will try to sneak it into another PR

{
return -1;
}
}
/* either DISCOVERED_OBJECT or there is a requested_dtype */
Py_DECREF(*out_dtype);
*out_dtype = PyArray_DescrFromType(NPY_OBJECT);
if (*out_dtype == NULL) {
Expand Down
10 changes: 10 additions & 0 deletions numpy/core/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,3 +568,13 @@ class TestNonZero(_DeprecationTestCase):
def test_zerod(self):
self.assert_deprecated(lambda: np.nonzero(np.array(0)))
self.assert_deprecated(lambda: np.nonzero(np.array(1)))


class TestRaggedArray(_DeprecationTestCase):
# 2019-11-29 1.18.0
def test_deprecate_ragged_arrays(self):
# NEP 34 deprecated automatic object dtype when creating ragged
# arrays. Also see the "ragged" tests in `test_multiarray`
arg = [1, [2, 3]]
self.assert_deprecated(np.array, args=(arg,))

50 changes: 38 additions & 12 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def test_array(self):
assert_equal(r, np.ones((2, 6, 6)))

d = np.ones((6, ))
r = np.array([[d, d + 1], d + 2])
r = np.array([[d, d + 1], d + 2], dtype=object)
assert_equal(len(r), 2)
assert_equal(r[0], [d, d + 1])
assert_equal(r[1], d + 2)
Expand Down Expand Up @@ -1073,34 +1073,60 @@ def test_array_too_big(self):
assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,),
shape=(max_bytes//itemsize + 1,), dtype=dtype)

def test_jagged_ndim_object(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are the real NEP 34 tests (from here down in this file): they test that the behaviour has not changed when using the dtype=object kwarg, and that the warning does not seem to have false-positives when no dtype is used.

def _ragged_creation(self, seq):
# without dtype=object, creating the ragged array should emit a DeprecationWarning
with assert_warns(DeprecationWarning):
a = np.array(seq)
b = np.array(seq, dtype=object)
assert_equal(a, b)
return b

def test_ragged_ndim_object(self):
# Lists of mismatching depths are treated as object arrays
a = np.array([[1], 2, 3])
a = self._ragged_creation([[1], 2, 3])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

a = np.array([1, [2], 3])
a = self._ragged_creation([1, [2], 3])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

a = np.array([1, 2, [3]])
a = self._ragged_creation([1, 2, [3]])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

def test_jagged_shape_object(self):
def test_ragged_shape_object(self):
# The jagged dimension of a list is turned into an object array
a = np.array([[1, 1], [2], [3]])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

a = np.array([[1], [2, 2], [3]])
a = self._ragged_creation([[1, 1], [2], [3]])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

a = np.array([[1], [2], [3, 3]])
a = self._ragged_creation([[1], [2, 2], [3]])
assert_equal(a.shape, (3,))
assert_equal(a.dtype, object)

a = self._ragged_creation([[1], [2], [3, 3]])
assert a.shape == (3,)
assert a.dtype == object

def test_array_of_ragged_array(self):
outer = np.array([None, None])
outer[0] = outer[1] = np.array([1, 2, 3])
assert np.array(outer).shape == (2,)
assert np.array([outer]).shape == (1, 2)

outer_ragged = np.array([None, None])
outer_ragged[0] = np.array([1, 2, 3])
outer_ragged[1] = np.array([1, 2, 3, 4])
# should both of these emit deprecation warnings?
assert np.array(outer_ragged).shape == (2,)
assert np.array([outer_ragged]).shape == (1, 2,)

def test_deep_nonragged_object(self):
# None of these should raise, even though they are missing dtype=object
a = np.array([[[Decimal(1)]]])
a = np.array([1, Decimal(1)])
a = np.array([[1], [Decimal(1)]])
Copy link
Member

@seberg seberg Dec 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle it could be nice to add a test that similar cases (also with None), such as [None, [None, None]], do correctly warn (although maybe I missed a test that checks this already).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean specifically ragged lists of objects? The tests above this one already check ragged arrays of integers.


class TestStructured(object):
def test_subarray_field_access(self):
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,7 @@ def test_array_method(self):

def test_nonzero_invalid_object(self):
# gh-9295
a = np.array([np.array([1, 2]), 3])
a = np.array([np.array([1, 2]), 3], dtype=object)
assert_raises(ValueError, np.nonzero, a)

class BoolErrors:
Expand Down
11 changes: 6 additions & 5 deletions numpy/core/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,21 +1365,21 @@ def test_fromiter_bytes(self):
def test_array_from_sequence_scalar_array(self):
# Ticket #1078: segfaults when creating an array with a sequence of
# 0d arrays.
a = np.array((np.ones(2), np.array(2)))
a = np.array((np.ones(2), np.array(2)), dtype=object)
assert_equal(a.shape, (2,))
assert_equal(a.dtype, np.dtype(object))
assert_equal(a[0], np.ones(2))
assert_equal(a[1], np.array(2))

a = np.array(((1,), np.array(1)))
a = np.array(((1,), np.array(1)), dtype=object)
assert_equal(a.shape, (2,))
assert_equal(a.dtype, np.dtype(object))
assert_equal(a[0], (1,))
assert_equal(a[1], np.array(1))

def test_array_from_sequence_scalar_array2(self):
# Ticket #1081: weird array with strange input...
t = np.array([np.array([]), np.array(0, object)])
t = np.array([np.array([]), np.array(0, object)], dtype=object)
assert_equal(t.shape, (2,))
assert_equal(t.dtype, np.dtype(object))

Expand Down Expand Up @@ -2288,9 +2288,10 @@ def f(x):
x[0], x[-1] = x[-1], x[0]

uf = np.frompyfunc(f, 1, 0)
a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]])
a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]], dtype=object)
assert_equal(uf(a), ())
assert_array_equal(a, [[3, 2, 1], [5, 4], [9, 7, 8, 6]])
expected = np.array([[3, 2, 1], [5, 4], [9, 7, 8, 6]], dtype=object)
assert_array_equal(a, expected)

@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
def test_leak_in_structured_dtype_comparison(self):
Expand Down
8 changes: 6 additions & 2 deletions numpy/core/tests/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,14 +1125,18 @@ def test_object_array_accumulate_inplace(self):
# Twice reproduced also for tuples:
np.add.accumulate(arr, out=arr)
np.add.accumulate(arr, out=arr)
assert_array_equal(arr, np.array([[1]*i for i in [1, 3, 6, 10]]))
assert_array_equal(arr,
np.array([[1]*i for i in [1, 3, 6, 10]], dtype=object),
)

# And the same if the axis argument is used
arr = np.ones((2, 4), dtype=object)
arr[0, :] = [[2] for i in range(4)]
np.add.accumulate(arr, out=arr, axis=-1)
np.add.accumulate(arr, out=arr, axis=-1)
assert_array_equal(arr[0, :], np.array([[2]*i for i in [1, 3, 6, 10]]))
assert_array_equal(arr[0, :],
np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object),
)

def test_object_array_reduceat_inplace(self):
# Checks that in-place reduceats work, see also gh-7465
Expand Down
33 changes: 19 additions & 14 deletions numpy/lib/tests/test_arraypad.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,24 +1262,29 @@ def test_negative_pad_width(self, pad_width, mode):
with pytest.raises(ValueError, match=match):
np.pad(arr, pad_width, mode)

@pytest.mark.parametrize("pad_width", [
"3",
"word",
None,
object(),
3.4,
((2, 3, 4), (3, 2)), # dtype=object (tuple)
complex(1, -1),
((-2.1, 3), (3, 2)),
@pytest.mark.parametrize("pad_width, dtype", [
("3", None),
("word", None),
(None, None),
(object(), None),
(3.4, None),
(((2, 3, 4), (3, 2)), object),
(complex(1, -1), None),
(((-2.1, 3), (3, 2)), None),
])
@pytest.mark.parametrize("mode", _all_modes.keys())
def test_bad_type(self, pad_width, mode):
def test_bad_type(self, pad_width, dtype, mode):
arr = np.arange(30).reshape((6, 5))
match = "`pad_width` must be of integral type."
with pytest.raises(TypeError, match=match):
np.pad(arr, pad_width, mode)
with pytest.raises(TypeError, match=match):
np.pad(arr, np.array(pad_width), mode)
if dtype is not None:
# avoid DeprecationWarning when not specifying dtype
with pytest.raises(TypeError, match=match):
np.pad(arr, np.array(pad_width, dtype=dtype), mode)
else:
with pytest.raises(TypeError, match=match):
np.pad(arr, pad_width, mode)
with pytest.raises(TypeError, match=match):
np.pad(arr, np.array(pad_width), mode)

def test_pad_width_as_ndarray(self):
a = np.arange(12)
Expand Down
2 changes: 1 addition & 1 deletion numpy/lib/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def test_large_zip(self):
except (MemoryError, OverflowError):
pytest.skip("Cannot allocate enough memory for test")
test_data = np.asarray([np.random.rand(np.random.randint(50,100),4)
for i in range(800000)])
for i in range(800000)], dtype=object)
with tempdir() as tmpdir:
np.savez(os.path.join(tmpdir, 'test.npz'), test_data=test_data)

Expand Down
4 changes: 2 additions & 2 deletions numpy/ma/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2817,8 +2817,8 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
elif isinstance(data, (tuple, list)):
try:
# If data is a sequence of masked array
mask = np.array([getmaskarray(m) for m in data],
dtype=mdtype)
mask = np.array([getmaskarray(np.asanyarray(m, dtype=mdtype))
for m in data], dtype=mdtype)
except ValueError:
# If data is nested
mask = nomask
Expand Down
2 changes: 1 addition & 1 deletion numpy/ma/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ def test_mvoid_multidim_print(self):
def test_object_with_array(self):
mx1 = masked_array([1.], mask=[True])
mx2 = masked_array([1., 2.])
mx = masked_array([mx1, mx2], mask=[False, True])
mx = masked_array([mx1, mx2], mask=[False, True], dtype=object)
assert_(mx[0] is mx1)
assert_(mx[1] is not mx2)
assert_(np.all(mx[1].data == mx2.data))
Expand Down
7 changes: 4 additions & 3 deletions numpy/random/tests/test_generator_mt19937_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ def test_shuffle_mixed_dimension(self):
[1, (2, 2), (3, 3), None],
[(1, 1), 2, 3, None]]:
mt19937 = Generator(MT19937(12345))
shuffled = list(t)
shuffled = np.array(t, dtype=object)
mt19937.shuffle(shuffled)
assert_array_equal(shuffled, [t[2], t[0], t[3], t[1]])
expected = np.array([t[2], t[0], t[3], t[1]], dtype=object)
assert_array_equal(np.array(shuffled, dtype=object), expected)

def test_call_within_randomstate(self):
# Check that custom BitGenerator does not call into global state
Expand Down Expand Up @@ -118,7 +119,7 @@ def test_shuffle_of_array_of_objects(self):
# a segfault on garbage collection.
# See gh-7719
mt19937 = Generator(MT19937(1234))
a = np.array([np.arange(1), np.arange(4)])
a = np.array([np.arange(1), np.arange(4)], dtype=object)

for _ in range(1000):
mt19937.shuffle(a)
Expand Down
5 changes: 3 additions & 2 deletions numpy/random/tests/test_randomstate_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def test_shuffle_mixed_dimension(self):
random.seed(12345)
shuffled = list(t)
random.shuffle(shuffled)
assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]])
expected = np.array([t[0], t[3], t[1], t[2]], dtype=object)
assert_array_equal(np.array(shuffled, dtype=object), expected)

def test_call_within_randomstate(self):
# Check that custom RandomState does not call into global state
Expand Down Expand Up @@ -128,7 +129,7 @@ def test_shuffle_of_array_of_objects(self):
# a segfault on garbage collection.
# See gh-7719
random.seed(1234)
a = np.array([np.arange(1), np.arange(4)])
a = np.array([np.arange(1), np.arange(4)], dtype=object)

for _ in range(1000):
random.shuffle(a)
Expand Down
5 changes: 3 additions & 2 deletions numpy/random/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def test_shuffle_mixed_dimension(self):
np.random.seed(12345)
shuffled = list(t)
random.shuffle(shuffled)
assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]])
expected = np.array([t[0], t[3], t[1], t[2]], dtype=object)
assert_array_equal(np.array(shuffled, dtype=object), expected)

def test_call_within_randomstate(self):
# Check that custom RandomState does not call into global state
Expand Down Expand Up @@ -126,7 +127,7 @@ def test_shuffle_of_array_of_objects(self):
# a segfault on garbage collection.
# See gh-7719
np.random.seed(1234)
a = np.array([np.arange(1), np.arange(4)])
a = np.array([np.arange(1), np.arange(4)], dtype=object)

for _ in range(1000):
np.random.shuffle(a)
Expand Down