diff --git a/doc/release/upcoming_changes/14794.deprecation.rst b/doc/release/upcoming_changes/14794.deprecation.rst new file mode 100644 index 000000000000..b84bc1c61d89 --- /dev/null +++ b/doc/release/upcoming_changes/14794.deprecation.rst @@ -0,0 +1,7 @@ +Deprecate automatic ``dtype=object`` for ragged input +----------------------------------------------------- +Calling ``np.array([[1, [1, 2, 3]])`` will issue a ``DeprecationWarning`` as +per `NEP 34`_. Users should explicitly use ``dtype=object`` to avoid the +warning. + +.. _`NEP 34`: https://numpy.org/neps/nep-0034.html diff --git a/doc/release/upcoming_changes/template.rst b/doc/release/upcoming_changes/template.rst index 9c8a3b5fca7e..997b4850ea58 100644 --- a/doc/release/upcoming_changes/template.rst +++ b/doc/release/upcoming_changes/template.rst @@ -17,6 +17,7 @@ {% if definitions[category]['showcontent'] %} {% for text, values in sections[section][category].items() %} {{ text }} + {{ get_indent(text) }}({{values|join(', ') }}) {% endfor %} diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 62804b979cbd..88ce1e221618 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -713,6 +713,12 @@ discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type) return 0; } +typedef enum { + DISCOVERED_OK = 0, + DISCOVERED_RAGGED = 1, + DISCOVERED_OBJECT = 2 +} discovered_t; + /* * Take an arbitrary object and discover how many dimensions it * has, filling in the dimensions as we go. @@ -720,7 +726,7 @@ discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type) static int discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, int stop_at_string, int stop_at_tuple, - int *out_is_object) + discovered_t *out_is_object) { PyObject *e; npy_intp n, i; @@ -906,7 +912,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, if (PyErr_ExceptionMatches(PyExc_KeyError)) { PyErr_Clear(); *maxndim = 0; - *out_is_object = 1; + *out_is_object = DISCOVERED_OBJECT; return 0; } else { @@ -965,7 +971,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, *maxndim = all_elems_maxndim + 1; if (!all_dimensions_match) { /* typically results in an array containing variable-length lists */ - *out_is_object = 1; + *out_is_object = DISCOVERED_RAGGED; } } @@ -1809,7 +1815,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op, /* Try to treat op as a list of lists or array-like objects. */ if (!writeable && PySequence_Check(op)) { - int check_it, stop_at_string, stop_at_tuple, is_object; + int check_it, stop_at_string, stop_at_tuple; int type_num, type; /* @@ -1859,7 +1865,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op, ((*out_dtype)->names || (*out_dtype)->subarray)); *out_ndim = NPY_MAXDIMS; - is_object = 0; + discovered_t is_object = DISCOVERED_OK; if (discover_dimensions( op, out_ndim, out_dims, check_it, stop_at_string, stop_at_tuple, &is_object) < 0) { @@ -1876,7 +1882,17 @@ PyArray_GetArrayParamsFromObject(PyObject *op, return 0; } /* If object arrays are forced */ - if (is_object) { + if (is_object != DISCOVERED_OK) { + if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) { + /* NumPy 1.18, 2019-11-01 */ + if (DEPRECATE("Creating an ndarray with automatic object " + "dtype is deprecated, use dtype=object if you intended " + "it, otherwise specify an exact dtype") < 0) + { + return -1; + } + } + /* either DISCOVERED_OBJECT or there is a requested_dtype */ Py_DECREF(*out_dtype); *out_dtype = PyArray_DescrFromType(NPY_OBJECT); if (*out_dtype == NULL) { diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 8bffaa9affc5..214372524bcb 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -568,3 +568,13 @@ class TestNonZero(_DeprecationTestCase): def test_zerod(self): self.assert_deprecated(lambda: np.nonzero(np.array(0))) self.assert_deprecated(lambda: np.nonzero(np.array(1))) + + +class TestRaggedArray(_DeprecationTestCase): + # 2019-11-29 1.18.0 + def test_deprecate_ragged_arrays(self): + # NEP 34 deprecated automatic object dtype when creating ragged + # arrays. Also see the "ragged" tests in `test_multiarray` + arg = [1, [2, 3]] + self.assert_deprecated(np.array, args=(arg,)) + diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index c699a9bc1c18..59afe8e5e179 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -447,7 +447,7 @@ def test_array(self): assert_equal(r, np.ones((2, 6, 6))) d = np.ones((6, )) - r = np.array([[d, d + 1], d + 2]) + r = np.array([[d, d + 1], d + 2], dtype=object) assert_equal(len(r), 2) assert_equal(r[0], [d, d + 1]) assert_equal(r[1], d + 2) @@ -1073,34 +1073,60 @@ def test_array_too_big(self): assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,), shape=(max_bytes//itemsize + 1,), dtype=dtype) - def test_jagged_ndim_object(self): + def _ragged_creation(self, seq): + # without dtype=object, the ragged object should raise + with assert_warns(DeprecationWarning): + a = np.array(seq) + b = np.array(seq, dtype=object) + assert_equal(a, b) + return b + + def test_ragged_ndim_object(self): # Lists of mismatching depths are treated as object arrays - a = np.array([[1], 2, 3]) + a = self._ragged_creation([[1], 2, 3]) assert_equal(a.shape, (3,)) assert_equal(a.dtype, object) - a = np.array([1, [2], 3]) + a = self._ragged_creation([1, [2], 3]) assert_equal(a.shape, (3,)) assert_equal(a.dtype, object) - a = np.array([1, 2, [3]]) + a = self._ragged_creation([1, 2, [3]]) assert_equal(a.shape, (3,)) assert_equal(a.dtype, object) - def test_jagged_shape_object(self): + def test_ragged_shape_object(self): # The jagged dimension of a list is turned into an object array - a = np.array([[1, 1], [2], [3]]) - assert_equal(a.shape, (3,)) - assert_equal(a.dtype, object) - - a = np.array([[1], [2, 2], [3]]) + a = self._ragged_creation([[1, 1], [2], [3]]) assert_equal(a.shape, (3,)) assert_equal(a.dtype, object) - a = np.array([[1], [2], [3, 3]]) + a = self._ragged_creation([[1], [2, 2], [3]]) assert_equal(a.shape, (3,)) assert_equal(a.dtype, object) + a = self._ragged_creation([[1], [2], [3, 3]]) + assert a.shape == (3,) + assert a.dtype == object + + def test_array_of_ragged_array(self): + outer = np.array([None, None]) + outer[0] = outer[1] = np.array([1, 2, 3]) + assert np.array(outer).shape == (2,) + assert np.array([outer]).shape == (1, 2) + + outer_ragged = np.array([None, None]) + outer_ragged[0] = np.array([1, 2, 3]) + outer_ragged[1] = np.array([1, 2, 3, 4]) + # should both of these emit deprecation warnings? + assert np.array(outer_ragged).shape == (2,) + assert np.array([outer_ragged]).shape == (1, 2,) + + def test_deep_nonragged_object(self): + # None of these should raise, even though they are missing dtype=object + a = np.array([[[Decimal(1)]]]) + a = np.array([1, Decimal(1)]) + a = np.array([[1], [Decimal(1)]]) class TestStructured(object): def test_subarray_field_access(self): diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 1358b45e9cd9..c0e20f420ebb 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1211,7 +1211,7 @@ def test_array_method(self): def test_nonzero_invalid_object(self): # gh-9295 - a = np.array([np.array([1, 2]), 3]) + a = np.array([np.array([1, 2]), 3], dtype=object) assert_raises(ValueError, np.nonzero, a) class BoolErrors: diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 9dc231debca7..444cbd79c7e2 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1365,13 +1365,13 @@ def test_fromiter_bytes(self): def test_array_from_sequence_scalar_array(self): # Ticket #1078: segfaults when creating an array with a sequence of # 0d arrays. - a = np.array((np.ones(2), np.array(2))) + a = np.array((np.ones(2), np.array(2)), dtype=object) assert_equal(a.shape, (2,)) assert_equal(a.dtype, np.dtype(object)) assert_equal(a[0], np.ones(2)) assert_equal(a[1], np.array(2)) - a = np.array(((1,), np.array(1))) + a = np.array(((1,), np.array(1)), dtype=object) assert_equal(a.shape, (2,)) assert_equal(a.dtype, np.dtype(object)) assert_equal(a[0], (1,)) @@ -1379,7 +1379,7 @@ def test_array_from_sequence_scalar_array(self): def test_array_from_sequence_scalar_array2(self): # Ticket #1081: weird array with strange input... - t = np.array([np.array([]), np.array(0, object)]) + t = np.array([np.array([]), np.array(0, object)], dtype=object) assert_equal(t.shape, (2,)) assert_equal(t.dtype, np.dtype(object)) @@ -2288,9 +2288,10 @@ def f(x): x[0], x[-1] = x[-1], x[0] uf = np.frompyfunc(f, 1, 0) - a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]]) + a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]], dtype=object) assert_equal(uf(a), ()) - assert_array_equal(a, [[3, 2, 1], [5, 4], [9, 7, 8, 6]]) + expected = np.array([[3, 2, 1], [5, 4], [9, 7, 8, 6]], dtype=object) + assert_array_equal(a, expected) @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") def test_leak_in_structured_dtype_comparison(self): diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 707c690ddbe6..a3b564b11d48 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1125,14 +1125,18 @@ def test_object_array_accumulate_inplace(self): # Twice reproduced also for tuples: np.add.accumulate(arr, out=arr) np.add.accumulate(arr, out=arr) - assert_array_equal(arr, np.array([[1]*i for i in [1, 3, 6, 10]])) + assert_array_equal(arr, + np.array([[1]*i for i in [1, 3, 6, 10]], dtype=object), + ) # And the same if the axis argument is used arr = np.ones((2, 4), dtype=object) arr[0, :] = [[2] for i in range(4)] np.add.accumulate(arr, out=arr, axis=-1) np.add.accumulate(arr, out=arr, axis=-1) - assert_array_equal(arr[0, :], np.array([[2]*i for i in [1, 3, 6, 10]])) + assert_array_equal(arr[0, :], + np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object), + ) def test_object_array_reduceat_inplace(self): # Checks that in-place reduceats work, see also gh-7465 diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index 65593dd29922..1c3507a62814 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -1262,24 +1262,29 @@ def test_negative_pad_width(self, pad_width, mode): with pytest.raises(ValueError, match=match): np.pad(arr, pad_width, mode) - @pytest.mark.parametrize("pad_width", [ - "3", - "word", - None, - object(), - 3.4, - ((2, 3, 4), (3, 2)), # dtype=object (tuple) - complex(1, -1), - ((-2.1, 3), (3, 2)), + @pytest.mark.parametrize("pad_width, dtype", [ + ("3", None), + ("word", None), + (None, None), + (object(), None), + (3.4, None), + (((2, 3, 4), (3, 2)), object), + (complex(1, -1), None), + (((-2.1, 3), (3, 2)), None), ]) @pytest.mark.parametrize("mode", _all_modes.keys()) - def test_bad_type(self, pad_width, mode): + def test_bad_type(self, pad_width, dtype, mode): arr = np.arange(30).reshape((6, 5)) match = "`pad_width` must be of integral type." - with pytest.raises(TypeError, match=match): - np.pad(arr, pad_width, mode) - with pytest.raises(TypeError, match=match): - np.pad(arr, np.array(pad_width), mode) + if dtype is not None: + # avoid DeprecationWarning when not specifying dtype + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width, dtype=dtype), mode) + else: + with pytest.raises(TypeError, match=match): + np.pad(arr, pad_width, mode) + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width), mode) def test_pad_width_as_ndarray(self): a = np.arange(12) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 1181fe98630d..4a5eec92c194 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -583,7 +583,7 @@ def test_large_zip(self): except (MemoryError, OverflowError): pytest.skip("Cannot allocate enough memory for test") test_data = np.asarray([np.random.rand(np.random.randint(50,100),4) - for i in range(800000)]) + for i in range(800000)], dtype=object) with tempdir() as tmpdir: np.savez(os.path.join(tmpdir, 'test.npz'), test_data=test_data) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index bb0d8d41238e..dd7e39b7184d 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -2817,8 +2817,8 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False, elif isinstance(data, (tuple, list)): try: # If data is a sequence of masked array - mask = np.array([getmaskarray(m) for m in data], - dtype=mdtype) + mask = np.array([getmaskarray(np.asanyarray(m, dtype=mdtype)) + for m in data], dtype=mdtype) except ValueError: # If data is nested mask = nomask diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index b72ce56aa299..3f7226dfb32b 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -936,7 +936,7 @@ def test_mvoid_multidim_print(self): def test_object_with_array(self): mx1 = masked_array([1.], mask=[True]) mx2 = masked_array([1., 2.]) - mx = masked_array([mx1, mx2], mask=[False, True]) + mx = masked_array([mx1, mx2], mask=[False, True], dtype=object) assert_(mx[0] is mx1) assert_(mx[1] is not mx2) assert_(np.all(mx[1].data == mx2.data)) diff --git a/numpy/random/tests/test_generator_mt19937_regressions.py b/numpy/random/tests/test_generator_mt19937_regressions.py index 3a937f997a43..e328c0c713b4 100644 --- a/numpy/random/tests/test_generator_mt19937_regressions.py +++ b/numpy/random/tests/test_generator_mt19937_regressions.py @@ -56,9 +56,10 @@ def test_shuffle_mixed_dimension(self): [1, (2, 2), (3, 3), None], [(1, 1), 2, 3, None]]: mt19937 = Generator(MT19937(12345)) - shuffled = list(t) + shuffled = np.array(t, dtype=object) mt19937.shuffle(shuffled) - assert_array_equal(shuffled, [t[2], t[0], t[3], t[1]]) + expected = np.array([t[2], t[0], t[3], t[1]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) def test_call_within_randomstate(self): # Check that custom BitGenerator does not call into global state @@ -118,7 +119,7 @@ def test_shuffle_of_array_of_objects(self): # a segfault on garbage collection. # See gh-7719 mt19937 = Generator(MT19937(1234)) - a = np.array([np.arange(1), np.arange(4)]) + a = np.array([np.arange(1), np.arange(4)], dtype=object) for _ in range(1000): mt19937.shuffle(a) diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py index bdc2214b6ee5..827240cefad2 100644 --- a/numpy/random/tests/test_randomstate_regression.py +++ b/numpy/random/tests/test_randomstate_regression.py @@ -68,7 +68,8 @@ def test_shuffle_mixed_dimension(self): random.seed(12345) shuffled = list(t) random.shuffle(shuffled) - assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]]) + expected = np.array([t[0], t[3], t[1], t[2]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) def test_call_within_randomstate(self): # Check that custom RandomState does not call into global state @@ -128,7 +129,7 @@ def test_shuffle_of_array_of_objects(self): # a segfault on garbage collection. # See gh-7719 random.seed(1234) - a = np.array([np.arange(1), np.arange(4)]) + a = np.array([np.arange(1), np.arange(4)], dtype=object) for _ in range(1000): random.shuffle(a) diff --git a/numpy/random/tests/test_regression.py b/numpy/random/tests/test_regression.py index 509e2d57ff28..3cc289a806c6 100644 --- a/numpy/random/tests/test_regression.py +++ b/numpy/random/tests/test_regression.py @@ -66,7 +66,8 @@ def test_shuffle_mixed_dimension(self): np.random.seed(12345) shuffled = list(t) random.shuffle(shuffled) - assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]]) + expected = np.array([t[0], t[3], t[1], t[2]], dtype=object) + assert_array_equal(np.array(shuffled, dtype=object), expected) def test_call_within_randomstate(self): # Check that custom RandomState does not call into global state @@ -126,7 +127,7 @@ def test_shuffle_of_array_of_objects(self): # a segfault on garbage collection. # See gh-7719 np.random.seed(1234) - a = np.array([np.arange(1), np.arange(4)]) + a = np.array([np.arange(1), np.arange(4)], dtype=object) for _ in range(1000): np.random.shuffle(a)