Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 3a9a63f

Browse files
authored
Merge pull request #14794 from mattip/nep-0034-impl
DEP: issue deprecation warning when creating ragged array (NEP 34)
2 parents 7999f7c + 9b4d0ac commit 3a9a63f

15 files changed

+124
-51
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Deprecate automatic ``dtype=object`` for ragged input
2+
-----------------------------------------------------
3+
Calling ``np.array([1, [1, 2, 3]])`` will issue a ``DeprecationWarning`` as
4+
per `NEP 34`_. Users should explicitly use ``dtype=object`` to avoid the
5+
warning.
6+
7+
.. _`NEP 34`: https://numpy.org/neps/nep-0034.html

doc/release/upcoming_changes/template.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
{% if definitions[category]['showcontent'] %}
1818
{% for text, values in sections[section][category].items() %}
1919
{{ text }}
20+
2021
{{ get_indent(text) }}({{values|join(', ') }})
2122

2223
{% endfor %}

numpy/core/src/multiarray/ctors.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -688,14 +688,20 @@ discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type)
688688
return 0;
689689
}
690690

691+
typedef enum {
692+
DISCOVERED_OK = 0,
693+
DISCOVERED_RAGGED = 1,
694+
DISCOVERED_OBJECT = 2
695+
} discovered_t;
696+
691697
/*
692698
* Take an arbitrary object and discover how many dimensions it
693699
* has, filling in the dimensions as we go.
694700
*/
695701
static int
696702
discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
697703
int stop_at_string, int stop_at_tuple,
698-
int *out_is_object)
704+
discovered_t *out_is_object)
699705
{
700706
PyObject *e;
701707
npy_intp n, i;
@@ -881,7 +887,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
881887
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
882888
PyErr_Clear();
883889
*maxndim = 0;
884-
*out_is_object = 1;
890+
*out_is_object = DISCOVERED_OBJECT;
885891
return 0;
886892
}
887893
else {
@@ -940,7 +946,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
940946
*maxndim = all_elems_maxndim + 1;
941947
if (!all_dimensions_match) {
942948
/* typically results in an array containing variable-length lists */
943-
*out_is_object = 1;
949+
*out_is_object = DISCOVERED_RAGGED;
944950
}
945951
}
946952

@@ -1749,7 +1755,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
17491755

17501756
/* Try to treat op as a list of lists */
17511757
if (!writeable && PySequence_Check(op)) {
1752-
int check_it, stop_at_string, stop_at_tuple, is_object;
1758+
int check_it, stop_at_string, stop_at_tuple;
17531759
int type_num, type;
17541760

17551761
/*
@@ -1799,7 +1805,7 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
17991805
((*out_dtype)->names || (*out_dtype)->subarray));
18001806

18011807
*out_ndim = NPY_MAXDIMS;
1802-
is_object = 0;
1808+
discovered_t is_object = DISCOVERED_OK;
18031809
if (discover_dimensions(
18041810
op, out_ndim, out_dims, check_it,
18051811
stop_at_string, stop_at_tuple, &is_object) < 0) {
@@ -1816,7 +1822,17 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
18161822
return 0;
18171823
}
18181824
/* If object arrays are forced */
1819-
if (is_object) {
1825+
if (is_object != DISCOVERED_OK) {
1826+
if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) {
1827+
/* NumPy 1.18, 2019-11-01 */
1828+
if (DEPRECATE("Creating an ndarray with automatic object "
1829+
"dtype is deprecated, use dtype=object if you intended "
1830+
"it, otherwise specify an exact dtype") < 0)
1831+
{
1832+
return -1;
1833+
}
1834+
}
1835+
/*
 * Reached for DISCOVERED_OBJECT, when there is a requested_dtype, or for
 * ragged input whose deprecation warning was not raised as an error.
 */
18201836
Py_DECREF(*out_dtype);
18211837
*out_dtype = PyArray_DescrFromType(NPY_OBJECT);
18221838
if (*out_dtype == NULL) {

numpy/core/tests/test_deprecations.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,3 +568,13 @@ class TestNonZero(_DeprecationTestCase):
568568
def test_zerod(self):
569569
self.assert_deprecated(lambda: np.nonzero(np.array(0)))
570570
self.assert_deprecated(lambda: np.nonzero(np.array(1)))
571+
572+
573+
class TestRaggedArray(_DeprecationTestCase):
574+
# 2019-11-29 1.18.0
575+
def test_deprecate_ragged_arrays(self):
576+
# NEP 34 deprecated automatic object dtype when creating ragged
577+
# arrays. Also see the "ragged" tests in `test_multiarray`
578+
arg = [1, [2, 3]]
579+
self.assert_deprecated(np.array, args=(arg,))
580+

numpy/core/tests/test_multiarray.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ def test_array(self):
448448
assert_equal(r, np.ones((2, 6, 6)))
449449

450450
d = np.ones((6, ))
451-
r = np.array([[d, d + 1], d + 2])
451+
r = np.array([[d, d + 1], d + 2], dtype=object)
452452
assert_equal(len(r), 2)
453453
assert_equal(r[0], [d, d + 1])
454454
assert_equal(r[1], d + 2)
@@ -1051,34 +1051,60 @@ def test_array_too_big(self):
10511051
assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,),
10521052
shape=(max_bytes//itemsize + 1,), dtype=dtype)
10531053

1054-
def test_jagged_ndim_object(self):
1054+
def _ragged_creation(self, seq):
1055+
# without dtype=object, creating the ragged array should emit a
# DeprecationWarning
1056+
with assert_warns(DeprecationWarning):
1057+
a = np.array(seq)
1058+
b = np.array(seq, dtype=object)
1059+
assert_equal(a, b)
1060+
return b
1061+
1062+
def test_ragged_ndim_object(self):
10551063
# Lists of mismatching depths are treated as object arrays
1056-
a = np.array([[1], 2, 3])
1064+
a = self._ragged_creation([[1], 2, 3])
10571065
assert_equal(a.shape, (3,))
10581066
assert_equal(a.dtype, object)
10591067

1060-
a = np.array([1, [2], 3])
1068+
a = self._ragged_creation([1, [2], 3])
10611069
assert_equal(a.shape, (3,))
10621070
assert_equal(a.dtype, object)
10631071

1064-
a = np.array([1, 2, [3]])
1072+
a = self._ragged_creation([1, 2, [3]])
10651073
assert_equal(a.shape, (3,))
10661074
assert_equal(a.dtype, object)
10671075

1068-
def test_jagged_shape_object(self):
1076+
def test_ragged_shape_object(self):
10691077
# The ragged dimension of a list is turned into an object array
1070-
a = np.array([[1, 1], [2], [3]])
1071-
assert_equal(a.shape, (3,))
1072-
assert_equal(a.dtype, object)
1073-
1074-
a = np.array([[1], [2, 2], [3]])
1078+
a = self._ragged_creation([[1, 1], [2], [3]])
10751079
assert_equal(a.shape, (3,))
10761080
assert_equal(a.dtype, object)
10771081

1078-
a = np.array([[1], [2], [3, 3]])
1082+
a = self._ragged_creation([[1], [2, 2], [3]])
10791083
assert_equal(a.shape, (3,))
10801084
assert_equal(a.dtype, object)
10811085

1086+
a = self._ragged_creation([[1], [2], [3, 3]])
1087+
assert a.shape == (3,)
1088+
assert a.dtype == object
1089+
1090+
def test_array_of_ragged_array(self):
1091+
outer = np.array([None, None])
1092+
outer[0] = outer[1] = np.array([1, 2, 3])
1093+
assert np.array(outer).shape == (2,)
1094+
assert np.array([outer]).shape == (1, 2)
1095+
1096+
outer_ragged = np.array([None, None])
1097+
outer_ragged[0] = np.array([1, 2, 3])
1098+
outer_ragged[1] = np.array([1, 2, 3, 4])
1099+
# should both of these emit deprecation warnings?
1100+
assert np.array(outer_ragged).shape == (2,)
1101+
assert np.array([outer_ragged]).shape == (1, 2,)
1102+
1103+
def test_deep_nonragged_object(self):
1104+
# None of these should raise, even though they are missing dtype=object
1105+
a = np.array([[[Decimal(1)]]])
1106+
a = np.array([1, Decimal(1)])
1107+
a = np.array([[1], [Decimal(1)]])
10821108

10831109
class TestStructured(object):
10841110
def test_subarray_field_access(self):

numpy/core/tests/test_numeric.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1211,7 +1211,7 @@ def test_array_method(self):
12111211

12121212
def test_nonzero_invalid_object(self):
12131213
# gh-9295
1214-
a = np.array([np.array([1, 2]), 3])
1214+
a = np.array([np.array([1, 2]), 3], dtype=object)
12151215
assert_raises(ValueError, np.nonzero, a)
12161216

12171217
class BoolErrors:

numpy/core/tests/test_regression.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,21 +1367,21 @@ def test_fromiter_bytes(self):
13671367
def test_array_from_sequence_scalar_array(self):
13681368
# Ticket #1078: segfaults when creating an array with a sequence of
13691369
# 0d arrays.
1370-
a = np.array((np.ones(2), np.array(2)))
1370+
a = np.array((np.ones(2), np.array(2)), dtype=object)
13711371
assert_equal(a.shape, (2,))
13721372
assert_equal(a.dtype, np.dtype(object))
13731373
assert_equal(a[0], np.ones(2))
13741374
assert_equal(a[1], np.array(2))
13751375

1376-
a = np.array(((1,), np.array(1)))
1376+
a = np.array(((1,), np.array(1)), dtype=object)
13771377
assert_equal(a.shape, (2,))
13781378
assert_equal(a.dtype, np.dtype(object))
13791379
assert_equal(a[0], (1,))
13801380
assert_equal(a[1], np.array(1))
13811381

13821382
def test_array_from_sequence_scalar_array2(self):
13831383
# Ticket #1081: weird array with strange input...
1384-
t = np.array([np.array([]), np.array(0, object)])
1384+
t = np.array([np.array([]), np.array(0, object)], dtype=object)
13851385
assert_equal(t.shape, (2,))
13861386
assert_equal(t.dtype, np.dtype(object))
13871387

@@ -2290,9 +2290,10 @@ def f(x):
22902290
x[0], x[-1] = x[-1], x[0]
22912291

22922292
uf = np.frompyfunc(f, 1, 0)
2293-
a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]])
2293+
a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]], dtype=object)
22942294
assert_equal(uf(a), ())
2295-
assert_array_equal(a, [[3, 2, 1], [5, 4], [9, 7, 8, 6]])
2295+
expected = np.array([[3, 2, 1], [5, 4], [9, 7, 8, 6]], dtype=object)
2296+
assert_array_equal(a, expected)
22962297

22972298
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
22982299
def test_leak_in_structured_dtype_comparison(self):

numpy/core/tests/test_ufunc.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,14 +1124,18 @@ def test_object_array_accumulate_inplace(self):
11241124
# Twice reproduced also for tuples:
11251125
np.add.accumulate(arr, out=arr)
11261126
np.add.accumulate(arr, out=arr)
1127-
assert_array_equal(arr, np.array([[1]*i for i in [1, 3, 6, 10]]))
1127+
assert_array_equal(arr,
1128+
np.array([[1]*i for i in [1, 3, 6, 10]], dtype=object),
1129+
)
11281130

11291131
# And the same if the axis argument is used
11301132
arr = np.ones((2, 4), dtype=object)
11311133
arr[0, :] = [[2] for i in range(4)]
11321134
np.add.accumulate(arr, out=arr, axis=-1)
11331135
np.add.accumulate(arr, out=arr, axis=-1)
1134-
assert_array_equal(arr[0, :], np.array([[2]*i for i in [1, 3, 6, 10]]))
1136+
assert_array_equal(arr[0, :],
1137+
np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object),
1138+
)
11351139

11361140
def test_object_array_reduceat_inplace(self):
11371141
# Checks that in-place reduceats work, see also gh-7465

numpy/lib/tests/test_arraypad.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,24 +1262,29 @@ def test_negative_pad_width(self, pad_width, mode):
12621262
with pytest.raises(ValueError, match=match):
12631263
np.pad(arr, pad_width, mode)
12641264

1265-
@pytest.mark.parametrize("pad_width", [
1266-
"3",
1267-
"word",
1268-
None,
1269-
object(),
1270-
3.4,
1271-
((2, 3, 4), (3, 2)), # dtype=object (tuple)
1272-
complex(1, -1),
1273-
((-2.1, 3), (3, 2)),
1265+
@pytest.mark.parametrize("pad_width, dtype", [
1266+
("3", None),
1267+
("word", None),
1268+
(None, None),
1269+
(object(), None),
1270+
(3.4, None),
1271+
(((2, 3, 4), (3, 2)), object),
1272+
(complex(1, -1), None),
1273+
(((-2.1, 3), (3, 2)), None),
12741274
])
12751275
@pytest.mark.parametrize("mode", _all_modes.keys())
1276-
def test_bad_type(self, pad_width, mode):
1276+
def test_bad_type(self, pad_width, dtype, mode):
12771277
arr = np.arange(30).reshape((6, 5))
12781278
match = "`pad_width` must be of integral type."
1279-
with pytest.raises(TypeError, match=match):
1280-
np.pad(arr, pad_width, mode)
1281-
with pytest.raises(TypeError, match=match):
1282-
np.pad(arr, np.array(pad_width), mode)
1279+
if dtype is not None:
1280+
# avoid DeprecationWarning when not specifying dtype
1281+
with pytest.raises(TypeError, match=match):
1282+
np.pad(arr, np.array(pad_width, dtype=dtype), mode)
1283+
else:
1284+
with pytest.raises(TypeError, match=match):
1285+
np.pad(arr, pad_width, mode)
1286+
with pytest.raises(TypeError, match=match):
1287+
np.pad(arr, np.array(pad_width), mode)
12831288

12841289
def test_pad_width_as_ndarray(self):
12851290
a = np.arange(12)

numpy/lib/tests/test_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ def test_unicode_and_bytes_fmt(self, fmt, iotype):
580580
def test_large_zip(self):
581581
# The test takes at least 6GB of memory, writes a file larger than 4GB
582582
test_data = np.asarray([np.random.rand(np.random.randint(50,100),4)
583-
for i in range(800000)])
583+
for i in range(800000)], dtype=object)
584584
with tempdir() as tmpdir:
585585
np.savez(os.path.join(tmpdir, 'test.npz'), test_data=test_data)
586586

numpy/ma/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2828,8 +2828,8 @@ def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
28282828
elif isinstance(data, (tuple, list)):
28292829
try:
28302830
# If data is a sequence of masked array
2831-
mask = np.array([getmaskarray(m) for m in data],
2832-
dtype=mdtype)
2831+
mask = np.array([getmaskarray(np.asanyarray(m, dtype=mdtype))
2832+
for m in data], dtype=mdtype)
28332833
except ValueError:
28342834
# If data is nested
28352835
mask = nomask

numpy/ma/tests/test_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,7 @@ def test_mvoid_multidim_print(self):
936936
def test_object_with_array(self):
937937
mx1 = masked_array([1.], mask=[True])
938938
mx2 = masked_array([1., 2.])
939-
mx = masked_array([mx1, mx2], mask=[False, True])
939+
mx = masked_array([mx1, mx2], mask=[False, True], dtype=object)
940940
assert_(mx[0] is mx1)
941941
assert_(mx[1] is not mx2)
942942
assert_(np.all(mx[1].data == mx2.data))

numpy/random/tests/test_generator_mt19937_regressions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ def test_shuffle_mixed_dimension(self):
5656
[1, (2, 2), (3, 3), None],
5757
[(1, 1), 2, 3, None]]:
5858
mt19937 = Generator(MT19937(12345))
59-
shuffled = list(t)
59+
shuffled = np.array(t, dtype=object)
6060
mt19937.shuffle(shuffled)
61-
assert_array_equal(shuffled, [t[2], t[0], t[3], t[1]])
61+
expected = np.array([t[2], t[0], t[3], t[1]], dtype=object)
62+
assert_array_equal(np.array(shuffled, dtype=object), expected)
6263

6364
def test_call_within_randomstate(self):
6465
# Check that custom BitGenerator does not call into global state
@@ -118,7 +119,7 @@ def test_shuffle_of_array_of_objects(self):
118119
# a segfault on garbage collection.
119120
# See gh-7719
120121
mt19937 = Generator(MT19937(1234))
121-
a = np.array([np.arange(1), np.arange(4)])
122+
a = np.array([np.arange(1), np.arange(4)], dtype=object)
122123

123124
for _ in range(1000):
124125
mt19937.shuffle(a)

numpy/random/tests/test_randomstate_regression.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ def test_shuffle_mixed_dimension(self):
6868
random.seed(12345)
6969
shuffled = list(t)
7070
random.shuffle(shuffled)
71-
assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]])
71+
expected = np.array([t[0], t[3], t[1], t[2]], dtype=object)
72+
assert_array_equal(np.array(shuffled, dtype=object), expected)
7273

7374
def test_call_within_randomstate(self):
7475
# Check that custom RandomState does not call into global state
@@ -128,7 +129,7 @@ def test_shuffle_of_array_of_objects(self):
128129
# a segfault on garbage collection.
129130
# See gh-7719
130131
random.seed(1234)
131-
a = np.array([np.arange(1), np.arange(4)])
132+
a = np.array([np.arange(1), np.arange(4)], dtype=object)
132133

133134
for _ in range(1000):
134135
random.shuffle(a)

numpy/random/tests/test_regression.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def test_shuffle_mixed_dimension(self):
6666
np.random.seed(12345)
6767
shuffled = list(t)
6868
random.shuffle(shuffled)
69-
assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]])
69+
expected = np.array([t[0], t[3], t[1], t[2]], dtype=object)
70+
assert_array_equal(np.array(shuffled, dtype=object), expected)
7071

7172
def test_call_within_randomstate(self):
7273
# Check that custom RandomState does not call into global state
@@ -126,7 +127,7 @@ def test_shuffle_of_array_of_objects(self):
126127
# a segfault on garbage collection.
127128
# See gh-7719
128129
np.random.seed(1234)
129-
a = np.array([np.arange(1), np.arange(4)])
130+
a = np.array([np.arange(1), np.arange(4)], dtype=object)
130131

131132
for _ in range(1000):
132133
np.random.shuffle(a)

0 commit comments

Comments
 (0)