From 845def00c85f9f40cfa64e6dabb4158bebd502f4 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 22 Nov 2018 19:47:58 -0500 Subject: [PATCH] ENH: add back the multifield copy->view change Fixes #10409 Closes #11530 --- doc/release/1.16.0-notes.rst | 21 +++++ numpy/core/src/multiarray/arrayobject.c | 8 +- numpy/core/src/multiarray/convert.c | 16 ---- numpy/core/src/multiarray/mapping.c | 52 +----------- numpy/core/tests/test_multiarray.py | 76 +++-------------- numpy/core/tests/test_records.py | 1 - numpy/doc/structured_arrays.py | 108 ++++++++++++++---------- numpy/lib/tests/test_io.py | 10 +++ 8 files changed, 113 insertions(+), 179 deletions(-) diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst index 8ab2cab15610..936386ddd8a5 100644 --- a/doc/release/1.16.0-notes.rst +++ b/doc/release/1.16.0-notes.rst @@ -67,6 +67,10 @@ Expired deprecations * ``np.lib.function_base.unique`` was removed, finishing a deprecation cycle begun in NumPy 1.4. Use `numpy.unique` instead. +* multi-field indexing now returns views instead of copies, finishing a + deprecation cycle begun in NumPy 1.7. The change was previously attempted in + NumPy 1.14 but reverted until now. + Compatibility notes =================== @@ -113,6 +117,23 @@ Previously, only the ``dims`` keyword argument was accepted for specification of the shape of the array to be used for unraveling. ``dims`` remains supported, but is now deprecated. +multi-field views return a view instead of a copy +------------------------------------------------- +Indexing a structured array with multiple fields, e.g., +``arr[['f1', 'f3']]``, returns a view into the original array instead of a +copy. The returned view will often have extra padding bytes corresponding to +intervening fields in the original array, unlike before, which will +affect code such as ``arr[['f1', 'f3']].view('float64')``. 
This change has +been planned since numpy 1.7 and such operations have emitted +``FutureWarnings`` since then and more since 1.12. + +To help users update their code to account for these changes, a number of +functions have been added to the ``numpy.lib.recfunctions`` module which +safely allow such operations. For instance, the code above can be replaced +with ``structured_to_unstructured(arr[['f1', 'f3']], dtype='float64')``. +See the "accessing multiple fields" section of the +`user guide <https://docs.scipy.org/doc/numpy/user/basics.rec.html#accessing-multiple-fields>`__. + C API changes ============= diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 936a30426c26..97aaee93d0f5 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -656,11 +656,9 @@ array_might_be_written(PyArrayObject *obj) { const char *msg = "Numpy has detected that you (may be) writing to an array returned\n" - "by numpy.diagonal or by selecting multiple fields in a structured\n" - "array. This code will likely break in a future numpy release --\n" - "see numpy.diagonal or arrays.indexing reference docs for details.\n" - "The quick fix is to make an explicit copy (e.g., do\n" - "arr.diagonal().copy() or arr[['f0','f1']].copy())."; + "by numpy.diagonal. This code will likely break in a future numpy\n" + "release -- see numpy.diagonal docs for details. 
The quick fix is\n" + "to make an explicit copy (e.g., do arr.diagonal().copy())."; if (PyArray_FLAGS(obj) & NPY_ARRAY_WARN_ON_WRITE) { /* 2012-07-17, 1.7 */ if (DEPRECATE_FUTUREWARNING(msg) < 0) { diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index e88582a514a1..7db46730881f 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -614,22 +614,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) } dtype = PyArray_DESCR(self); - - if (type != NULL && !PyArray_EquivTypes(dtype, type) && - (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) { - const char *msg = - "Numpy has detected that you may be viewing or writing to an array " - "returned by selecting multiple fields in a structured array. \n\n" - "This code may break in numpy 1.16 because this will return a view " - "instead of a copy -- see release notes for details."; - /* 2016-09-19, 1.12 */ - if (DEPRECATE_FUTUREWARNING(msg) < 0) { - return NULL; - } - /* Only warn once per array */ - PyArray_CLEARFLAGS(self, NPY_ARRAY_WARN_ON_WRITE); - } - flags = PyArray_FLAGS(self); Py_INCREF(dtype); diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index d371ae762298..1b05faeebf37 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1388,55 +1388,15 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op) return PyArray_EnsureAnyArray(array_subscript(self, op)); } -/* - * Helper function for _get_field_view which turns a multifield - * view into a "packed" copy, as done in numpy 1.15 and before. - * In numpy 1.16 this function should be removed. 
- */ -NPY_NO_EXPORT int -_multifield_view_to_copy(PyArrayObject **view) { - static PyObject *copyfunc = NULL; - PyObject *viewcopy; - - /* return a repacked copy of the view */ - npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc); - if (copyfunc == NULL) { - goto view_fail; - } - - PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE); - viewcopy = PyObject_CallFunction(copyfunc, "O", *view); - if (viewcopy == NULL) { - goto view_fail; - } - Py_DECREF(*view); - *view = (PyArrayObject*)viewcopy; - - /* warn when writing to the copy */ - PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE); - return 0; - -view_fail: - Py_DECREF(*view); - *view = NULL; - return 0; -} - /* * Attempts to subscript an array using a field name or list of field names. * * If an error occurred, return 0 and set view to NULL. If the subscript is not * a string or list of strings, return -1 and set view to NULL. Otherwise * return 0 and set view to point to a new view into arr for the given fields. - * - * In numpy 1.15 and before, in the case of a list of field names the returned - * view will actually be a copy by default, with fields packed together. - * The `force_view` argument causes a view to be returned. This argument can be - * removed in 1.16 when we plan to return a view always. 
*/ NPY_NO_EXPORT int -_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view, - int force_view) +_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) { *view = NULL; @@ -1597,11 +1557,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view, return 0; } - /* the code below can be replaced by "return 0" in 1.16 */ - if (force_view) { - return 0; - } - return _multifield_view_to_copy(view); + return 0; } return -1; } @@ -1629,7 +1585,7 @@ array_subscript(PyArrayObject *self, PyObject *op) /* return fields if op is a string index */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))) { PyArrayObject *view; - int ret = _get_field_view(self, op, &view, 0); + int ret = _get_field_view(self, op, &view); if (ret == 0){ if (view == NULL) { return NULL; @@ -1911,7 +1867,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) /* field access */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))){ PyArrayObject *view; - int ret = _get_field_view(self, ind, &view, 1); + int ret = _get_field_view(self, ind, &view); if (ret == 0){ if (view == NULL) { return -1; diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 51fe6e9efeb9..48aab3802b26 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4979,25 +4979,9 @@ def test_field_names(self): fn2 = func('f2') b[fn2] = 3 - # In 1.16 code below can be replaced by: - # assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) - # assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) - # assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) - with suppress_warnings() as sup: - sup.filter(FutureWarning, - ".* selecting multiple fields .*") - - assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) - assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) - assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) - # view of subfield view/copy - assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(), - (2, 3)) - 
assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(), - (3, 2)) - view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])] - assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(), - (2, (1,))) + assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) # non-ascii unicode field indexing is well behaved if not is_py3: @@ -5007,50 +4991,6 @@ def test_field_names(self): assert_raises(ValueError, a.__setitem__, u'\u03e0', 1) assert_raises(ValueError, a.__getitem__, u'\u03e0') - # can be removed in 1.16 - def test_field_names_deprecation(self): - - def collect_warnings(f, *args, **kwargs): - with warnings.catch_warnings(record=True) as log: - warnings.simplefilter("always") - f(*args, **kwargs) - return [w.category for w in log] - - a = np.zeros((1,), dtype=[('f1', 'i4'), - ('f2', 'i4'), - ('f3', [('sf1', 'i4')])]) - a['f1'][0] = 1 - a['f2'][0] = 2 - a['f3'][0] = (3,) - b = np.zeros((1,), dtype=[('f1', 'i4'), - ('f2', 'i4'), - ('f3', [('sf1', 'i4')])]) - b['f1'][0] = 1 - b['f2'][0] = 2 - b['f3'][0] = (3,) - - # All the different functions raise a warning, but not an error - assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)), - [FutureWarning]) - # For <=1.12 a is not modified, but it will be in 1.13 - assert_equal(a, b) - - # Views also warn - subset = a[['f1', 'f2']] - subset_view = subset.view() - assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10), - [FutureWarning]) - # But the write goes through: - assert_equal(subset['f1'][0], 10) - # Only one warning per multiple field indexing, though (even if there - # are multiple views involved): - assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), []) - - # make sure views of a multi-field index warn too - c = np.zeros(3, dtype='i8,i8,i8') - assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'), - [FutureWarning]) - def test_record_hash(self): a = np.array([(1, 2), (1, 
2)], dtype='i1,i2') a.flags.writeable = False @@ -5074,6 +5014,16 @@ def test_empty_structure_creation(self): np.array([(), (), (), (), ()], dtype={'names': [], 'formats': [], 'offsets': [], 'itemsize': 12}) + def test_multifield_indexing_view(self): + a = np.ones(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u4')]) + v = a[['a', 'c']] + assert_(v.base is a) + assert_(v.dtype == np.dtype({'names': ['a', 'c'], + 'formats': ['i4', 'u4'], + 'offsets': [0, 8]})) + v[:] = (4,5) + assert_equal(a[0].item(), (4, 1, 5)) + class TestView(object): def test_basic(self): x = np.array([(1, 2, 3, 4), (5, 6, 7, 8)], diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 08d8865a052b..11f2e4175a9c 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -379,7 +379,6 @@ def test_nonwriteable_setfield(self): with assert_raises(ValueError): r.setfield([2,3], *r.dtype.fields['f']) - @pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16") def test_out_of_order_fields(self): # names in the same order, padding added to descr x = self.data[['col1', 'col2']] diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 42711a7c0d9d..0fcdecf00e6f 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -35,26 +35,24 @@ array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], dtype=[('name', 'S10'), ('age', '>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] @@ -420,41 +419,58 @@ view's dtype has the same itemsize as the original array, and has fields at the same offsets as in the original array, and unindexed fields are merely missing. -In Numpy 1.15, indexing an array with a multi-field index returns a copy of -the result above for 1.16, but with fields packed together in memory as if -passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the -behavior since Numpy 1.7. - .. 
warning:: - The new behavior in Numpy 1.16 leads to extra "padding" bytes at the - location of unindexed fields. You will need to update any code which depends - on the data having a "packed" layout. For instance code such as:: + In Numpy 1.15, indexing an array with a multi-field index returned a copy of + the result above, but with fields packed together in memory as if + passed through :func:`numpy.lib.recfunctions.repack_fields`. + + The new behavior as of Numpy 1.16 leads to extra "padding" bytes at the + location of unindexed fields compared to 1.15. You will need to update any + code which depends on the data having a "packed" layout. For instance code + such as:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) + >>> a[['a','c']].view('i8') # Fails in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12, and similar code has raised ``FutureWarning`` since 1.7. + + In 1.16 a number of functions have been introduced in the + :mod:`numpy.lib.recfunctions` module to help users account for this + change. These are + :func:`numpy.lib.recfunctions.repack_fields`, + :func:`numpy.lib.recfunctions.structured_to_unstructured`, + :func:`numpy.lib.recfunctions.unstructured_to_structured`, + :func:`numpy.lib.recfunctions.apply_along_fields`, + :func:`numpy.lib.recfunctions.assign_fields_by_name`, and + :func:`numpy.lib.recfunctions.require_fields`. - >>> a[['a','c']].view('i8') # will fail in Numpy 1.16 - ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + The function :func:`numpy.lib.recfunctions.repack_fields` can always be + used to reproduce the old behavior, as it will return a packed copy of the + structured array. The code above, for example, can be replaced with: - will need to be changed. 
This code has raised a ``FutureWarning`` since - Numpy 1.12. + >>> repack_fields(a[['a','c']]).view('i8') # supported in 1.16 + array([0, 0, 0]) - The following is a recommended fix, which will behave identically in Numpy - 1.15 and Numpy 1.16:: + Furthermore, numpy now provides a new function + :func:`numpy.lib.recfunctions.structured_to_unstructured` which is a safer + and more efficient alternative for users who wish to convert structured + arrays to unstructured arrays, as the view above is often intended to do. + This function allows safe conversion to an unstructured type taking into + account padding, often avoids a copy, and also casts the datatypes + as needed, unlike the view. Code such as: - >>> from numpy.lib.recfunctions import repack_fields - >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 - array([0, 0, 0]) + >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) + >>> a[['x', 'z']].view('f4') - The :module:`numpy.lib.recfunctions` module has other new methods - introduced in numpy 1.16 to help users account for this change. These are - :func:`numpy.lib.recfunctions.structured_to_unstructured`, - :func:`numpy.lib.recfunctions.unstructured_to_structured`, - :func:`numpy.lib.recfunctions.apply_along_fields`, - :func:`numpy.lib.recfunctions.assign_fields_by_name`, and - :func:`numpy.lib.recfunctions.require_fields`. + can be made safer by replacing with: + + >>> structured_to_unstructured(a[['x', 'z']]) + array([0, 0, 0]) -Assigning to an array with a multi-field index will behave the same in Numpy -1.15 and Numpy 1.16. 
In both versions the assignment will modify the original -array:: +Assignment to an array with a multi-field index modifies the original array:: >>> a[['a', 'c']] = (2, 3) >>> a diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index b746937b9655..f2d5d4ab5c96 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -355,6 +355,16 @@ def test_record(self): c.seek(0) assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) + @pytest.mark.skipif(Path is None, reason="No pathlib.Path") + def test_multifield_view(self): + a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')]) + v = a[['x', 'z']] + with temppath(suffix='.npy') as path: + path = Path(path) + np.save(path, v) + data = np.load(path) + assert_array_equal(data, v) + def test_delimiter(self): a = np.array([[1., 2.], [3., 4.]]) c = BytesIO()