From 845def00c85f9f40cfa64e6dabb4158bebd502f4 Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Thu, 22 Nov 2018 19:47:58 -0500
Subject: [PATCH] ENH: add back the multifield copy->view change

Fixes #10409
Closes #11530
---
 doc/release/1.16.0-notes.rst            |  21 +++++
 numpy/core/src/multiarray/arrayobject.c |   8 +-
 numpy/core/src/multiarray/convert.c     |  16 ----
 numpy/core/src/multiarray/mapping.c     |  52 +-----------
 numpy/core/tests/test_multiarray.py     |  76 +++--------------
 numpy/core/tests/test_records.py        |   1 -
 numpy/doc/structured_arrays.py          | 108 ++++++++++++++----------
 numpy/lib/tests/test_io.py              |  10 +++
 8 files changed, 113 insertions(+), 179 deletions(-)

diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index 8ab2cab15610..936386ddd8a5 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -67,6 +67,10 @@ Expired deprecations
 * ``np.lib.function_base.unique`` was removed, finishing a deprecation cycle
   begun in NumPy 1.4. Use `numpy.unique` instead.
 
+* multi-field indexing now returns views instead of copies, finishing a
+  deprecation cycle begun in NumPy 1.7. The change was previously attempted in
+  NumPy 1.14 but reverted until now.
+
 Compatibility notes
 ===================
 
@@ -113,6 +117,23 @@ Previously, only the ``dims`` keyword argument was accepted
 for specification of the shape of the array to be used
 for unraveling. ``dims`` remains supported, but is now deprecated.
 
+multi-field indexing returns a view instead of a copy
+-----------------------------------------------------
+Indexing a structured array with multiple fields, e.g.,
+``arr[['f1', 'f3']]``, returns a view into the original array instead of a
+copy. The returned view will often have extra padding bytes corresponding to
+intervening fields in the original array, unlike before, which will
+affect code such as ``arr[['f1', 'f3']].view('float64')``. This change has
+been planned since numpy 1.7: writing to such an array has emitted a
+``FutureWarning`` since then, and viewing it with a different dtype since 1.12.
+
+To help users update their code to account for these changes, a number of
+functions have been added to the ``numpy.lib.recfunctions`` module which
+safely allow such operations. For instance, the code above can be replaced
+with ``structured_to_unstructured(arr[['f1', 'f3']], dtype='float64')``.
+See the "accessing multiple fields" section of the
+`user guide <https://docs.scipy.org/doc/numpy/user/basics.rec.html>`__.
+
 
 C API changes
 =============
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 936a30426c26..97aaee93d0f5 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -656,11 +656,9 @@ array_might_be_written(PyArrayObject *obj)
 {
     const char *msg =
         "Numpy has detected that you (may be) writing to an array returned\n"
-        "by numpy.diagonal or by selecting multiple fields in a structured\n"
-        "array. This code will likely break in a future numpy release --\n"
-        "see numpy.diagonal or arrays.indexing reference docs for details.\n"
-        "The quick fix is to make an explicit copy (e.g., do\n"
-        "arr.diagonal().copy() or arr[['f0','f1']].copy()).";
+        "by numpy.diagonal. This code will likely break in a future numpy\n"
+        "release -- see numpy.diagonal docs for details. The quick fix is\n"
+        "to make an explicit copy (e.g., do arr.diagonal().copy()).";
     if (PyArray_FLAGS(obj) & NPY_ARRAY_WARN_ON_WRITE) {
         /* 2012-07-17, 1.7 */
         if (DEPRECATE_FUTUREWARNING(msg) < 0) {
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index e88582a514a1..7db46730881f 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -614,22 +614,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
     }
 
     dtype = PyArray_DESCR(self);
-
-    if (type != NULL && !PyArray_EquivTypes(dtype, type) &&
-            (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) {
-        const char *msg =
-            "Numpy has detected that you may be viewing or writing to an array "
-            "returned by selecting multiple fields in a structured array. \n\n"
-            "This code may break in numpy 1.16 because this will return a view "
-            "instead of a copy -- see release notes for details.";
-        /* 2016-09-19, 1.12 */
-        if (DEPRECATE_FUTUREWARNING(msg) < 0) {
-            return NULL;
-        }
-        /* Only warn once per array */
-        PyArray_CLEARFLAGS(self, NPY_ARRAY_WARN_ON_WRITE);
-    }
-
     flags = PyArray_FLAGS(self);
 
     Py_INCREF(dtype);
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index d371ae762298..1b05faeebf37 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1388,55 +1388,15 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op)
     return PyArray_EnsureAnyArray(array_subscript(self, op));
 }
 
-/*
- * Helper function for _get_field_view which turns a multifield
- * view into a "packed" copy, as done in numpy 1.15 and before.
- * In numpy 1.16 this function should be removed.
- */
-NPY_NO_EXPORT int
-_multifield_view_to_copy(PyArrayObject **view) {
-    static PyObject *copyfunc = NULL;
-    PyObject *viewcopy;
-
-    /* return a repacked copy of the view */
-    npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc);
-    if (copyfunc == NULL) {
-        goto view_fail;
-    }
-
-    PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
-    viewcopy = PyObject_CallFunction(copyfunc, "O", *view);
-    if (viewcopy == NULL) {
-        goto view_fail;
-    }
-    Py_DECREF(*view);
-    *view = (PyArrayObject*)viewcopy;
-
-    /* warn when writing to the copy */
-    PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
-    return 0;
-
-view_fail:
-    Py_DECREF(*view);
-    *view = NULL;
-    return 0;
-}
-
 /*
  * Attempts to subscript an array using a field name or list of field names.
  *
  * If an error occurred, return 0 and set view to NULL. If the subscript is not
  * a string or list of strings, return -1 and set view to NULL. Otherwise
  * return 0 and set view to point to a new view into arr for the given fields.
- *
- * In numpy 1.15 and before, in the case of a list of field names the returned
- * view will actually be a copy by default, with fields packed together.
- * The `force_view` argument causes a view to be returned. This argument can be
- * removed in 1.16 when we plan to return a view always.
  */
 NPY_NO_EXPORT int
-_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view,
-                int force_view)
+_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
 {
     *view = NULL;
 
@@ -1597,11 +1557,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view,
             return 0;
         }
 
-        /* the code below can be replaced by "return 0" in 1.16 */
-        if (force_view) {
-            return 0;
-        }
-        return _multifield_view_to_copy(view);
+        return 0;
     }
     return -1;
 }
@@ -1629,7 +1585,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
     /* return fields if op is a string index */
     if (PyDataType_HASFIELDS(PyArray_DESCR(self))) {
         PyArrayObject *view;
-        int ret = _get_field_view(self, op, &view, 0);
+        int ret = _get_field_view(self, op, &view);
         if (ret == 0){
             if (view == NULL) {
                 return NULL;
@@ -1911,7 +1867,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
     /* field access */
     if (PyDataType_HASFIELDS(PyArray_DESCR(self))){
         PyArrayObject *view;
-        int ret = _get_field_view(self, ind, &view, 1);
+        int ret = _get_field_view(self, ind, &view);
         if (ret == 0){
             if (view == NULL) {
                 return -1;
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 51fe6e9efeb9..48aab3802b26 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -4979,25 +4979,9 @@ def test_field_names(self):
             fn2 = func('f2')
             b[fn2] = 3
 
-            # In 1.16 code below can be replaced by:
-            # assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
-            # assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
-            # assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
-            with suppress_warnings() as sup:
-                sup.filter(FutureWarning,
-                           ".* selecting multiple fields .*")
-
-                assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
-                assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
-                assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
-                # view of subfield view/copy
-                assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(),
-                             (2, 3))
-                assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(),
-                             (3, 2))
-                view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])]
-                assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(),
-                             (2, (1,)))
+            assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
+            assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
+            assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
 
         # non-ascii unicode field indexing is well behaved
         if not is_py3:
@@ -5007,50 +4991,6 @@ def test_field_names(self):
             assert_raises(ValueError, a.__setitem__, u'\u03e0', 1)
             assert_raises(ValueError, a.__getitem__, u'\u03e0')
 
-    # can be removed in 1.16
-    def test_field_names_deprecation(self):
-
-        def collect_warnings(f, *args, **kwargs):
-            with warnings.catch_warnings(record=True) as log:
-                warnings.simplefilter("always")
-                f(*args, **kwargs)
-            return [w.category for w in log]
-
-        a = np.zeros((1,), dtype=[('f1', 'i4'),
-                                  ('f2', 'i4'),
-                                  ('f3', [('sf1', 'i4')])])
-        a['f1'][0] = 1
-        a['f2'][0] = 2
-        a['f3'][0] = (3,)
-        b = np.zeros((1,), dtype=[('f1', 'i4'),
-                                  ('f2', 'i4'),
-                                  ('f3', [('sf1', 'i4')])])
-        b['f1'][0] = 1
-        b['f2'][0] = 2
-        b['f3'][0] = (3,)
-
-        # All the different functions raise a warning, but not an error
-        assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)),
-                     [FutureWarning])
-        # For <=1.12 a is not modified, but it will be in 1.13
-        assert_equal(a, b)
-
-        # Views also warn
-        subset = a[['f1', 'f2']]
-        subset_view = subset.view()
-        assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10),
-                     [FutureWarning])
-        # But the write goes through:
-        assert_equal(subset['f1'][0], 10)
-        # Only one warning per multiple field indexing, though (even if there
-        # are multiple views involved):
-        assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), [])
-
-        # make sure views of a multi-field index warn too
-        c = np.zeros(3, dtype='i8,i8,i8')
-        assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'),
-                     [FutureWarning])
-
     def test_record_hash(self):
         a = np.array([(1, 2), (1, 2)], dtype='i1,i2')
         a.flags.writeable = False
@@ -5074,6 +5014,16 @@ def test_empty_structure_creation(self):
         np.array([(), (), (), (), ()], dtype={'names': [], 'formats': [],
                                            'offsets': [], 'itemsize': 12})
 
+    def test_multifield_indexing_view(self):
+        a = np.ones(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u4')])
+        v = a[['a', 'c']]
+        assert_(v.base is a)
+        assert_(v.dtype == np.dtype({'names': ['a', 'c'],
+                                     'formats': ['i4', 'u4'],
+                                     'offsets': [0, 8]}))
+        v[:] = (4,5)
+        assert_equal(a[0].item(), (4, 1, 5))
+
 class TestView(object):
     def test_basic(self):
         x = np.array([(1, 2, 3, 4), (5, 6, 7, 8)],
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 08d8865a052b..11f2e4175a9c 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -379,7 +379,6 @@ def test_nonwriteable_setfield(self):
         with assert_raises(ValueError):
             r.setfield([2,3], *r.dtype.fields['f'])
 
-    @pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16")
     def test_out_of_order_fields(self):
         # names in the same order, padding added to descr
         x = self.data[['col1', 'col2']]
diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py
index 42711a7c0d9d..0fcdecf00e6f 100644
--- a/numpy/doc/structured_arrays.py
+++ b/numpy/doc/structured_arrays.py
@@ -35,26 +35,24 @@
  array([('Rex', 5, 81.0), ('Fido', 5, 27.0)],
        dtype=[('name', 'S10'), ('age', '<i4'), ('weight', '<f4')])
 
-Structured arrays are designed for low-level manipulation of structured data,
-for example, for interpreting binary blobs. Structured datatypes are
-designed to mimic 'structs' in the C language, making them also useful for
-interfacing with C code. For these purposes, numpy supports specialized
-features such as subarrays and nested datatypes, and allows manual control over
-the memory layout of the structure.
-
-For simple manipulation of tabular data other pydata projects, such as pandas,
-xarray, or DataArray, provide higher-level interfaces that may be more
-suitable. These projects may also give better performance for tabular data
-analysis because the C-struct-like memory layout of structured arrays can lead
-to poor cache behavior.
+Structured datatypes are designed to be able to mimic 'structs' in the C
+language, and share a similar memory layout. They are meant for interfacing with
+C code and for low-level manipulation of structured buffers, for example for
+interpreting binary blobs. For these purposes they support specialized features
+such as subarrays, nested datatypes, and unions, and allow control over the
+memory layout of the structure.
+
+Users looking to manipulate tabular data, such as data stored in CSV files, may
+find other pydata projects more suitable, such as xarray, pandas, or DataArray.
+These provide a high-level interface for tabular data analysis and are better
+optimized for that use. For instance, the C-struct-like memory layout of
+structured arrays in numpy can lead to poor cache behavior in comparison.
 
 .. _defining-structured-types:
 
 Structured Datatypes
 ====================
 
-To use structured arrays one first needs to define a structured datatype.
-
 A structured datatype can be thought of as a sequence of bytes of a certain
 length (the structure's :term:`itemsize`) which is interpreted as a collection
 of fields. Each field has a name, a datatype, and a byte offset within the
@@ -180,7 +178,9 @@
  mappingproxy({'x': (dtype('int64'), 0), 'y': (dtype('float32'), 8)})
 
 Both the ``names`` and ``fields`` attributes will equal ``None`` for
-unstructured arrays.
+unstructured arrays. The recommended way to test if a dtype is structured is
+with ``if dt.names is not None`` rather than ``if dt.names``, to account for dtypes
+with 0 fields.
 
 The string representation of a structured datatype is shown in the "list of
 tuples" form if possible, otherwise numpy falls back to using the more general
@@ -404,11 +404,10 @@
 the index is a list of field names.
 
 .. warning::
-    The behavior of multi-field indexes will change from Numpy 1.15 to Numpy
-    1.16.
+    The behavior of multi-field indexes changed from Numpy 1.15 to Numpy 1.16.
 
-In Numpy 1.16, the result of indexing with a multi-field index will be a view
-into the original array, as follows::
+The result of indexing with a multi-field index is a view into the original
+array, as follows::
 
  >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')])
  >>> a[['a', 'c']]
@@ -420,41 +419,58 @@
 view's dtype has the same itemsize as the original array, and has fields at the
 same offsets as in the original array, and unindexed fields are merely missing.
 
-In Numpy 1.15, indexing an array with a multi-field index returns a copy of
-the result above for 1.16, but with fields packed together in memory as if
-passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the
-behavior since Numpy 1.7.
-
 .. warning::
-   The new behavior in Numpy 1.16 leads to extra "padding" bytes at the
-   location of unindexed fields. You will need to update any code which depends
-   on the data having a "packed" layout. For instance code such as::
+    In Numpy 1.15, indexing an array with a multi-field index returned a copy of
+    the result above, but with fields packed together in memory as if
+    passed through :func:`numpy.lib.recfunctions.repack_fields`.
+
+    The new behavior as of Numpy 1.16 leads to extra "padding" bytes at the
+    location of unindexed fields compared to 1.15. You will need to update any
+    code which depends on the data having a "packed" layout. For instance code
+    such as::
+
+     >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')])
+     >>> a[['a','c']].view('i8')  # Fails in Numpy 1.16
+     ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype
+
+    will need to be changed. This code has raised a ``FutureWarning`` since
+    Numpy 1.12, and similar code has raised ``FutureWarning`` since 1.7.
+
+    In 1.16 a number of functions have been introduced in the
+    :mod:`numpy.lib.recfunctions` module to help users account for this
+    change. These are
+    :func:`numpy.lib.recfunctions.repack_fields`,
+    :func:`numpy.lib.recfunctions.structured_to_unstructured`,
+    :func:`numpy.lib.recfunctions.unstructured_to_structured`,
+    :func:`numpy.lib.recfunctions.apply_along_fields`,
+    :func:`numpy.lib.recfunctions.assign_fields_by_name`,  and
+    :func:`numpy.lib.recfunctions.require_fields`.
 
-    >>> a[['a','c']].view('i8')  # will fail in Numpy 1.16
-    ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype
+    The function :func:`numpy.lib.recfunctions.repack_fields` can always be
+    used to reproduce the old behavior, as it will return a packed copy of the
+    structured array. The code above, for example, can be replaced with:
 
-   will need to be changed. This code has raised a ``FutureWarning`` since
-   Numpy 1.12.
+     >>> repack_fields(a[['a','c']]).view('i8')  # supported in 1.16
+     array([0, 0, 0])
 
-   The following is a recommended fix, which will behave identically in Numpy
-   1.15 and Numpy 1.16::
+    Furthermore, numpy now provides a new function
+    :func:`numpy.lib.recfunctions.structured_to_unstructured` which is a safer
+    and more efficient alternative for users who wish to convert structured
+    arrays to unstructured arrays, as the view above is often intended to do.
+    This function allows safe conversion to an unstructured type taking into
+    account padding, often avoids a copy, and also casts the datatypes
+    as needed, unlike the view. Code such as:
 
-    >>> from numpy.lib.recfunctions import repack_fields
-    >>> repack_fields(a[['a','c']]).view('i8')  # supported 1.15 and 1.16
-    array([0, 0, 0])
+     >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')])
+     >>> a[['x', 'z']].view('f4')
 
-    The :module:`numpy.lib.recfunctions` module has other new methods
-    introduced in numpy 1.16 to help users account for this change. These are
-    :func:`numpy.lib.recfunctions.structured_to_unstructured`,
-    :func:`numpy.lib.recfunctions.unstructured_to_structured`, 
-    :func:`numpy.lib.recfunctions.apply_along_fields`, 
-    :func:`numpy.lib.recfunctions.assign_fields_by_name`,  and
-    :func:`numpy.lib.recfunctions.require_fields`.
+    can be made safer by replacing with:
+
+     >>> structured_to_unstructured(a[['x', 'z']])
+     array([0, 0, 0])
 
 
-Assigning to an array with a multi-field index will behave the same in Numpy
-1.15 and Numpy 1.16. In both versions the assignment will modify the original
-array::
+Assignment to an array with a multi-field index modifies the original array::
 
  >>> a[['a', 'c']] = (2, 3)
  >>> a
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index b746937b9655..f2d5d4ab5c96 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -355,6 +355,16 @@ def test_record(self):
         c.seek(0)
         assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
 
+    @pytest.mark.skipif(Path is None, reason="No pathlib.Path")
+    def test_multifield_view(self):
+        a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')])
+        v = a[['x', 'z']]
+        with temppath(suffix='.npy') as path:
+            path = Path(path)
+            np.save(path, v)
+            data = np.load(path)
+            assert_array_equal(data, v)
+
     def test_delimiter(self):
         a = np.array([[1., 2.], [3., 4.]])
         c = BytesIO()