From 4afeac20fd9d9e4a7946cd9fadf41a2a3476a620 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 2 Jun 2017 18:10:59 +0100 Subject: [PATCH 1/3] ENH: Add out argument to `concatenate` --- doc/release/1.14.0-notes.rst | 4 + numpy/add_newdocs.py | 6 +- numpy/core/src/multiarray/multiarraymodule.c | 234 +++++++++++-------- numpy/core/tests/test_shape_base.py | 34 +++ 4 files changed, 183 insertions(+), 95 deletions(-) diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index fdb1341af9c5..94626ed7822c 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -211,6 +211,10 @@ selected via the ``--fcompiler`` and ``--compiler`` options to supported; by default a gfortran-compatible static archive ``openblas.a`` is looked for. +``concatenate`` gained an ``out`` argument +------------------------------------------ +A preallocated buffer of the desired dtype can now be used with ``concatenate``. + Changes ======= diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 687204fc1232..c4713f16b4ea 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -1158,7 +1158,7 @@ def luf(lamdaexpr, *args, **kwargs): add_newdoc('numpy.core.multiarray', 'concatenate', """ - concatenate((a1, a2, ...), axis=0) + concatenate((a1, a2, ...), axis=0, out=None) Join a sequence of arrays along an existing axis. @@ -1169,6 +1169,10 @@ def luf(lamdaexpr, *args, **kwargs): corresponding to `axis` (the first, by default). axis : int, optional The axis along which the arrays will be joined. Default is 0. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what concatenate would have returned if no + out argument were specified. Returns ------- diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index ca481a11f504..e9c34c306c67 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -320,15 +320,11 @@ PyArray_Free(PyObject *op, void *ptr) * Concatenates a list of ndarrays. */ NPY_NO_EXPORT PyArrayObject * -PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis) +PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis, + PyArrayObject* ret) { - PyTypeObject *subtype = &PyArray_Type; - double priority = NPY_PRIORITY; int iarrays, idim, ndim; - npy_intp shape[NPY_MAXDIMS], s, strides[NPY_MAXDIMS]; - int strideperm[NPY_MAXDIMS]; - PyArray_Descr *dtype = NULL; - PyArrayObject *ret = NULL; + npy_intp shape[NPY_MAXDIMS]; PyArrayObject_fields *sliding_view = NULL; if (narrays <= 0) { @@ -383,47 +379,68 @@ PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis) } } - /* Get the priority subtype for the array */ - for (iarrays = 0; iarrays < narrays; ++iarrays) { - if (Py_TYPE(arrays[iarrays]) != subtype) { - double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); - if (pr > priority) { - priority = pr; - subtype = Py_TYPE(arrays[iarrays]); - } + if (ret != NULL) { + if (PyArray_NDIM(ret) != ndim) { + PyErr_SetString(PyExc_ValueError, + "Output array has wrong dimensionality"); + return NULL; + } + if (!PyArray_CompareLists(shape, PyArray_SHAPE(ret), ndim)) { + PyErr_SetString(PyExc_ValueError, + "Output array is the wrong shape"); + return NULL; } + Py_INCREF(ret); } + else { + npy_intp s, strides[NPY_MAXDIMS]; + int strideperm[NPY_MAXDIMS]; + PyArray_Descr *dtype = NULL; + PyTypeObject *subtype = &PyArray_Type; + double priority = NPY_PRIORITY; + + /* Get the priority subtype for the array */ + for (iarrays = 0; iarrays < narrays; ++iarrays) { + if (Py_TYPE(arrays[iarrays]) != subtype) { + double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); + if (pr > priority) { + priority = pr; + subtype = Py_TYPE(arrays[iarrays]); + } + } + } - /* Get the resulting dtype from combining all the arrays */ - dtype = PyArray_ResultType(narrays, arrays, 0, NULL); - if (dtype == NULL) { - return NULL; - } + /* Get the resulting dtype from combining all the arrays */ + dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + if (dtype == NULL) { + return NULL; + } - /* - * Figure out the permutation to apply to the strides to match - * the memory layout of the input arrays, using ambiguity - * resolution rules matching that of the NpyIter. - */ - PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm); - s = dtype->elsize; - for (idim = ndim-1; idim >= 0; --idim) { - int iperm = strideperm[idim]; - strides[iperm] = s; - s *= shape[iperm]; - } - - /* Allocate the array for the result. This steals the 'dtype' reference. */ - ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, - dtype, - ndim, - shape, - strides, - NULL, - 0, - NULL); - if (ret == NULL) { - return NULL; + /* + * Figure out the permutation to apply to the strides to match + * the memory layout of the input arrays, using ambiguity + * resolution rules matching that of the NpyIter. + */ + PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm); + s = dtype->elsize; + for (idim = ndim-1; idim >= 0; --idim) { + int iperm = strideperm[idim]; + strides[iperm] = s; + s *= shape[iperm]; + } + + /* Allocate the array for the result. This steals the 'dtype' reference. */ + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, + dtype, + ndim, + shape, + strides, + NULL, + 0, + NULL); + if (ret == NULL) { + return NULL; + } } /* @@ -462,15 +479,10 @@ PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis) */ NPY_NO_EXPORT PyArrayObject * PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays, - NPY_ORDER order) + NPY_ORDER order, PyArrayObject *ret) { - PyTypeObject *subtype = &PyArray_Type; - double priority = NPY_PRIORITY; int iarrays; - npy_intp stride; npy_intp shape = 0; - PyArray_Descr *dtype = NULL; - PyArrayObject *ret = NULL; PyArrayObject_fields *sliding_view = NULL; if (narrays <= 0) { @@ -494,36 +506,56 @@ PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays, } } - /* Get the priority subtype for the array */ - for (iarrays = 0; iarrays < narrays; ++iarrays) { - if (Py_TYPE(arrays[iarrays]) != subtype) { - double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); - if (pr > priority) { - priority = pr; - subtype = Py_TYPE(arrays[iarrays]); - } + if (ret != NULL) { + if (PyArray_NDIM(ret) != 1) { + PyErr_SetString(PyExc_ValueError, + "Output array must be 1D"); + return NULL; } + if (shape != PyArray_SIZE(ret)) { + PyErr_SetString(PyExc_ValueError, + "Output array is the wrong size"); + return NULL; + } + Py_INCREF(ret); } + else { + PyArray_Descr *dtype = NULL; + npy_intp stride; + double priority = NPY_PRIORITY; + PyTypeObject *subtype = &PyArray_Type; + + /* Get the priority subtype for the array */ + for (iarrays = 0; iarrays < narrays; ++iarrays) { + if (Py_TYPE(arrays[iarrays]) != subtype) { + double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); + if (pr > priority) { + priority = pr; + subtype = Py_TYPE(arrays[iarrays]); + } + } + } - /* Get the resulting dtype from combining all the arrays */ - dtype = PyArray_ResultType(narrays, arrays, 0, NULL); - if (dtype == NULL) { - return NULL; - } - - stride = dtype->elsize; + /* Get the resulting dtype from combining all the arrays */ + dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + if (dtype == NULL) { + return NULL; + } - /* Allocate the array for the result. This steals the 'dtype' reference. */ - ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, - dtype, - 1, - &shape, - &stride, - NULL, - 0, - NULL); - if (ret == NULL) { - return NULL; + stride = dtype->elsize; + + /* Allocate the array for the result. This steals the 'dtype' reference. */ + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, + dtype, + 1, + &shape, + &stride, + NULL, + 0, + NULL); + if (ret == NULL) { + return NULL; + } } /* @@ -558,22 +590,11 @@ PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays, return ret; } - -/*NUMPY_API - * Concatenate - * - * Concatenate an arbitrary Python sequence into an array. - * op is a python object supporting the sequence interface. - * Its elements will be concatenated together to form a single - * multidimensional array. If axis is NPY_MAXDIMS or bigger, then - * each sequence object will be flattened before concatenation -*/ NPY_NO_EXPORT PyObject * -PyArray_Concatenate(PyObject *op, int axis) +PyArray_ConcatenateInto(PyObject *op, int axis, PyArrayObject *ret) { int iarrays, narrays; PyArrayObject **arrays; - PyArrayObject *ret; if (!PySequence_Check(op)) { PyErr_SetString(PyExc_TypeError, @@ -606,10 +627,10 @@ PyArray_Concatenate(PyObject *op, int axis) } if (axis >= NPY_MAXDIMS) { - ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER); + ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER, ret); } else { - ret = PyArray_ConcatenateArrays(narrays, arrays, axis); + ret = PyArray_ConcatenateArrays(narrays, arrays, axis, ret); } for (iarrays = 0; iarrays < narrays; ++iarrays) { @@ -629,6 +650,21 @@ PyArray_Concatenate(PyObject *op, int axis) return NULL; } +/*NUMPY_API + * Concatenate + * + * Concatenate an arbitrary Python sequence into an array. + * op is a python object supporting the sequence interface. + * Its elements will be concatenated together to form a single + * multidimensional array. If axis is NPY_MAXDIMS or bigger, then + * each sequence object will be flattened before concatenation +*/ +NPY_NO_EXPORT PyObject * +PyArray_Concatenate(PyObject *op, int axis) +{ + return PyArray_ConcatenateInto(op, axis, NULL); +} + static int _signbit_set(PyArrayObject *arr) { @@ -2156,14 +2192,24 @@ static PyObject * array_concatenate(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) { PyObject *a0; + PyObject *out = NULL; int axis = 0; - static char *kwlist[] = {"seq", "axis", NULL}; + static char *kwlist[] = {"seq", "axis", "out", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:concatenate", kwlist, - &a0, PyArray_AxisConverter, &axis)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&O:concatenate", kwlist, + &a0, PyArray_AxisConverter, &axis, &out)) { return NULL; } - return PyArray_Concatenate(a0, axis); + if (out != NULL) { + if (out == Py_None) { + out = NULL; + } + else if (!PyArray_Check(out)) { + PyErr_SetString(PyExc_TypeError, "'out' must be an array"); + return NULL; + } + } + return PyArray_ConcatenateInto(a0, axis, (PyArrayObject *)out); } static PyObject * diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index d1fbe8e92556..5c1e569b7d9a 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -230,6 +230,12 @@ def test_concatenate_axis_None(self): '0', '1', '2', 'x']) assert_array_equal(r, d) + out = np.zeros(a.size + len(b)) + r = np.concatenate((a, b), axis=None) + rout = np.concatenate((a, b), axis=None, out=out) + assert_(out is rout) + assert_equal(r, rout) + def test_large_concatenate_axis_None(self): # When no axis is given, concatenate uses flattened versions. # This also had a bug with many arrays (see gh-5979). @@ -278,6 +284,34 @@ def test_concatenate(self): assert_array_equal(concatenate((a0, a1, a2), -1), res) assert_array_equal(concatenate((a0.T, a1.T, a2.T), 0), res.T) + out = res.copy() + rout = concatenate((a0, a1, a2), 2, out=out) + assert_(out is rout) + assert_equal(res, rout) + + def test_bad_out_shape(self): + a = array([1, 2]) + b = array([3, 4]) + + assert_raises(ValueError, concatenate, (a, b), out=np.empty(5)) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((4,1))) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((1,4))) + concatenate((a, b), out=np.empty(4)) + + def test_out_dtype(self): + out = np.empty(4, np.float32) + res = concatenate((array([1, 2]), array([3, 4])), out=out) + assert_(out is res) + + out = np.empty(4, np.complex64) + res = concatenate((array([0.1, 0.2]), array([0.3, 0.4])), out=out) + assert_(out is res) + + # invalid cast + out = np.empty(4, np.int32) + assert_raises(TypeError, concatenate, + (array([0.1, 0.2]), array([0.3, 0.4])), out=out) + def test_stack(): # non-iterable input From 57fe5d04f0e54b5ef92c7bd69b6a31ad7df0bba5 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 2 Jun 2017 18:17:31 +0100 Subject: [PATCH 2/3] MAINT: Extract duplicated priority code --- numpy/core/src/multiarray/multiarraymodule.c | 53 ++++++++++---------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index e9c34c306c67..724ae45ce127 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -315,6 +315,29 @@ PyArray_Free(PyObject *op, void *ptr) return 0; } +/* + * Get the ndarray subclass with the highest priority + */ +NPY_NO_EXPORT PyTypeObject * +PyArray_GetSubType(int narrays, PyArrayObject **arrays) { + PyTypeObject *subtype = &PyArray_Type; + double priority = NPY_PRIORITY; + int i; + + /* Get the priority subtype for the array */ + for (i = 0; i < narrays; ++i) { + if (Py_TYPE(arrays[i]) != subtype) { + double pr = PyArray_GetPriority((PyObject *)(arrays[i]), 0.0); + if (pr > priority) { + priority = pr; + subtype = Py_TYPE(arrays[i]); + } + } + } + + return subtype; +} + /* * Concatenates a list of ndarrays. @@ -395,23 +418,12 @@ PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis, else { npy_intp s, strides[NPY_MAXDIMS]; int strideperm[NPY_MAXDIMS]; - PyArray_Descr *dtype = NULL; - PyTypeObject *subtype = &PyArray_Type; - double priority = NPY_PRIORITY; /* Get the priority subtype for the array */ - for (iarrays = 0; iarrays < narrays; ++iarrays) { - if (Py_TYPE(arrays[iarrays]) != subtype) { - double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); - if (pr > priority) { - priority = pr; - subtype = Py_TYPE(arrays[iarrays]); - } - } - } + PyTypeObject *subtype = PyArray_GetSubType(narrays, arrays); /* Get the resulting dtype from combining all the arrays */ - dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + PyArray_Descr *dtype = PyArray_ResultType(narrays, arrays, 0, NULL); if (dtype == NULL) { return NULL; } @@ -520,24 +532,13 @@ PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays, Py_INCREF(ret); } else { - PyArray_Descr *dtype = NULL; npy_intp stride; - double priority = NPY_PRIORITY; - PyTypeObject *subtype = &PyArray_Type; /* Get the priority subtype for the array */ - for (iarrays = 0; iarrays < narrays; ++iarrays) { - if (Py_TYPE(arrays[iarrays]) != subtype) { - double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); - if (pr > priority) { - priority = pr; - subtype = Py_TYPE(arrays[iarrays]); - } - } - } + PyTypeObject *subtype = PyArray_GetSubType(narrays, arrays); /* Get the resulting dtype from combining all the arrays */ - dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + PyArray_Descr *dtype = PyArray_ResultType(narrays, arrays, 0, NULL); if (dtype == NULL) { return NULL; } From 0228ebf759bda388ef2712cbca836ea39e13054e Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 2 Jul 2017 00:28:55 +0100 Subject: [PATCH 3/3] ENH: Add the out parameter to stack too --- doc/release/1.14.0-notes.rst | 8 ++++---- numpy/core/shape_base.py | 8 ++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index 94626ed7822c..11ee9c00844a 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -211,10 +211,10 @@ selected via the ``--fcompiler`` and ``--compiler`` options to supported; by default a gfortran-compatible static archive ``openblas.a`` is looked for. -``concatenate`` gained an ``out`` argument ------------------------------------------- -A preallocated buffer of the desired dtype can now be used with ``concatenate``. - +``concatenate`` and ``stack`` gained an ``out`` argument +-------------------------------------------------------- +A preallocated buffer of the desired dtype can now be used for the output of +these functions. Changes ======= diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py index f1847d7e3c43..026ad603a500 100644 --- a/numpy/core/shape_base.py +++ b/numpy/core/shape_base.py @@ -293,7 +293,7 @@ def hstack(tup): return _nx.concatenate(arrs, 1) -def stack(arrays, axis=0): +def stack(arrays, axis=0, out=None): """ Join a sequence of arrays along a new axis. @@ -309,6 +309,10 @@ def stack(arrays, axis=0): Each array must have the same shape. axis : int, optional The axis in the result array along which the input arrays are stacked. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what stack would have returned if no + out argument were specified. Returns ------- @@ -358,7 +362,7 @@ def stack(arrays, axis=0): sl = (slice(None),) * axis + (_nx.newaxis,) expanded_arrays = [arr[sl] for arr in arrays] - return _nx.concatenate(expanded_arrays, axis=axis) + return _nx.concatenate(expanded_arrays, axis=axis, out=out) class _Recurser(object):