diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index dbb24f26bc00..ce5eeb71e2ed 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -908,6 +908,9 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
         }
 
         *op_itflags = NPY_OP_ITFLAG_READ;
+        if (op_flags & NPY_ITER_UPDATEIFCOPY) {
+            *op_itflags |= NPY_OP_ITFLAG_CAST;
+        }
     }
     else if (op_flags & NPY_ITER_READWRITE) {
         /* The read/write flags are mutually exclusive */
@@ -2270,6 +2273,7 @@ npyiter_find_best_axis_ordering(NpyIter *iter)
     npy_uint32 itflags = NIT_ITFLAGS(iter);
     int idim, ndim = NIT_NDIM(iter);
     int iop, nop = NIT_NOP(iter);
+    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
 
     npy_intp ax_i0, ax_i1, ax_ipos;
     npy_int8 ax_j0, ax_j1;
@@ -2301,8 +2305,15 @@ npyiter_find_best_axis_ordering(NpyIter *iter)
 
             strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1));
 
+            /*
+             * Order strides, but excluding broadcasted ones, except
+             * if they are from an output that is being reduced and
+             * an external loop is enabled, since then the external
+             * loop can deal with masking.
+             */
             for (iop = 0; iop < nop; ++iop) {
-                if (strides0[iop] != 0 && strides1[iop] != 0) {
+                if (strides0[iop] != 0 && strides1[iop] != 0)
+                {
                     if (intp_abs(strides1[iop]) <=
                                         intp_abs(strides0[iop])) {
                         /*
@@ -2325,6 +2336,14 @@ npyiter_find_best_axis_ordering(NpyIter *iter)
                      */
                     ambig = 0;
                 }
+                else if ((itflags & NPY_ITFLAG_REDUCE) &&
+                         (itflags & NPY_ITFLAG_EXLOOP) &&
+                         (op_itflags[iop] & NPY_OP_ITFLAG_REDUCE) &&
+                         (strides0[iop] != 0 || strides1[iop] != 0)) {
+                    shouldswap = (strides0[iop] == 0);
+                    ambig = 0;
+                    break;
+                }
             }
             /*
              * If the comparison was unambiguous, either shift
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 6d04ce37224a..9bda1fc0f76f 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -444,9 +444,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
 
     /* Iterator parameters */
     NpyIter *iter = NULL;
-    PyArrayObject *op[2];
-    PyArray_Descr *op_dtypes[2];
-    npy_uint32 flags, op_flags[2];
+    PyArrayObject *op[3];
+    PyArray_Descr *op_dtypes[3];
+    npy_uint32 flags, op_flags[3];
 
     /* More than one axis means multiple orders are possible */
     if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) {
@@ -457,15 +457,6 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         return NULL;
     }
 
-
-    /* Validate that the parameters for future expansion are NULL */
-    if (wheremask != NULL) {
-        PyErr_SetString(PyExc_RuntimeError,
-                "Reduce operations in NumPy do not yet support "
-                "a where mask");
-        return NULL;
-    }
-
     /*
      * This either conforms 'out' to the ndim of 'operand', or allocates
     * a new array appropriate for this reduction.
@@ -493,6 +484,13 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         Py_INCREF(op_view);
     }
     else {
+        /* Cannot use where when we initialize from the operand */
+        if (wheremask != NULL) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "Reduce operations with no identity do not yet support "
+                    "a where mask");
+            return NULL;
+        }
         op_view = PyArray_InitializeReduceResult(
                 result, operand, axis_flags, &skip_first_count, funcname);
         if (op_view == NULL) {
@@ -523,9 +521,20 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
                   NPY_ITER_ALIGNED |
                   NPY_ITER_NO_SUBTYPE;
     op_flags[1] = NPY_ITER_READONLY |
-                  NPY_ITER_ALIGNED;
+                  NPY_ITER_ALIGNED |
+                  NPY_ITER_NO_BROADCAST;
+    if (wheremask != NULL) {
+        op_flags[1] |= NPY_ITER_UPDATEIFCOPY;
+        op[2] = wheremask;
+        op_dtypes[2] = PyArray_DescrFromType(NPY_BOOL);
+        if (op_dtypes[2] == NULL) {
+            goto fail;
+        }
+        op_flags[2] = NPY_ITER_READONLY |
+                      NPY_ITER_ALIGNED;
+    }
 
-    iter = NpyIter_AdvancedNew(2, op, flags,
+    iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags,
                                NPY_KEEPORDER, casting,
                                op_flags,
                                op_dtypes,
@@ -568,7 +577,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
             goto fail;
         }
     }
-    
+
     /* Check whether any errors occurred during the loop */
     if (PyErr_Occurred() ||
             _check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 5ef134ac12ab..d9d5ec16e5d3 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3457,21 +3457,39 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr,
     return 0;
 }
 
+static int
+remove_masked_items(char *dataptr, npy_intp s_data,
+                    char *maskptr, npy_intp s_mask, npy_intp count)
+{
+    int n;
+    char *data = dataptr, *okdata = dataptr, *mask = maskptr;
+    for (n = 0; n < count; n++, data += s_data, mask += s_mask) {
+        if (*mask) {
+            memcpy(okdata, data, s_data);
+            okdata += s_data;
+        }
+    }
+    return (okdata - dataptr) / s_data;
+}
+
 static int
 reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
             npy_intp *countptr, NpyIter_IterNextFunc *iternext,
             int needs_api, npy_intp skip_first_count, void *data)
 {
-    PyArray_Descr *dtypes[3], **iter_dtypes;
+    PyArray_Descr *dtypes[4], **iter_dtypes;
     PyUFuncObject *ufunc = (PyUFuncObject *)data;
     char *dataptrs_copy[3];
     npy_intp strides_copy[3];
+    npy_bool where_mask;
 
     /* The normal selected inner loop */
     PyUFuncGenericFunction innerloop = NULL;
     void *innerloopdata = NULL;
 
     NPY_BEGIN_THREADS_DEF;
 
+    /* Get the number of operands, to determine whether "where" is used */
+    where_mask = (NpyIter_GetNOp(iter) == 3);
     /* Get the inner loop */
     iter_dtypes = NpyIter_GetDescrArray(iter);
@@ -3524,6 +3542,12 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
         } while (iternext(iter));
     }
     do {
+        npy_intp count = *countptr;
+
+        if (where_mask) {
+            count = remove_masked_items(dataptrs[1], strides[1],
+                                        dataptrs[2], strides[2], count);
+        }
         /* Turn the two items into three for the inner loop */
         dataptrs_copy[0] = dataptrs[0];
         dataptrs_copy[1] = dataptrs[1];
@@ -3531,7 +3555,7 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
         strides_copy[0] = strides[0];
         strides_copy[1] = strides[1];
         strides_copy[2] = strides[0];
 
-        innerloop(dataptrs_copy, countptr,
+        innerloop(dataptrs_copy, &count,
                     strides_copy, innerloopdata);
     } while (iternext(iter));
@@ -3561,7 +3585,7 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
 static PyArrayObject *
 PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
         int naxes, int *axes, PyArray_Descr *odtype, int keepdims,
-        PyObject *initial)
+        PyObject *initial, PyArrayObject *wheremask)
 {
     int iaxes, ndim;
     npy_bool reorderable;
@@ -3627,7 +3651,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
         return NULL;
     }
 
-    result = PyUFunc_ReduceWrapper(arr, out, NULL, dtype, dtype,
+    result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype,
                                    NPY_UNSAFE_CASTING,
                                    axis_flags, reorderable,
                                    keepdims, 0,
@@ -4384,8 +4408,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
     int i, naxes=0, ndim;
     int axes[NPY_MAXDIMS];
     PyObject *axes_in = NULL;
-    PyArrayObject *mp = NULL, *ret = NULL;
-    PyObject *op;
+    PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL;
+    PyObject *op, *where = NULL;
     PyObject *obj_ind, *context;
     PyArrayObject *indices = NULL;
     PyArray_Descr *otype = NULL;
@@ -4393,7 +4417,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
     int keepdims = 0;
     PyObject *initial = NULL;
     static char *reduce_kwlist[] = {
-            "array", "axis", "dtype", "out", "keepdims", "initial", NULL};
+            "array", "axis", "dtype", "out", "keepdims", "initial", "where", NULL};
     static char *accumulate_kwlist[] = {
             "array", "axis", "dtype", "out", NULL};
     static char *reduceat_kwlist[] = {
@@ -4456,24 +4480,43 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
     }
     else if (operation == UFUNC_ACCUMULATE) {
         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&:accumulate",
-                                                     accumulate_kwlist,
-                                                     &op,
-                                                     &axes_in,
-                                                     PyArray_DescrConverter2, &otype,
-                                                     PyArray_OutputConverter, &out)) {
+                                         accumulate_kwlist,
+                                         &op,
+                                         &axes_in,
+                                         PyArray_DescrConverter2, &otype,
+                                         PyArray_OutputConverter, &out)) {
            goto fail;
        }
    }
    else {
-        if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&iO:reduce",
+        if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&iOO:reduce",
                                          reduce_kwlist,
                                          &op,
                                          &axes_in,
                                          PyArray_DescrConverter2, &otype,
                                          PyArray_OutputConverter, &out,
-                                         &keepdims, &initial)) {
+                                         &keepdims, &initial, &where)) {
            goto fail;
        }
+        /* Interpret mask */
+        if (where != NULL) {
+            PyArray_Descr *dtype;
+            dtype = PyArray_DescrFromType(NPY_BOOL);
+            if (dtype == NULL) {
+                goto fail;
+            }
+            /*
+             * Optimization: where=True is the same as no where argument.
+             * This lets us document it as a default argument.
+             */
+            if (where != Py_True) {
+                wheremask = (PyArrayObject *)PyArray_FromAny(where, dtype,
+                                                             0, 0, 0, NULL);
+                if (wheremask == NULL) {
+                    goto fail;
+                }
+            }
+        }
    }
    /* Ensure input is an array */
    if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) {
@@ -4602,7 +4645,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
    switch(operation) {
    case UFUNC_REDUCE:
        ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
-                             otype, keepdims, initial);
+                             otype, keepdims, initial, wheremask);
+        Py_XDECREF(wheremask);
        break;
    case UFUNC_ACCUMULATE:
        if (naxes != 1) {
@@ -4660,6 +4704,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
 fail:
    Py_XDECREF(otype);
    Py_XDECREF(mp);
+    Py_XDECREF(wheremask);
    return NULL;
 }
 
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index b83b8ccffafc..d79fd7abb90f 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -3,6 +3,8 @@
 import warnings
 import itertools
 
+import pytest
+
 import numpy as np
 import numpy.core._umath_tests as umt
 import numpy.linalg._umath_linalg as uml
@@ -1396,6 +1398,24 @@ def test_initial_reduction(self):
        res = np.add.reduce(a, initial=5)
        assert_equal(res, 15)
 
+    @pytest.mark.parametrize('axis', (0, 1, (0, 1)))
+    @pytest.mark.parametrize('where', (np.array([True, False, True]),
+                                       np.array([[True], [False], [True]]),
+                                       np.array([[True, False, True],
+                                                 [False, True, False],
+                                                 [False, False, True]])))
+    def test_reduction_with_where(self, axis, where):
+        a = np.arange(9.).reshape(3, 3)
+        a_copy = a.copy()
+        a_check = np.zeros_like(a)
+        np.positive(a, out=a_check, where=where)
+
+        res = np.add.reduce(a, axis=axis, where=where)
+        check = a_check.sum(axis)
+        assert_equal(res, check)
+        # Check we do not overwrite elements of a internally.
+        assert_array_equal(a, a_copy)
+
    def test_identityless_reduction_nonreorderable(self):
        a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]])
@@ -1758,7 +1778,7 @@ def test_reduce_arguments(self):
        # too little
        assert_raises(TypeError, f)
        # too much
-        assert_raises(TypeError, f, d, 0, None, None, False, 0, 1)
+        assert_raises(TypeError, f, d, 0, None, None, False, 0, True, 1)
        # invalid axis
        assert_raises(TypeError, f, d, "invalid")
        assert_raises(TypeError, f, d, axis="invalid")
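
Usage sketch (not part of the patch): the snippet below mirrors the new
test_reduction_with_where test; the results in the comments are worked out by
hand from the test's own a_check logic rather than copied from a session, and
exact repr formatting may differ between NumPy versions.

    >>> import numpy as np
    >>> a = np.arange(9.).reshape(3, 3)
    >>> mask = np.array([True, False, True])       # broadcasts against 'a'
    >>> np.add.reduce(a, axis=0, where=mask)       # -> array([ 9., 0., 15.])
    >>> np.add.reduce(a, axis=1, where=mask)       # -> array([ 2., 8., 14.])
    >>> np.add.reduce(a, axis=(0, 1), where=mask)  # -> 24.0

As the patch's comment notes, where=True is treated the same as omitting the
argument, and a reduction that has to initialize its result from the operand
(the no-identity path in reduction.c) still rejects a where mask with the
RuntimeError added there.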