From 3bb3dc969158973d9588d17a8756158340adf418 Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Sun, 10 Dec 2017 14:32:40 -0500 Subject: [PATCH 1/7] ENH: allow where to be recognized in reduce operations. --- numpy/core/src/umath/ufunc_object.c | 47 +++++++++++++++++++++-------- numpy/core/tests/test_ufunc.py | 2 +- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 5ef134ac12ab..7f1709a051d0 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3561,7 +3561,7 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int naxes, int *axes, PyArray_Descr *odtype, int keepdims, - PyObject *initial) + PyObject *initial, PyArrayObject *wheremask) { int iaxes, ndim; npy_bool reorderable; @@ -3627,7 +3627,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, return NULL; } - result = PyUFunc_ReduceWrapper(arr, out, NULL, dtype, dtype, + result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype, NPY_UNSAFE_CASTING, axis_flags, reorderable, keepdims, 0, @@ -4384,8 +4384,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, int i, naxes=0, ndim; int axes[NPY_MAXDIMS]; PyObject *axes_in = NULL; - PyArrayObject *mp = NULL, *ret = NULL; - PyObject *op; + PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL; + PyObject *op, *where = NULL; PyObject *obj_ind, *context; PyArrayObject *indices = NULL; PyArray_Descr *otype = NULL; @@ -4393,7 +4393,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, int keepdims = 0; PyObject *initial = NULL; static char *reduce_kwlist[] = { - "array", "axis", "dtype", "out", "keepdims", "initial", NULL}; + "array", "axis", "dtype", "out", "keepdims", "initial", "where", NULL}; static char *accumulate_kwlist[] = { "array", "axis", "dtype", "out", NULL}; static char *reduceat_kwlist[] = { @@ -4456,24 +4456,43 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, } else if (operation == UFUNC_ACCUMULATE) { if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&:accumulate", - accumulate_kwlist, - &op, - &axes_in, - PyArray_DescrConverter2, &otype, - PyArray_OutputConverter, &out)) { + accumulate_kwlist, + &op, + &axes_in, + PyArray_DescrConverter2, &otype, + PyArray_OutputConverter, &out)) { goto fail; } } else { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&iO:reduce", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&iOO:reduce", reduce_kwlist, &op, &axes_in, PyArray_DescrConverter2, &otype, PyArray_OutputConverter, &out, - &keepdims, &initial)) { + &keepdims, &initial, &where)) { goto fail; } + /* Interpret mask */ + if (where != NULL) { + PyArray_Descr *dtype; + dtype = PyArray_DescrFromType(NPY_BOOL); + if (dtype == NULL) { + goto fail; + } + /* + * Optimization: where=True is the same as no where argument. + * This lets us document it as a default argument. + */ + if (where != Py_True) { + wheremask = (PyArrayObject *)PyArray_FromAny(where, dtype, + 0, 0, 0, NULL); + if (wheremask == NULL) { + goto fail; + } + } + } } /* Ensure input is an array */ if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) { @@ -4602,7 +4621,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, switch(operation) { case UFUNC_REDUCE: ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes, - otype, keepdims, initial); + otype, keepdims, initial, wheremask); + Py_XDECREF(wheremask); break; case UFUNC_ACCUMULATE: if (naxes != 1) { @@ -4660,6 +4680,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, fail: Py_XDECREF(otype); Py_XDECREF(mp); + Py_XDECREF(wheremask); return NULL; } diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index b83b8ccffafc..aeff92bea0d5 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1758,7 +1758,7 @@ def test_reduce_arguments(self): # too little assert_raises(TypeError, f) # too much - assert_raises(TypeError, f, d, 0, None, None, False, 0, 1) + assert_raises(TypeError, f, d, 0, None, None, False, 0, True, 1) # invalid axis assert_raises(TypeError, f, d, "invalid") assert_raises(TypeError, f, d, axis="invalid") From 7d2ed2e5fff3350e228d5d45ac23f26546d71bec Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Sun, 10 Dec 2017 16:51:20 -0500 Subject: [PATCH 2/7] Attempt at Masking - does not work! The iterator masking is unsuited, as it prevents writing back an element to an array, while what is needed is to skip even reading/operating on an element. Might need a double internal loop, where the operand is filled in with an identity whenever the mask is set. --- numpy/core/src/umath/reduction.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 6d04ce37224a..1a38ff14cf1b 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -444,9 +444,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, /* Iterator parameters */ NpyIter *iter = NULL; - PyArrayObject *op[2]; - PyArray_Descr *op_dtypes[2]; - npy_uint32 flags, op_flags[2]; + PyArrayObject *op[3]; + PyArray_Descr *op_dtypes[3]; + npy_uint32 flags, op_flags[3]; /* More than one axis means multiple orders are possible */ if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { @@ -457,15 +457,6 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, return NULL; } - - /* Validate that the parameters for future expansion are NULL */ - if (wheremask != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "Reduce operations in NumPy do not yet support " - "a where mask"); - return NULL; - } - /* * This either conforms 'out' to the ndim of 'operand', or allocates * a new array appropriate for this reduction. @@ -523,9 +514,16 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, NPY_ITER_ALIGNED | NPY_ITER_NO_SUBTYPE; op_flags[1] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_NO_BROADCAST; + if (wheremask != NULL) { + op[2] = wheremask; + op_dtypes[2] = NULL; + op_flags[2] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; + op_flags[0] |= NPY_ITER_WRITEMASKED; + } - iter = NpyIter_AdvancedNew(2, op, flags, + iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags, NPY_KEEPORDER, casting, op_flags, op_dtypes, @@ -568,7 +566,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, goto fail; } } - + /* Check whether any errors occurred during the loop */ if (PyErr_Occurred() || _check_ufunc_fperr(errormask, NULL, "reduce") < 0) { From 7401abca1946f33b075cef717c869a1c10271a34 Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Sun, 30 Dec 2018 22:13:44 -0500 Subject: [PATCH 3/7] Hack to force buffering with NPY_ITER_READONLY|NPY_ITER_UPDATEIFCOPY --- numpy/core/src/multiarray/nditer_constr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index dbb24f26bc00..97187a75ee67 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -908,6 +908,9 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags) } *op_itflags = NPY_OP_ITFLAG_READ; + if (op_flags & NPY_ITER_UPDATEIFCOPY) { + *op_itflags |= NPY_OP_ITFLAG_CAST; + } } else if (op_flags & NPY_ITER_READWRITE) { /* The read/write flags are mutually exclusive */ From 59ad0df0f5158840cda61a7acb24ce7a7ee7b6b5 Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Sun, 30 Dec 2018 22:14:51 -0500 Subject: [PATCH 4/7] Buffer when using where, and set masked elements to identity. --- numpy/core/src/umath/reduction.c | 29 +++++++++++++++++++++------- numpy/core/src/umath/ufunc_object.c | 30 ++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 1a38ff14cf1b..708efaccb555 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -444,9 +444,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, /* Iterator parameters */ NpyIter *iter = NULL; - PyArrayObject *op[3]; - PyArray_Descr *op_dtypes[3]; - npy_uint32 flags, op_flags[3]; + PyArrayObject *op[4]; + PyArray_Descr *op_dtypes[4]; + npy_uint32 flags, op_flags[4]; /* More than one axis means multiple orders are possible */ if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { @@ -484,6 +484,13 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, Py_INCREF(op_view); } else { + /* Cannot use where when we initialize from the operand */ + if (wheremask != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "Reduce operations with no idenity do not yet support " + "a where mask"); + return NULL; + } op_view = PyArray_InitializeReduceResult( result, operand, axis_flags, &skip_first_count, funcname); if (op_view == NULL) { @@ -517,13 +524,21 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, NPY_ITER_ALIGNED | NPY_ITER_NO_BROADCAST; if (wheremask != NULL) { + op_flags[1] |= NPY_ITER_UPDATEIFCOPY; op[2] = wheremask; - op_dtypes[2] = NULL; - op_flags[2] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; - op_flags[0] |= NPY_ITER_WRITEMASKED; + op_dtypes[2] = PyArray_DescrFromType(NPY_BOOL); + if (op_dtypes[2] == NULL) { + goto fail; + } + op_flags[2] = NPY_ITER_READONLY | + NPY_ITER_ALIGNED; + op[3] = (PyArrayObject *)PyArray_FromScalar(identity, operand_dtype); + op_dtypes[3] = operand_dtype; + op_flags[3] = NPY_ITER_READONLY | + NPY_ITER_ALIGNED; } - iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags, + iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 4, op, flags, NPY_KEEPORDER, casting, op_flags, op_dtypes, diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 7f1709a051d0..233cad3eb849 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3457,21 +3457,38 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, return 0; } +static void +clear_masked_items(char *dataptr, npy_intp s_data, + char *maskptr, npy_intp s_mask, char *identityptr, + npy_intp count) +{ + int n; + char *data = dataptr, *mask = maskptr; + for (n = 0; n < count; n++, data += s_data, mask += s_mask) { + if (!(*mask)) { + memcpy(data, identityptr, s_data); + } + } +} + static int reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, npy_intp *countptr, NpyIter_IterNextFunc *iternext, int needs_api, npy_intp skip_first_count, void *data) { - PyArray_Descr *dtypes[3], **iter_dtypes; + PyArray_Descr *dtypes[4], **iter_dtypes; PyUFuncObject *ufunc = (PyUFuncObject *)data; - char *dataptrs_copy[3]; - npy_intp strides_copy[3]; + char *dataptrs_copy[4]; + npy_intp strides_copy[4]; + npy_bool where_mask; /* The normal selected inner loop */ PyUFuncGenericFunction innerloop = NULL; void *innerloopdata = NULL; NPY_BEGIN_THREADS_DEF; + /* Get the number of operands, to determine whether "where" is used */ + where_mask = (NpyIter_GetNOp(iter) == 4); /* Get the inner loop */ iter_dtypes = NpyIter_GetDescrArray(iter); @@ -3524,6 +3541,13 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, } while (iternext(iter)); } do { + if (where_mask) { + printf("strides=%ld,%ld,%ld\n", strides[0], strides[1], + strides[2]); + clear_masked_items(dataptrs[1], strides[1], + dataptrs[2], strides[2], + dataptrs[3], *countptr); + } /* Turn the two items into three for the inner loop */ dataptrs_copy[0] = dataptrs[0]; dataptrs_copy[1] = dataptrs[1]; From 01f3f0dae619fc82d236aa0bc57d0c70301a194a Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Tue, 1 Jan 2019 13:21:46 -0500 Subject: [PATCH 5/7] Force iterator to move reducing axis to inner-most with innerloop. Since this changes behaviour for cases where it may not be necessary, this should really involve a new flag! --- numpy/core/src/multiarray/nditer_constr.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 97187a75ee67..ce5eeb71e2ed 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -2273,6 +2273,7 @@ npyiter_find_best_axis_ordering(NpyIter *iter) npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); + npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); npy_intp ax_i0, ax_i1, ax_ipos; npy_int8 ax_j0, ax_j1; @@ -2304,8 +2305,15 @@ npyiter_find_best_axis_ordering(NpyIter *iter) strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1)); + /* + * Order strides, but excluding broadcasted ones, except + * if they are from an output that is being reduced and + * an external loop is enabled, since then the external + * loop can deal with masking. + */ for (iop = 0; iop < nop; ++iop) { - if (strides0[iop] != 0 && strides1[iop] != 0) { + if (strides0[iop] != 0 && strides1[iop] != 0) + { if (intp_abs(strides1[iop]) <= intp_abs(strides0[iop])) { /* @@ -2328,6 +2336,14 @@ npyiter_find_best_axis_ordering(NpyIter *iter) */ ambig = 0; } + else if ((itflags & NPY_ITFLAG_REDUCE) && + (itflags & NPY_ITFLAG_EXLOOP) && + (op_itflags[iop] & NPY_OP_ITFLAG_REDUCE) && + (strides0[iop] != 0 || strides1[iop] != 0)) { + shouldswap = (strides0[iop] == 0); + ambig = 0; + break; + } } /* * If the comparison was unambiguous, either shift From 0a75750e0fdd1eea0ea0936188ff9e3b4b219a96 Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Tue, 1 Jan 2019 13:55:05 -0500 Subject: [PATCH 6/7] TST: Initial tests for reduction with where argument. --- numpy/core/tests/test_ufunc.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index aeff92bea0d5..d79fd7abb90f 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -3,6 +3,8 @@ import warnings import itertools +import pytest + import numpy as np import numpy.core._umath_tests as umt import numpy.linalg._umath_linalg as uml @@ -1396,6 +1398,24 @@ def test_initial_reduction(self): res = np.add.reduce(a, initial=5) assert_equal(res, 15) + @pytest.mark.parametrize('axis', (0, 1, (0, 1))) + @pytest.mark.parametrize('where', (np.array([True, False, True]), + np.array([[True], [False], [True]]), + np.array([[True, False, True], + [False, True, False], + [False, False, True]]))) + def test_reduction_with_where(self, axis, where): + a = np.arange(9.).reshape(3, 3) + a_copy = a.copy() + a_check = np.zeros_like(a) + np.positive(a, out=a_check, where=where) + + res = np.add.reduce(a, axis=axis, where=where) + check = a_check.sum(axis) + assert_equal(res, check) + # Check we do not overwrite elements of a internally. + assert_array_equal(a, a_copy) + def test_identityless_reduction_nonreorderable(self): a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]]) From 3abdbe36ae2119e8d29ea95375bc4da53a3bdc93 Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Tue, 1 Jan 2019 13:56:27 -0500 Subject: [PATCH 7/7] Use guarantee that reduction axis is used by inner loop. --- numpy/core/src/umath/reduction.c | 12 ++++------- numpy/core/src/umath/ufunc_object.c | 32 ++++++++++++++--------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 708efaccb555..9bda1fc0f76f 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -444,9 +444,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, /* Iterator parameters */ NpyIter *iter = NULL; - PyArrayObject *op[4]; - PyArray_Descr *op_dtypes[4]; - npy_uint32 flags, op_flags[4]; + PyArrayObject *op[3]; + PyArray_Descr *op_dtypes[3]; + npy_uint32 flags, op_flags[3]; /* More than one axis means multiple orders are possible */ if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { @@ -532,13 +532,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, } op_flags[2] = NPY_ITER_READONLY | NPY_ITER_ALIGNED; - op[3] = (PyArrayObject *)PyArray_FromScalar(identity, operand_dtype); - op_dtypes[3] = operand_dtype; - op_flags[3] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED; } - iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 4, op, flags, + iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags, NPY_KEEPORDER, casting, op_flags, op_dtypes, diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 233cad3eb849..d9d5ec16e5d3 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3457,18 +3457,19 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, return 0; } -static void -clear_masked_items(char *dataptr, npy_intp s_data, - char *maskptr, npy_intp s_mask, char *identityptr, - npy_intp count) +static int +remove_masked_items(char *dataptr, npy_intp s_data, + char *maskptr, npy_intp s_mask, npy_intp count) { int n; - char *data = dataptr, *mask = maskptr; + char *data = dataptr, *okdata = dataptr, *mask = maskptr; for (n = 0; n < count; n++, data += s_data, mask += s_mask) { - if (!(*mask)) { - memcpy(data, identityptr, s_data); + if (*mask) { + memcpy(okdata, data, s_data); + okdata += s_data; } } + return (okdata - dataptr) / s_data; } static int @@ -3478,8 +3479,8 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, { PyArray_Descr *dtypes[4], **iter_dtypes; PyUFuncObject *ufunc = (PyUFuncObject *)data; - char *dataptrs_copy[4]; - npy_intp strides_copy[4]; + char *dataptrs_copy[3]; + npy_intp strides_copy[3]; npy_bool where_mask; /* The normal selected inner loop */ @@ -3488,7 +3489,7 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, NPY_BEGIN_THREADS_DEF; /* Get the number of operands, to determine whether "where" is used */ - where_mask = (NpyIter_GetNOp(iter) == 4); + where_mask = (NpyIter_GetNOp(iter) == 3); /* Get the inner loop */ iter_dtypes = NpyIter_GetDescrArray(iter); @@ -3541,12 +3542,11 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, } while (iternext(iter)); } do { + npy_intp count = *countptr; + if (where_mask) { - printf("strides=%ld,%ld,%ld\n", strides[0], strides[1], - strides[2]); - clear_masked_items(dataptrs[1], strides[1], - dataptrs[2], strides[2], - dataptrs[3], *countptr); + count = remove_masked_items(dataptrs[1], strides[1], + dataptrs[2], strides[2], count); } /* Turn the two items into three for the inner loop */ dataptrs_copy[0] = dataptrs[0]; @@ -3555,7 +3555,7 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; - innerloop(dataptrs_copy, countptr, + innerloop(dataptrs_copy, &count, strides_copy, innerloopdata); } while (iternext(iter));