diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 9dbeb76cd0ca..1daadae5e8f9 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -464,6 +464,14 @@ def english_upper(s):
           TD(noobj),
           TD(O, f='npy_ObjectMin')
           ),
+'max_abs':
+    Ufunc(2, 1, Zero,
+          'Element-wise maximum of the absolute values of two arrays.',
+          'PyUFunc_MaxAbsTypeResolver',
+          TD(ints+flts),
+          TD(cmplx, out=('f', 'd', 'g')),
+          TD(O, f='npy_ObjectMaxAbs')
+          ),
 'fmax':
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmax'),
diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src
index 3aad44c9feec..0bd3dfa97f91 100644
--- a/numpy/core/src/umath/funcs.inc.src
+++ b/numpy/core/src/umath/funcs.inc.src
@@ -80,6 +80,44 @@ npy_Object@Kind@(PyObject *i1, PyObject *i2)
 }
 /**end repeat**/
 
+/*
+ * Returns a new reference to max(abs(i1), abs(i2)), preferring abs(i1)
+ * on ties, or NULL with an exception set if abs() or the comparison fails.
+ */
+static PyObject *
+npy_ObjectMaxAbs(PyObject *i1, PyObject *i2)
+{
+    PyObject *absi1, *absi2, *result;
+    int cmp;
+
+    absi1 = PyNumber_Absolute(i1);
+    if (absi1 == NULL) {
+        return NULL;
+    }
+    absi2 = PyNumber_Absolute(i2);
+    if (absi2 == NULL) {
+        Py_DECREF(absi1);
+        return NULL;
+    }
+
+    cmp = PyObject_RichCompareBool(absi1, absi2, Py_GE);
+    if (cmp < 0) {
+        Py_DECREF(absi1);
+        Py_DECREF(absi2);
+        return NULL;
+    }
+    if (cmp == 1) {
+        result = absi1;
+    }
+    else {
+        result = absi2;
+    }
+    Py_INCREF(result);
+    Py_DECREF(absi1);
+    Py_DECREF(absi2);
+    return result;
+}
+
 /* Emulates Python's 'a or b' behavior */
 static PyObject *
 npy_ObjectLogicalOr(PyObject *i1, PyObject *i2)
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index a69fc814718f..8b45123d7ee3 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -891,6 +891,19 @@ NPY_NO_EXPORT void
 
 /**end repeat1**/
 
+/* op1 = max(|ip1|, |ip2|) element-wise */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        @type@ in1 = *(@type@ *)ip1;
+        @type@ in2 = *(@type@ *)ip2;
+        in1 = (in1 >= 0) ? in1 : -in1;
+        in2 = (in2 >= 0) ? in2 : -in2;
+        *((@type@ *)op1) = (in1 > in2) ? in1 : in2;
+    }
+}
+
 NPY_NO_EXPORT void
 @TYPE@_true_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -1617,6 +1630,31 @@ NPY_NO_EXPORT void
 }
 /**end repeat1**/
 
+/* op1 = max(|ip1|, |ip2|) element-wise; NaN in either input propagates */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func))
+{
+    if (IS_BINARY_REDUCE) {
+        if (!run_unary_reduce_simd_max_abs_@TYPE@(args, dimensions, steps)) {
+            BINARY_REDUCE_LOOP(@type@) {
+                const @type@ in2 = *(@type@ *)ip2;
+                const @type@ ain2 = (in2 >= 0) ? in2 : -in2;
+                io1 = (io1 >= ain2 || npy_isnan(io1)) ? io1 : ain2;
+            }
+            *((@type@ *)iop1) = io1;
+        }
+    }
+    else {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            const @type@ ain1 = (in1 >= 0) ? in1 : -in1;
+            const @type@ ain2 = (in2 >= 0) ? in2 : -in2;
+            *((@type@ *)op1) = (ain1 >= ain2 || npy_isnan(ain1)) ? ain1 : ain2;
+        }
+    }
+}
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = >=, <=#
@@ -1948,6 +1986,17 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
 }
 /**end repeat**/
 
+/* op1 = max(|ip1|, |ip2|); the 0x7fffu mask drops the top (sign) bit */
+NPY_NO_EXPORT void
+HALF_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const npy_half in1 = (*(npy_half *)ip1) & 0x7fffu;
+        const npy_half in2 = (*(npy_half *)ip2) & 0x7fffu;
+        *((npy_half *)op1) = (npy_half_ge(in1, in2) || npy_half_isnan(in1)) ? in1 : in2;
+    }
+}
+
 /**begin repeat
  * #kind = fmax, fmin#
  * #OP = npy_half_ge, npy_half_le#
@@ -2496,6 +2545,21 @@ NPY_NO_EXPORT void
 }
 /**end repeat1**/
 
+/* op1 = max(|ip1|, |ip2|) from complex magnitudes; the output is real */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
+        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
+        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
+        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
+        const @ftype@ in1 = npy_hypot@c@(in1r, in1i);
+        const @ftype@ in2 = npy_hypot@c@(in2r, in2i);
+        *((@ftype@ *)op1) = (in1 >= in2 || npy_isnan(in1)) ? in1 : in2;
+    }
+}
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = CGE, CLE#
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index a6e775a3ad15..14afd9e63939 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -110,6 +110,9 @@ NPY_NO_EXPORT void
 @S@@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat2**/
 
+NPY_NO_EXPORT void
+@S@@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 NPY_NO_EXPORT void
 @S@@TYPE@_true_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
@@ -207,6 +210,9 @@ NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat1**/
 
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = >=, <=#
@@ -360,6 +366,9 @@ NPY_NO_EXPORT void
 C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat1**/
 
+NPY_NO_EXPORT void
+C@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = CGE, CLE#
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 5b111eb0d152..2de8f9da407e 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -107,10 +107,10 @@
  */
 
 /**begin repeat1
- * #func = sqrt, absolute, negative, minimum, maximum#
- * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
- * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
+ * #func = sqrt, absolute, negative, minimum, maximum, max_abs#
+ * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*3 #
+ * #name = unary*3, unary_reduce*3#
+ * #minmax = 0*3, 1*3#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -737,6 +737,49 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 }
 /**end repeat1**/
 
+/* fold max(|ip[i]|) for i in [0, n) into *op; NaN propagates */
+static void
+sse2_max_abs_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
+{
+    const size_t stride = 16 / sizeof(@type@);
+    @type@ in;
+    const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@);
+
+    LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
+        in = (ip[i] >= 0) ? ip[i] : -ip[i];
+        *op = (*op >= in || npy_isnan(*op)) ? *op : in;
+    }
+    assert(n < (stride) || npy_is_aligned(&ip[i], 16));
+    if (i + 3 * stride <= n) {
+        /* load the first elements */
+        @vtype@ c1 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i]));
+        @vtype@ c2 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i + stride]));
+        i += 2 * stride;
+
+        /* maxps/maxpd will set invalid flag if nan is encountered */
+        npy_clear_floatstatus();
+        LOOP_BLOCKED(@type@, 32) {
+            @vtype@ v1 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i]));
+            @vtype@ v2 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i + stride]));
+            c1 = @vpre@_max_@vsuf@(c1, v1);
+            c2 = @vpre@_max_@vsuf@(c2, v2);
+        }
+        c1 = @vpre@_max_@vsuf@(c1, c2);
+
+        if (npy_get_floatstatus() & NPY_FPE_INVALID) {
+            *op = @nan@;
+        }
+        else {
+            @type@ tmp = sse2_horizontal_max_@vtype@(c1);
+            *op = (*op >= tmp || npy_isnan(*op)) ? *op : tmp;
+        }
+    }
+    LOOP_BLOCKED_END {
+        in = (ip[i] >= 0) ? ip[i] : -ip[i];
+        *op = (*op >= in || npy_isnan(*op)) ? *op : in;
+    }
+}
+
 /**end repeat**/
 
 /*
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index ec28bb9e428f..36ed547d57b6 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -539,6 +539,33 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
     }
 }
 
+/*
+ * This function applies special type resolution rules for the max_abs
+ * ufunc. This ufunc converts complex -> float, so isn't covered
+ * by the simple binary type resolution. This is a binary version of
+ * the AbsoluteTypeResolver above.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+NPY_NO_EXPORT int
+PyUFunc_MaxAbsTypeResolver(PyUFuncObject *ufunc,
+                           NPY_CASTING casting,
+                           PyArrayObject **operands,
+                           PyObject *type_tup,
+                           PyArray_Descr **out_dtypes)
+{
+    /* Use the default for complex types, to find the loop producing float */
+    if (PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[0])->type_num) ||
+            PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[1])->type_num)) {
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
+                    type_tup, out_dtypes);
+    }
+    else {
+        return PyUFunc_SimpleBinaryOperationTypeResolver(ufunc, casting,
+                    operands, type_tup, out_dtypes);
+    }
+}
+
 /*
  * Creates a new NPY_TIMEDELTA dtype, copying the datetime metadata
  * from the given dtype.
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index a1e28d75b9d0..1adb1d68018a 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -43,6 +43,13 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
                              PyObject *type_tup,
                              PyArray_Descr **out_dtypes);
 
+NPY_NO_EXPORT int
+PyUFunc_MaxAbsTypeResolver(PyUFuncObject *ufunc,
+                           NPY_CASTING casting,
+                           PyArrayObject **operands,
+                           PyObject *type_tup,
+                           PyArray_Descr **out_dtypes);
+
 NPY_NO_EXPORT int
 PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
                              NPY_CASTING casting,
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index e70227e5a44d..70113089648d 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -23,7 +23,7 @@
     csingle, cdouble, inexact, complexfloating, newaxis, ravel, all, Inf, dot,
     add, multiply, sqrt, maximum, fastCopyAndTranspose, sum, isfinite, size,
     finfo, errstate, geterrobj, longdouble, rollaxis, amin, amax, product, abs,
-    broadcast, atleast_2d, intp, asanyarray
+    broadcast, atleast_2d, intp, asanyarray, max_abs
     )
 from numpy.lib import triu, asfarray
 from numpy.linalg import lapack_lite, _umath_linalg
@@ -2085,7 +2085,11 @@ def norm(x, ord=None, axis=None, keepdims=False):
 
     if len(axis) == 1:
         if ord == Inf:
-            return abs(x).max(axis=axis, keepdims=keepdims)
+            if issubclass(x.dtype.type, complexfloating):
+                # The complex max_abs loops map complex -> float, so
+                # reduce cannot feed their output back in as an input.
+                return abs(x).max(axis=axis, keepdims=keepdims)
+            return max_abs.reduce(x, axis=axis, keepdims=keepdims)
         elif ord == -Inf:
             return abs(x).min(axis=axis, keepdims=keepdims)
         elif ord == 0: