numpy · ewmoore · Jan 6, 2015 · juliantaylor · Jan 27, 2015
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
@@ -464,6 +464,14 @@ def english_upper(s):
           TD(noobj),
           TD(O, f='npy_ObjectMin')
           ),
+'max_abs':
+    Ufunc(2, 1, Zero,
+          'needs a docstring...',
+          'PyUFunc_MaxAbsTypeResolver',
+          TD(ints+flts),
+          TD(cmplx, out=('f', 'd', 'g')),
+          TD(O, f='npy_ObjectMaxAbs')
+          ),
 'fmax':
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmax'),

diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src
@@ -80,6 +80,44 @@ npy_Object@Kind@(PyObject *i1, PyObject *i2)
 }
 /**end repeat**/
 
+/*
+ * Object loop helper for the max_abs ufunc.
+ *
+ * Returns a new reference to the larger of abs(i1) and abs(i2); abs(i1) is
+ * chosen when the two compare equal. Returns NULL, with the Python error
+ * left set, if taking either absolute value or the comparison fails.
+ */
+static PyObject *
+npy_ObjectMaxAbs(PyObject *i1, PyObject *i2)
+{
+    PyObject *absi1, *absi2;
+    int cmp;
+
+    absi1 = PyNumber_Absolute(i1);
+    if (absi1 == NULL) {
+        return NULL;
+    }
+    absi2 = PyNumber_Absolute(i2);
+    if (absi2 == NULL) {
+        Py_DECREF(absi1);
+        return NULL;
+    }
+
+    cmp = PyObject_RichCompareBool(absi1, absi2, Py_GE);
+    if (cmp < 0) {
+        Py_DECREF(absi1);
+        Py_DECREF(absi2);
+        return NULL;
+    }
+    if (cmp == 1) {
+        /* hand our reference to absi1 over to the caller */
+        Py_DECREF(absi2);
+        return absi1;
+    }
+    Py_DECREF(absi1);
+    return absi2;
+}
+
 /* Emulates Python's 'a or b' behavior */
 static PyObject *
 npy_ObjectLogicalOr(PyObject *i1, PyObject *i2)

diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
@@ -891,6 +891,29 @@ NPY_NO_EXPORT void
 
 /**end repeat1**/
 
+/*
+ * Binary integer loop for max_abs: stores the larger of |in1| and |in2|.
+ * NOTE(review): negating the most negative value of a signed type cannot
+ * be represented in the element type -- confirm how that input should behave.
+ */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const @type@ lhs = *(@type@ *)ip1;
+        const @type@ rhs = *(@type@ *)ip2;
+        const @type@ abs_lhs = (lhs < 0) ? -lhs : lhs;
+        const @type@ abs_rhs = (rhs < 0) ? -rhs : rhs;
+
+        if (abs_lhs > abs_rhs) {
+            *((@type@ *)op1) = abs_lhs;
+        }
+        else {
+            *((@type@ *)op1) = abs_rhs;
+        }
+    }
+}
+
 NPY_NO_EXPORT void
 @TYPE@_true_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -1617,6 +1629,40 @@ NPY_NO_EXPORT void
 }
 /**end repeat1**/
 
+/*
+ * Floating point loop for max_abs: stores the larger of |in1| and |in2|;
+ * a nan in either operand yields nan.
+ *
+ * For a reduction, run_unary_reduce_simd_max_abs_@TYPE@ is tried first and
+ * the scalar loop only runs when it returns 0.
+ * NOTE(review): the accumulator io1 is never passed through abs itself, so
+ * the reduction relies on it starting out non-negative -- confirm.
+ */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func))
+{
+    if (IS_BINARY_REDUCE) {
+        if (!run_unary_reduce_simd_max_abs_@TYPE@(args, dimensions, steps)) {
+            BINARY_REDUCE_LOOP(@type@) {
+                const @type@ in2 = *(@type@ *)ip2;
+                const @type@ ain2 = (in2 >= 0) ? in2 : -in2;
+                /* compare against |in2|, not the signed input */
+                io1 = (io1 >= ain2 || npy_isnan(io1)) ? io1 : ain2;
+            }
+            *((@type@ *)iop1) = io1;
+        }
+    }
+    else {
+        BINARY_LOOP {
+            const @type@ in1 = *(@type@ *)ip1;
+            const @type@ in2 = *(@type@ *)ip2;
+            const @type@ ain1 = (in1 >= 0) ? in1 : -in1;
+            const @type@ ain2 = (in2 >= 0) ? in2 : -in2;
+            *((@type@ *)op1) = (ain1 >= ain2 || npy_isnan(ain1)) ? ain1 : ain2;
+        }
+    }
+}
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP =  >=, <=#
@@ -1948,6 +1984,26 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
 }
 /**end repeat**/
 
+/*
+ * Half precision loop for max_abs. Masking with 0x7fff clears the top
+ * (sign) bit of each 16-bit pattern before the two operands are compared.
+ */
+NPY_NO_EXPORT void
+HALF_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const npy_half abs1 = (*(npy_half *)ip1) & 0x7fffu;
+        const npy_half abs2 = (*(npy_half *)ip2) & 0x7fffu;
+
+        if (npy_half_ge(abs1, abs2) || npy_half_isnan(abs1)) {
+            *((npy_half *)op1) = abs1;
+        }
+        else {
+            *((npy_half *)op1) = abs2;
+        }
+    }
+}
+
 /**begin repeat
  * #kind = fmax, fmin#
  * #OP =  npy_half_ge, npy_half_le#
@@ -2496,6 +2542,29 @@ NPY_NO_EXPORT void
 }
 /**end repeat1**/
 
+/*
+ * Complex loop for max_abs: compares the magnitudes of the two operands,
+ * computed with npy_hypot, and writes the larger one as a single real value.
+ */
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const @ftype@ *lhs = (@ftype@ *)ip1;
+        const @ftype@ *rhs = (@ftype@ *)ip2;
+        /* element [0] is the real part, [1] the imaginary part */
+        const @ftype@ mag1 = npy_hypot@c@(lhs[0], lhs[1]);
+        const @ftype@ mag2 = npy_hypot@c@(rhs[0], rhs[1]);
+
+        if (mag1 >= mag2 || npy_isnan(mag1)) {
+            *((@ftype@ *)op1) = mag1;
+        }
+        else {
+            *((@ftype@ *)op1) = mag2;
+        }
+    }
+}
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = CGE, CLE#

diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
@@ -110,6 +110,9 @@ NPY_NO_EXPORT void
 @S@@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat2**/
 
+NPY_NO_EXPORT void
+@S@@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 NPY_NO_EXPORT void
 @S@@TYPE@_true_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
@@ -207,6 +210,9 @@ NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat1**/
 
+NPY_NO_EXPORT void
+@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP =  >=, <=#
@@ -360,6 +366,9 @@ NPY_NO_EXPORT void
 C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 /**end repeat1**/
 
+NPY_NO_EXPORT void
+C@TYPE@_max_abs(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /**begin repeat1
  * #kind = fmax, fmin#
  * #OP = CGE, CLE#

diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
@@ -107,10 +107,10 @@
  */
 
 /**begin repeat1
- * #func = sqrt, absolute, negative, minimum, maximum#
- * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
- * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
+ * #func = sqrt, absolute, negative, minimum, maximum, max_abs#
+ * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*3 #
+ * #name = unary*3, unary_reduce*3#
+ * #minmax = 0*3, 1*3#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -737,6 +737,48 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 }
 /**end repeat1**/
 
+static void
+sse2_max_abs_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
+{
+    const size_t stride = 16 / sizeof(@type@); /* elements per 16-byte vector */
+    @type@ in;
+    const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@); /* sign bit only; andnot with it clears the sign */
+
+    LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
+        in = (ip[i] >= 0) ? ip[i] : -ip[i];
+        *op = (*op >= in || npy_isnan(*op)) ? *op : in;
+    }
+    assert(n < (stride) || npy_is_aligned(&ip[i], 16));
+    if (i + 3 * stride <= n) {
+        /* seed the two accumulators with the sign-cleared first two vectors */
+        @vtype@ c1 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i]));
+        @vtype@ c2 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i + stride]));
+        i += 2 * stride;
+
+        /* maxps/maxpd will set invalid flag if nan is encountered */
+        npy_clear_floatstatus();
+        LOOP_BLOCKED(@type@, 32) {
+            @vtype@ v1 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i]));
+            @vtype@ v2 = @vpre@_andnot_@vsuf@(mask, @vpre@_load_@vsuf@((@type@*)&ip[i + stride]));
+            c1 = @vpre@_max_@vsuf@(c1, v1);
+            c2 = @vpre@_max_@vsuf@(c2, v2);
+        }
+        c1 = @vpre@_max_@vsuf@(c1, c2);
+
+        if (npy_get_floatstatus() & NPY_FPE_INVALID) {
+            *op = @nan@;
+        }
+        else {
+            @type@ tmp = sse2_horizontal_max_@vtype@(c1);
+            *op  = (*op >= tmp || npy_isnan(*op)) ? *op : tmp + 0;
+        }
+    }
+    LOOP_BLOCKED_END {
+        in = (ip[i] >= 0) ? ip[i] : -ip[i];
+        *op  = (*op >= in || npy_isnan(*op)) ? *op : in;
+    }
+}
+
 /**end repeat**/
 
 /*

diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -539,6 +539,33 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
     }
 }
 
+/*
+ * Type resolver for the binary max_abs ufunc.
+ *
+ * If neither input operand is complex, resolution is delegated to the
+ * simple binary operation resolver. If either one is complex, the default
+ * resolver is used so that the registered complex -> float loop is found.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+NPY_NO_EXPORT int
+PyUFunc_MaxAbsTypeResolver(PyUFuncObject *ufunc,
+                                NPY_CASTING casting,
+                                PyArrayObject **operands,
+                                PyObject *type_tup,
+                                PyArray_Descr **out_dtypes)
+{
+    if (!PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[0])->type_num) &&
+        !PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[1])->type_num)) {
+        return PyUFunc_SimpleBinaryOperationTypeResolver(ufunc, casting,
+                    operands, type_tup, out_dtypes);
+    }
+
+    /* complex input: the default resolver picks the loop producing float */
+    return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
+                type_tup, out_dtypes);
+}
+
 /*
  * Creates a new NPY_TIMEDELTA dtype, copying the datetime metadata
  * from the given dtype.

diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -43,6 +43,13 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
+NPY_NO_EXPORT int
+PyUFunc_MaxAbsTypeResolver(PyUFuncObject *ufunc,
+                                NPY_CASTING casting,
+                                PyArrayObject **operands,
+                                PyObject *type_tup,
+                                PyArray_Descr **out_dtypes);
+
 NPY_NO_EXPORT int
 PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,

diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
@@ -23,7 +23,7 @@
     csingle, cdouble, inexact, complexfloating, newaxis, ravel, all, Inf, dot,
     add, multiply, sqrt, maximum, fastCopyAndTranspose, sum, isfinite, size,
     finfo, errstate, geterrobj, longdouble, rollaxis, amin, amax, product, abs,
-    broadcast, atleast_2d, intp, asanyarray
+    broadcast, atleast_2d, intp, asanyarray, max_abs
     )
 from numpy.lib import triu, asfarray
 from numpy.linalg import lapack_lite, _umath_linalg
@@ -2085,7 +2085,8 @@ def norm(x, ord=None, axis=None, keepdims=False):
 
     if len(axis) == 1:
         if ord == Inf:
-            return abs(x).max(axis=axis, keepdims=keepdims)
+            return max_abs.reduce(x, axis=axis, keepdims=keepdims)
+            # (replaces abs(x).max(...), which first built the abs(x) array)
         elif ord == -Inf:
             return abs(x).min(axis=axis, keepdims=keepdims)
         elif ord == 0: