From 0c493feeaf201c7c5b4a4d55233bb3e6bb2432c4 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Mon, 23 Jan 2017 18:09:55 -0500
Subject: [PATCH 01/26] init commit

---
 numpy/core/setup.py                        |   3 +-
 numpy/core/src/umath/logical_gufuncs.c.src | 253 +++++++++++++++++++++
 numpy/core/src/umath/umathmodule.c         |   2 +
 3 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 numpy/core/src/umath/logical_gufuncs.c.src

diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 89f65f41a550..9892b3b814ac 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -899,7 +899,8 @@ def generate_umath_c(ext, build_dir):
             join('src', 'umath', 'ufunc_object.c'),
             join('src', 'umath', 'scalarmath.c.src'),
             join('src', 'umath', 'ufunc_type_resolution.c'),
-            join('src', 'private', 'mem_overlap.c')]
+            join('src', 'private', 'mem_overlap.c'),
+            join('src', 'umath', 'logical_gufuncs.c.src')]
 
     umath_deps = [
             generate_umath_py,
diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
new file mode 100644
index 000000000000..3ccc13aee7c3
--- /dev/null
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -0,0 +1,253 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#include "Python.h"
+#include "math.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/ufuncobject.h"
+#include "numpy/npy_3kcompat.h"
+
+
+#define all_INNER_LOOP(OP, TYPE)                \
+    *((npy_bool *)c_n) = NPY_TRUE;              \
+    for (i = 0; i < I; i++){                    \
+        a = *(TYPE *)a_i;                       \
+        b = *(TYPE *)b_i;                       \
+        if (a OP b){                            \
+            a_i += a_I;                         \
+            b_i += b_I;                         \
+        } else {                                \
+            *((npy_bool *)c_n) = NPY_FALSE;     \
+            break;                              \
+        }                                       \
+    }
+
+#define any_INNER_LOOP(OP, TYPE)                \
+    *((npy_bool *)c_n) = NPY_FALSE;             \
+    for (i = 0; i < I; i++){                    \
+        a = *(TYPE *)a_i;                       \
+        b = *(TYPE *)b_i;                       \
+        if (a OP b){                            \
+            *((npy_bool *)c_n) = NPY_TRUE;      \
+            break;                              \
+        } else {                                \
+            a_i += a_I;                         \
+            b_i += b_I;                         \
+        }                                       \
+    }
+
+/**begin repeat
+* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble#
+*/
+
+/**begin repeat1
+* #OP = ==,!=,<,<=,>,>=#
+* #OPNAME = eq,ne,lt,le,gt,ge#
+*/
+
+/**begin repeat2
+* #ALL_OR_ANY = all,any#
+*/
+
+void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
+                                  npy_intp* steps, void* data)
+{
+    npy_intp i, n;
+    npy_intp N = dimensions[0], I = dimensions[1];
+    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2], a_I = steps[3], b_I = steps[4];
+
+    char *a_i, *b_i;
+    npy_@TYPE@ a, b;
+
+    for (n = 0; n < N; n++) {
+        a_i = a_n;
+        b_i = b_n;
+
+        @ALL_OR_ANY@_INNER_LOOP(@OP@, npy_@TYPE@)
+
+        a_n += a_N;
+        b_n += b_N;
+        c_n += c_N;
+    }
+}
+
+/**end repeat2**/
+/**end repeat1**/
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+/* create arrays of type specific functions for each gufunc  */
+
+#define FUNC_ARRAY_NAME(NAME) NAME ## _funcs
+
+#define GUFUNC_FUNC_ARRAY(NAME)                         \
+    static PyUFuncGenericFunction                       \
+    FUNC_ARRAY_NAME(NAME)[] = {                         \
+        byte_ ## NAME,                                  \
+        ubyte_ ## NAME,                                 \
+        short_ ## NAME,                                 \
+        ushort_ ## NAME,                                \
+        int_ ## NAME,                                   \
+        uint_ ## NAME,                                  \
+        long_ ## NAME,                                  \
+        ulong_ ## NAME,                                 \
+        longlong_ ## NAME,                              \
+        ulonglong_ ## NAME,                             \
+        float_ ## NAME,                                 \
+        double_ ## NAME,                                \
+        longdouble_ ## NAME,                            \
+    }
+
+
+GUFUNC_FUNC_ARRAY(all_eq);
+GUFUNC_FUNC_ARRAY(all_ne);
+GUFUNC_FUNC_ARRAY(all_lt);
+GUFUNC_FUNC_ARRAY(all_le);
+GUFUNC_FUNC_ARRAY(all_gt);
+GUFUNC_FUNC_ARRAY(all_ge);
+GUFUNC_FUNC_ARRAY(any_eq);
+GUFUNC_FUNC_ARRAY(any_ne);
+GUFUNC_FUNC_ARRAY(any_lt);
+GUFUNC_FUNC_ARRAY(any_le);
+GUFUNC_FUNC_ARRAY(any_gt);
+GUFUNC_FUNC_ARRAY(any_ge);
+
+
+/* -------------------------------------------------------------------------- */
+/* Create type arrays for each gufunc, which are all identical*/
+static char types[39] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
+                         NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
+                         NPY_SHORT, NPY_SHORT, NPY_BOOL,
+                         NPY_USHORT, NPY_USHORT, NPY_BOOL,
+                         NPY_INT, NPY_INT, NPY_BOOL,
+                         NPY_UINT, NPY_UINT, NPY_BOOL,
+                         NPY_LONG, NPY_LONG, NPY_BOOL,
+                         NPY_ULONG, NPY_ULONG, NPY_BOOL,
+                         NPY_LONGLONG, NPY_LONGLONG, NPY_BOOL,
+                         NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
+                         NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
+                         NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
+                         NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL};
+
+
+/* -------------------------------------------------------------------------- */
+/* define name and docstring for each gufunc*/
+
+typedef struct gufunc_descriptor_struct {
+    char *name;
+    char *doc;
+    PyUFuncGenericFunction *funcs;
+} GUFUNC_DESCRIPTOR_t;
+
+GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
+    {
+        "all_equal",
+        "all equal docstring",
+        FUNC_ARRAY_NAME(all_eq)
+    },
+    {
+        "all_not_equal",
+        "all not equal docstring",
+        FUNC_ARRAY_NAME(all_ne)
+    },
+    {
+        "all_less",
+        "all less than docstring",
+        FUNC_ARRAY_NAME(all_lt)
+    },
+    {
+        "all_less_equal",
+        "all less than or equal docstring",
+        FUNC_ARRAY_NAME(all_le)
+    },
+    {
+        "all_greater",
+        "all greater than docstring",
+        FUNC_ARRAY_NAME(all_gt)
+    },
+    {
+        "all_greater_equal",
+        "all greater than or equal docstring",
+        FUNC_ARRAY_NAME(all_ge)
+    },
+    {
+        "any_equal",
+        "any equal docstring",
+        FUNC_ARRAY_NAME(any_eq)
+    },
+    {
+        "any_not_equal",
+        "any not equal docstring",
+        FUNC_ARRAY_NAME(any_ne)
+    },
+    {
+        "any_less",
+        "any less than docstring",
+        FUNC_ARRAY_NAME(any_lt)
+    },
+    {
+        "any_less_equal",
+        "any less than or equal docstring",
+        FUNC_ARRAY_NAME(any_le)
+    },
+    {
+        "any_greater",
+        "any greater than docstring",
+        FUNC_ARRAY_NAME(any_gt)
+    },
+    {
+        "any_greater_equal",
+        "any greater than or equal docstring",
+        FUNC_ARRAY_NAME(any_ge)
+    }
+};
+
+
+/* -------------------------------------------------------------------------- */
+/* create array of nulls for "data" for each gufunc type*/
+static void *array_of_nulls[] = {
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL
+};
+
+
+/* -------------------------------------------------------------------------- */
+/* function to register all gufuncs*/
+void InitLogicalGufuncs(PyObject *dictionary) {
+    PyObject *f;
+    int i;
+    const int gufunc_count = sizeof(gufunc_descriptors)/
+        sizeof(gufunc_descriptors[0]);
+    for (i=0; i < gufunc_count; i++) {
+        GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i];
+        f = PyUFunc_FromFuncAndDataAndSignature(d->funcs,
+                                                array_of_nulls,
+                                                types,
+                                                13,             // number of types
+                                                2,              // number of inputs
+                                                1,              // number of outputs
+                                                PyUFunc_None,
+                                                d->name,
+                                                d->doc,
+                                                0,              // unused
+                                                "(i),(i)->()");
+        PyDict_SetItemString(dictionary, d->name, f);
+        Py_DECREF(f);
+    }
+}
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index 45accb970787..697b410ef0b4 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -41,6 +41,7 @@
 #include "ufunc_type_resolution.h"
 #include "__umath_generated.c"
 #include "__ufunc_api.c"
+#include "logical_gufuncs.c"
 
 NPY_NO_EXPORT int initscalarmath(PyObject *);
 
@@ -365,6 +366,7 @@ PyMODINIT_FUNC initumath(void)
 
     /* Load the ufunc operators into the array module's namespace */
     InitOperators(d);
+    InitLogicalGufuncs(d);
 
     PyDict_SetItemString(d, "pi", s = PyFloat_FromDouble(NPY_PI));
     Py_DECREF(s);

From 61a9044ad7ef7407d0abb05a0a26a2e1fe103cd6 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Mon, 30 Jan 2017 13:30:43 +0100
Subject: [PATCH 02/26] temporary build fix

---
 numpy/core/src/umath/logical_gufuncs.c.src | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 3ccc13aee7c3..93df4e3ba8d4 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -47,7 +47,7 @@
 * #ALL_OR_ANY = all,any#
 */
 
-void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
+static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
                                   npy_intp* steps, void* data)
 {
     npy_intp i, n;
@@ -138,7 +138,7 @@ typedef struct gufunc_descriptor_struct {
     PyUFuncGenericFunction *funcs;
 } GUFUNC_DESCRIPTOR_t;
 
-GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
+static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
     {
         "all_equal",
         "all equal docstring",
@@ -229,7 +229,7 @@ static void *array_of_nulls[] = {
 
 /* -------------------------------------------------------------------------- */
 /* function to register all gufuncs*/
-void InitLogicalGufuncs(PyObject *dictionary) {
+static void InitLogicalGufuncs(PyObject *dictionary) {
     PyObject *f;
     int i;
     const int gufunc_count = sizeof(gufunc_descriptors)/

From 220235f24638978f0432e3be203d9bd3d9206b99 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Wed, 1 Feb 2017 12:51:59 -0500
Subject: [PATCH 03/26] added all_equal docstring

---
 numpy/core/src/umath/logical_gufuncs.c.src | 26 +++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 93df4e3ba8d4..109fffbaea38 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -141,7 +141,31 @@ typedef struct gufunc_descriptor_struct {
 static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
     {
         "all_equal",
-        "all equal docstring",
+        "Return True if x1 == x2 for all elements along the last axis, False\n"
+        "otherwise.  Similar to (x1==x2).all(axis=-1), except the last dimension\n"
+        "of x1 and x2 must be equal and greater than 1.  This function short\n"
+        "circuits after the first nonequal element.\n"
+        "\n"
+        "Parameters\n"
+        "----------\n"
+        "x1, x2 : array_like\n"
+        "    Input arrays whose last dimensions match; others are broadcast.\n"
+        "\n"
+        "Returns\n"
+        "-------\n"
+        "out : ndarray or bool\n"
+        "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+        "\n"
+        "See Also\n"
+        "--------\n"
+        "equal, all, any_equal, all_not_equal\n"
+        "\n"
+        "Examples\n"
+        "--------\n"
+        ">>> np.all_equal(np.arange(3), np.arange(3))\n"
+        "True\n"
+        ">>> np.all_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
+        "array([ True, False,  True], dtype=bool)",
         FUNC_ARRAY_NAME(all_eq)
     },
     {

From 3161244969fa137da331e6f2c291332f8da5870b Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Wed, 1 Feb 2017 16:23:19 -0500
Subject: [PATCH 04/26] TEST added logical_gufunc test

---
 numpy/core/tests/test_logical_gufuncs.py | 40 ++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 numpy/core/tests/test_logical_gufuncs.py

diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
new file mode 100644
index 000000000000..c8cb33bb62d5
--- /dev/null
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -0,0 +1,40 @@
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+from numpy.testing import (
+    run_module_suite, assert_equal
+)
+
+dtypes = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, 
+          np.uint64, np.float32, np.float64]
+
+# helper function
+def check(f, x1, x2, dtype, expected):
+    result = f(x1.astype(dtype), x2.astype(dtype))
+    assert_equal(result, expected)
+
+
+def test_generator():
+    inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
+    for i in range(inputs.shape[0]):
+        for j in range(inputs.shape[0]):
+            x1 = inputs[i, :]
+            x2 = inputs[j, :]
+
+            for dtype in dtypes:
+
+                yield check, np.all_equal, x1, x2, dtype, (x1==x2).all()
+                yield check, np.any_equal, x1, x2, dtype, (x1==x2).any()
+                yield check, np.all_not_equal, x1, x2, dtype, (x1!=x2).all()
+                yield check, np.any_not_equal, x1, x2, dtype, (x1!=x2).any()
+                yield check, np.all_greater, x1, x2, dtype, (x1>x2).all()
+                yield check, np.any_greater, x1, x2, dtype, (x1>x2).any()
+                yield check, np.all_greater_equal, x1, x2, dtype, (x1>=x2).all()
+                yield check, np.any_greater_equal, x1, x2, dtype, (x1>=x2).any()
+                yield check, np.all_less, x1, x2, dtype, (x1<x2).all()
+                yield check, np.any_less, x1, x2, dtype, (x1<x2).any()
+                yield check, np.all_less_equal, x1, x2, dtype, (x1<=x2).all()
+                yield check, np.any_less_equal, x1, x2, dtype, (x1<=x2).any()
+
+if __name__ == "__main__":
+    run_module_suite()

From a1dccaedd90a4fc186f2eb274429bda834dc7c44 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Fri, 3 Feb 2017 15:13:46 -0500
Subject: [PATCH 05/26] added complex types

---
 numpy/core/src/umath/logical_gufuncs.c.src | 38 +++++++++---
 numpy/core/tests/test_logical_gufuncs.py   | 70 +++++++++++++---------
 2 files changed, 73 insertions(+), 35 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 109fffbaea38..c5adeecdd9b7 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -6,12 +6,23 @@
 #include "numpy/npy_3kcompat.h"
 
 
+// define the basic real version similar to the macro for complex numbers
+#define REQ(a,b) (a==b)
+#define RNE(a,b) (a!=b)
+#define RLT(a,b) (a<b)
+#define RLE(a,b) (a<=b)
+#define RGT(a,b) (a>b)
+#define RGE(a,b) (a>=b)
+
+/* -------------------------------------------------------------------------- */
+/* define the inner loops  */
+
 #define all_INNER_LOOP(OP, TYPE)                \
     *((npy_bool *)c_n) = NPY_TRUE;              \
     for (i = 0; i < I; i++){                    \
         a = *(TYPE *)a_i;                       \
         b = *(TYPE *)b_i;                       \
-        if (a OP b){                            \
+        if (OP(a,b)){                           \
             a_i += a_I;                         \
             b_i += b_I;                         \
         } else {                                \
@@ -25,7 +36,7 @@
     for (i = 0; i < I; i++){                    \
         a = *(TYPE *)a_i;                       \
         b = *(TYPE *)b_i;                       \
-        if (a OP b){                            \
+        if (OP(a,b)){                           \
             *((npy_bool *)c_n) = NPY_TRUE;      \
             break;                              \
         } else {                                \
@@ -34,12 +45,16 @@
         }                                       \
     }
 
+/* -------------------------------------------------------------------------- */
+/* create the family of functions using a template  */
+
 /**begin repeat
-* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble#
+* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
+* #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
 */
 
 /**begin repeat1
-* #OP = ==,!=,<,<=,>,>=#
+* #OP = EQ,NE,LT,LE,GT,GE#
 * #OPNAME = eq,ne,lt,le,gt,ge#
 */
 
@@ -62,7 +77,7 @@ static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
         a_i = a_n;
         b_i = b_n;
 
-        @ALL_OR_ANY@_INNER_LOOP(@OP@, npy_@TYPE@)
+        @ALL_OR_ANY@_INNER_LOOP(@OP_PREFIX@@OP@, npy_@TYPE@)
 
         a_n += a_N;
         b_n += b_N;
@@ -74,6 +89,7 @@ static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
 /**end repeat1**/
 /**end repeat**/
 
+
 /* -------------------------------------------------------------------------- */
 /* create arrays of type specific functions for each gufunc  */
 
@@ -95,6 +111,9 @@ static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
         float_ ## NAME,                                 \
         double_ ## NAME,                                \
         longdouble_ ## NAME,                            \
+        cfloat_ ## NAME,                                \
+        cdouble_ ## NAME,                               \
+        clongdouble_ ## NAME,                           \
     }
 
 
@@ -114,7 +133,7 @@ GUFUNC_FUNC_ARRAY(any_ge);
 
 /* -------------------------------------------------------------------------- */
 /* Create type arrays for each gufunc, which are all identical*/
-static char types[39] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
+static char types[48] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                          NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
                          NPY_SHORT, NPY_SHORT, NPY_BOOL,
                          NPY_USHORT, NPY_USHORT, NPY_BOOL,
@@ -126,7 +145,10 @@ static char types[39] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                          NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
                          NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
                          NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
-                         NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL};
+                         NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL,
+                         NPY_CFLOAT, NPY_CFLOAT, NPY_BOOL,
+                         NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
+                         NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
 
 
 /* -------------------------------------------------------------------------- */
@@ -263,7 +285,7 @@ static void InitLogicalGufuncs(PyObject *dictionary) {
         f = PyUFunc_FromFuncAndDataAndSignature(d->funcs,
                                                 array_of_nulls,
                                                 types,
-                                                13,             // number of types
+                                                16,             // number of types
                                                 2,              // number of inputs
                                                 1,              // number of outputs
                                                 PyUFunc_None,
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index c8cb33bb62d5..c5b5a944cc39 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -5,36 +5,52 @@
     run_module_suite, assert_equal
 )
 
-dtypes = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, 
-          np.uint64, np.float32, np.float64]
-
-# helper function
-def check(f, x1, x2, dtype, expected):
-    result = f(x1.astype(dtype), x2.astype(dtype))
-    assert_equal(result, expected)
-
-
-def test_generator():
+float_types = [np.float32, np.float64, np.longdouble]
+complex_types = [np.csingle, np.cdouble, np.clongdouble]  # cfloat == cdouble
+int_types = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
+             np.int64, np.uint64, np.longlong, np.ulonglong]
+
+# helper functions
+def check(f, x1, x2, expected):
+    assert_equal(f(x1, x2), expected)
+
+
+def check_all(x1, x2):
+    yield check, np.all_equal,          x1, x2, (x1==x2).all()
+    yield check, np.any_equal,          x1, x2, (x1==x2).any()
+    yield check, np.all_not_equal,      x1, x2, (x1!=x2).all()
+    yield check, np.any_not_equal,      x1, x2, (x1!=x2).any()
+    yield check, np.all_greater,        x1, x2, (x1>x2).all()
+    yield check, np.any_greater,        x1, x2, (x1>x2).any()
+    yield check, np.all_greater_equal,  x1, x2, (x1>=x2).all()
+    yield check, np.any_greater_equal,  x1, x2, (x1>=x2).any()
+    yield check, np.all_less,           x1, x2, (x1<x2).all()
+    yield check, np.any_less,           x1, x2, (x1<x2).any()
+    yield check, np.all_less_equal,     x1, x2, (x1<=x2).all()
+    yield check, np.any_less_equal,     x1, x2, (x1<=x2).any()
+
+
+def test_real():
     inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
     for i in range(inputs.shape[0]):
         for j in range(inputs.shape[0]):
-            x1 = inputs[i, :]
-            x2 = inputs[j, :]
-
-            for dtype in dtypes:
-
-                yield check, np.all_equal, x1, x2, dtype, (x1==x2).all()
-                yield check, np.any_equal, x1, x2, dtype, (x1==x2).any()
-                yield check, np.all_not_equal, x1, x2, dtype, (x1!=x2).all()
-                yield check, np.any_not_equal, x1, x2, dtype, (x1!=x2).any()
-                yield check, np.all_greater, x1, x2, dtype, (x1>x2).all()
-                yield check, np.any_greater, x1, x2, dtype, (x1>x2).any()
-                yield check, np.all_greater_equal, x1, x2, dtype, (x1>=x2).all()
-                yield check, np.any_greater_equal, x1, x2, dtype, (x1>=x2).any()
-                yield check, np.all_less, x1, x2, dtype, (x1<x2).all()
-                yield check, np.any_less, x1, x2, dtype, (x1<x2).any()
-                yield check, np.all_less_equal, x1, x2, dtype, (x1<=x2).all()
-                yield check, np.any_less_equal, x1, x2, dtype, (x1<=x2).any()
+            for dtype in int_types + float_types + complex_types:
+                x1 = inputs[i, :].astype(dtype)
+                x2 = inputs[j, :].astype(dtype)
+                for x in check_all(x1, x2):
+                    yield x
+
+
+def test_complex():
+    j = 1j
+    for m in range(-1, 2):
+        for n in range(-1, 2):
+            for dtype in complex_types:
+                x1 = np.zeros(2, dtype=dtype)
+                x2 = x1 + m + n * j
+                for x in check_all(x1, x2):
+                    yield x
+
 
 if __name__ == "__main__":
     run_module_suite()

From 6aaf76129ad94f4531544f6ce6ac60abdc63ce15 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Sun, 5 Feb 2017 10:57:23 -0500
Subject: [PATCH 06/26] fix build issues

---
 numpy/core/src/umath/logical_gufuncs.c.src | 33 +++++++++++++---------
 numpy/core/src/umath/logical_gufuncs.h     | 13 +++++++++
 numpy/core/src/umath/umathmodule.c         |  4 +--
 3 files changed, 34 insertions(+), 16 deletions(-)
 create mode 100644 numpy/core/src/umath/logical_gufuncs.h

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index c5adeecdd9b7..8d4aa5699948 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -4,6 +4,9 @@
 #include "numpy/ndarraytypes.h"
 #include "numpy/ufuncobject.h"
 #include "numpy/npy_3kcompat.h"
+#include "logical_gufuncs.h"
+//#include "npy_pycompat.h"
+//#include "npy_config.h"
 
 
 // define the basic real version similar to the macro for complex numbers
@@ -272,27 +275,29 @@ static void *array_of_nulls[] = {
     (void *)NULL
 };
 
-
 /* -------------------------------------------------------------------------- */
-/* function to register all gufuncs*/
-static void InitLogicalGufuncs(PyObject *dictionary) {
+/* function to create and register all gufuncs*/
+void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc) {
     PyObject *f;
     int i;
     const int gufunc_count = sizeof(gufunc_descriptors)/
         sizeof(gufunc_descriptors[0]);
+
     for (i=0; i < gufunc_count; i++) {
         GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i];
-        f = PyUFunc_FromFuncAndDataAndSignature(d->funcs,
-                                                array_of_nulls,
-                                                types,
-                                                16,             // number of types
-                                                2,              // number of inputs
-                                                1,              // number of outputs
-                                                PyUFunc_None,
-                                                d->name,
-                                                d->doc,
-                                                0,              // unused
-                                                "(i),(i)->()");
+
+        f = createPyUFunc(d->funcs,
+                          array_of_nulls,
+                          types,
+                          16,             /* number of types */
+                          2,              /* number of inputs */
+                          1,              /* number of outputs */
+                          PyUFunc_None,
+                          d->name,
+                          d->doc,
+                          0,              /* unused */
+                          "(i),(i)->()");
+        if (f == NULL) { return; }  /* creation failed; error is set */
         PyDict_SetItemString(dictionary, d->name, f);
         Py_DECREF(f);
     }
diff --git a/numpy/core/src/umath/logical_gufuncs.h b/numpy/core/src/umath/logical_gufuncs.h
new file mode 100644
index 000000000000..62ceafb2f073
--- /dev/null
+++ b/numpy/core/src/umath/logical_gufuncs.h
@@ -0,0 +1,13 @@
+typedef PyObject* (*PyUFunc_FromFuncAndDataAndSignature_t)(PyUFuncGenericFunction*,
+														   void**,
+														   char*,
+														   int,
+														   int,
+														   int,
+														   int,
+														   const char*,
+														   const char*,
+														   int,
+														   const char*);
+
+void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc);
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index 697b410ef0b4..dac3904af92e 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -41,7 +41,7 @@
 #include "ufunc_type_resolution.h"
 #include "__umath_generated.c"
 #include "__ufunc_api.c"
-#include "logical_gufuncs.c"
+#include "logical_gufuncs.h"
 
 NPY_NO_EXPORT int initscalarmath(PyObject *);
 
@@ -366,7 +366,7 @@ PyMODINIT_FUNC initumath(void)
 
     /* Load the ufunc operators into the array module's namespace */
     InitOperators(d);
-    InitLogicalGufuncs(d);
+    InitLogicalGufuncs(d, PyUFunc_FromFuncAndDataAndSignature);
 
     PyDict_SetItemString(d, "pi", s = PyFloat_FromDouble(NPY_PI));
     Py_DECREF(s);

From 80ec17b5e38335868aad0bf746748e4e3589898e Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Mon, 6 Feb 2017 19:25:31 -0500
Subject: [PATCH 07/26] fix pull request comments

---
 numpy/core/src/umath/logical_gufuncs.c.src | 34 ++++++++++------------
 numpy/core/src/umath/logical_gufuncs.h     | 20 ++++++-------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 8d4aa5699948..a8a1cc312e29 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -5,8 +5,6 @@
 #include "numpy/ufuncobject.h"
 #include "numpy/npy_3kcompat.h"
 #include "logical_gufuncs.h"
-//#include "npy_pycompat.h"
-//#include "npy_config.h"
 
 
 // define the basic real version similar to the macro for complex numbers
@@ -136,22 +134,22 @@ GUFUNC_FUNC_ARRAY(any_ge);
 
 /* -------------------------------------------------------------------------- */
 /* Create type arrays for each gufunc, which are all identical*/
-static char types[48] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
-                         NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
-                         NPY_SHORT, NPY_SHORT, NPY_BOOL,
-                         NPY_USHORT, NPY_USHORT, NPY_BOOL,
-                         NPY_INT, NPY_INT, NPY_BOOL,
-                         NPY_UINT, NPY_UINT, NPY_BOOL,
-                         NPY_LONG, NPY_LONG, NPY_BOOL,
-                         NPY_ULONG, NPY_ULONG, NPY_BOOL,
-                         NPY_LONGLONG, NPY_LONGLONG, NPY_BOOL,
-                         NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
-                         NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
-                         NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
-                         NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL,
-                         NPY_CFLOAT, NPY_CFLOAT, NPY_BOOL,
-                         NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
-                         NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
+static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
+                       NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
+                       NPY_SHORT, NPY_SHORT, NPY_BOOL,
+                       NPY_USHORT, NPY_USHORT, NPY_BOOL,
+                       NPY_INT, NPY_INT, NPY_BOOL,
+                       NPY_UINT, NPY_UINT, NPY_BOOL,
+                       NPY_LONG, NPY_LONG, NPY_BOOL,
+                       NPY_ULONG, NPY_ULONG, NPY_BOOL,
+                       NPY_LONGLONG, NPY_LONGLONG, NPY_BOOL,
+                       NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
+                       NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
+                       NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
+                       NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL,
+                       NPY_CFLOAT, NPY_CFLOAT, NPY_BOOL,
+                       NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
+                       NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
 
 
 /* -------------------------------------------------------------------------- */
diff --git a/numpy/core/src/umath/logical_gufuncs.h b/numpy/core/src/umath/logical_gufuncs.h
index 62ceafb2f073..ab3aa429dc02 100644
--- a/numpy/core/src/umath/logical_gufuncs.h
+++ b/numpy/core/src/umath/logical_gufuncs.h
@@ -1,13 +1,13 @@
 typedef PyObject* (*PyUFunc_FromFuncAndDataAndSignature_t)(PyUFuncGenericFunction*,
-														   void**,
-														   char*,
-														   int,
-														   int,
-														   int,
-														   int,
-														   const char*,
-														   const char*,
-														   int,
-														   const char*);
+                                                           void**,
+                                                           char*,
+                                                           int,
+                                                           int,
+                                                           int,
+                                                           int,
+                                                           const char*,
+                                                           const char*,
+                                                           int,
+                                                           const char*);
 
 void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc);

From 413e50dc4a392e78fa40e131af72ba9df2230d81 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Sat, 11 Feb 2017 09:59:24 -0500
Subject: [PATCH 08/26] wrap lines over max length

---
 numpy/core/src/umath/logical_gufuncs.c.src | 37 +++++-----------------
 1 file changed, 8 insertions(+), 29 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index a8a1cc312e29..b3a0ebb1a55a 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -50,7 +50,8 @@
 /* create the family of functions using a template  */
 
 /**begin repeat
-* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
+* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
+          double,longdouble,cfloat,cdouble,clongdouble#
 * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
 */
 
@@ -69,7 +70,8 @@ static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
     npy_intp i, n;
     npy_intp N = dimensions[0], I = dimensions[1];
     char *a_n = args[0], *b_n = args[1], *c_n = args[2];
-    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2], a_I = steps[3], b_I = steps[4];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
+    npy_intp a_I = steps[3], b_I = steps[4];
 
     char *a_i, *b_i;
     npy_@TYPE@ a, b;
@@ -164,31 +166,7 @@ typedef struct gufunc_descriptor_struct {
 static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
     {
         "all_equal",
-        "Return True if x1 == x2 for all elements along the last axis, False\n"
-        "otherwise.  Similar to (x1==x2).all(axis=-1), except the last dimension\n"
-        "of x1 and x2 must be equal and greater than 1.  This function short\n"
-        "circuits after the first nonequal element."
-        "\n"
-        "Parameters\n"
-        "----------\n"
-        "x1, x2 : array_like\n"
-        "    Input arrays of the same shape.\n"
-        "\n"
-        "Returns\n"
-        "-------\n"
-        "out : ndarray or bool\n"
-        "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-        "\n"
-        "See Also\n"
-        "-------\n"
-        "equal, all, any_equal, all_not_equal\n"
-        "\n"
-        "Examples\n"
-        "-------\n"
-        ">>> np.all_equal(np.arange(3), np.arange(3))\n"
-        "True\n"
-        ">>> np.all_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
-        "array([ True, False,  True], dtype=bool)",
+        "all equal docstring",
         FUNC_ARRAY_NAME(all_eq)
     },
     {
@@ -275,7 +253,8 @@ static void *array_of_nulls[] = {
 
 /* -------------------------------------------------------------------------- */
 /* function to create and register all gufuncs*/
-void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc) {
+void InitLogicalGufuncs(PyObject *dictionary, 
+                        PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc) {
     PyObject *f;
     int i;
     const int gufunc_count = sizeof(gufunc_descriptors)/
@@ -287,7 +266,7 @@ void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignatur
         f = createPyUFunc(d->funcs,
                           array_of_nulls,
                           types,
-                          16,             // number of types
+                          sizeof(types),  // number of types
                           2,              // number of inputs
                           1,              // number of outputs
                           PyUFunc_None,

From 107391593da495b43ad12439c79a6ef537428d86 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Thu, 16 Feb 2017 08:39:23 -0500
Subject: [PATCH 09/26] add vectorization

---
 numpy/core/src/umath/logical_gufuncs.c.src | 253 +++++++++++++--------
 1 file changed, 153 insertions(+), 100 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index b3a0ebb1a55a..c6fc72ece33d 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -51,7 +51,7 @@
 
 /**begin repeat
 * #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
-          double,longdouble,cfloat,cdouble,clongdouble#
+*         double,longdouble,cfloat,cdouble,clongdouble#
 * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
 */
 
@@ -64,8 +64,9 @@
 * #ALL_OR_ANY = all,any#
 */
 
-static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
-                                  npy_intp* steps, void* data)
+static void base_@TYPE@_@ALL_OR_ANY@_@OPNAME@
+        (char **args, npy_intp *dimensions,
+         npy_intp* steps, void* data)
 {
     npy_intp i, n;
     npy_intp N = dimensions[0], I = dimensions[1];
@@ -94,45 +95,142 @@ static void @TYPE@_@ALL_OR_ANY@_@OPNAME@(char **args, npy_intp *dimensions,
 
 
 /* -------------------------------------------------------------------------- */
-/* create arrays of type specific functions for each gufunc  */
+/* vectorized versions*/
+
+#define BLOCK_SIZE 1024       /* elements compared per branch-free chunk */
+#define ALIGNMENT_SIZE 32     /* byte boundary x is advanced to before chunking */
+#define ALIGNED(p) ((((npy_uintp)(p)) % ALIGNMENT_SIZE) == 0)
+
+/*
+*  The core array must be contiguous, ie the core dimension steps must
+*  be equal to the sizeof the type
+*/
+/**begin repeat
+* #TYPE = float,double,int#
+**/
+void simd_core_contiguous_@TYPE@_all_eq(
+    char **args, npy_intp *dimensions, npy_intp* steps, void* data)
+{
+    npy_intp i, n, j;
+    npy_intp N = dimensions[0], I = dimensions[1];
+    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
+
+    unsigned int true_count;
+    @TYPE@ *x, *y;
 
-#define FUNC_ARRAY_NAME(NAME) NAME ## _funcs
-
-#define GUFUNC_FUNC_ARRAY(NAME)                         \
-    static PyUFuncGenericFunction                       \
-    FUNC_ARRAY_NAME(NAME)[] = {                         \
-        byte_ ## NAME,                                  \
-        ubyte_ ## NAME,                                 \
-        short_ ## NAME,                                 \
-        ushort_ ## NAME,                                \
-        int_ ## NAME,                                   \
-        uint_ ## NAME,                                  \
-        long_ ## NAME,                                  \
-        ulong_ ## NAME,                                 \
-        longlong_ ## NAME,                              \
-        ulonglong_ ## NAME,                             \
-        float_ ## NAME,                                 \
-        double_ ## NAME,                                \
-        longdouble_ ## NAME,                            \
-        cfloat_ ## NAME,                                \
-        cdouble_ ## NAME,                               \
-        clongdouble_ ## NAME,                           \
+    for (n = 0; n < N; n++) {
+        *((npy_bool *)c_n) = NPY_TRUE;
+        x = (@TYPE@ *)a_n;
+        y = (@TYPE@ *)b_n;
+
+        // get x aligned, helps compiler vectorize.  y can use unaligned
+        // loads. do while so at least the first element is checked
+        // before checking an entire chunk
+        i = 0;
+        do {
+            if ((*x) == (*y)) {
+                x++;
+                y++;
+                i++;
+            } else {
+                *((npy_bool *)c_n) = NPY_FALSE;
+                i = I;
+            }
+        } while (!ALIGNED(x) & (i<I));
+
+        // main loop in chunks with auto vectorize simd instructions
+        for ( ; i<(I-BLOCK_SIZE) ; i+=BLOCK_SIZE) {
+            true_count = 0;
+            for (j=0 ; j<BLOCK_SIZE ; j++){
+                true_count += ((*x) == (*y));
+                x++;
+                y++;
+            }
+            if (true_count != BLOCK_SIZE) {
+                *((npy_bool *)c_n) = NPY_FALSE;
+                i = I;
+                break;
+            }
+        };
+
+        // check remaining elements
+        for ( ; i<I ; i++) {
+            if ((*x) == (*y)) {
+                x++;
+                y++;
+            } else {
+                *((npy_bool *)c_n) = NPY_FALSE;
+                break;
+            } 
+        }
+
+        a_n += a_N;
+        b_n += b_N;
+        c_n += c_N;
     }
+}
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+/* dispatchers to select optimal function */
 
+/**begin repeat
+* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
+*         double,longdouble,cfloat,cdouble,clongdouble#
+* #IS_VECTD = 0*4,1,0,0*4,1,1,0*4#
+*/
+void dispatch_@TYPE@_all_eq(
+    char **args, npy_intp *dimensions, npy_intp* steps, void* data)
+{
 
-GUFUNC_FUNC_ARRAY(all_eq);
-GUFUNC_FUNC_ARRAY(all_ne);
-GUFUNC_FUNC_ARRAY(all_lt);
-GUFUNC_FUNC_ARRAY(all_le);
-GUFUNC_FUNC_ARRAY(all_gt);
-GUFUNC_FUNC_ARRAY(all_ge);
-GUFUNC_FUNC_ARRAY(any_eq);
-GUFUNC_FUNC_ARRAY(any_ne);
-GUFUNC_FUNC_ARRAY(any_lt);
-GUFUNC_FUNC_ARRAY(any_le);
-GUFUNC_FUNC_ARRAY(any_gt);
-GUFUNC_FUNC_ARRAY(any_ge);
+    #if @IS_VECTD@
+        npy_intp I = dimensions[1];
+        npy_intp a_I = steps[3], b_I = steps[4];
+        if ((I > (2*BLOCK_SIZE)) & (a_I == sizeof(@TYPE@)) 
+                & (b_I == sizeof(@TYPE@)))
+        {
+            simd_core_contiguous_@TYPE@_all_eq(args, dimensions, steps, data);
+            return;
+        }
+    #endif
+    
+    base_@TYPE@_all_eq(args, dimensions, steps, data);
 
+}
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+/* create arrays of type specific functions for each gufunc  */
+
+/**begin repeat
+* #NAME = all_eq,all_ne,all_lt,all_le,all_gt,all_ge,
+*         any_eq,any_ne,any_lt,any_le,any_gt,any_ge#
+* #APPROACH = dispatch,base*11#
+*/
+
+static PyUFuncGenericFunction @NAME@_funcs[] = { 
+    @APPROACH@_byte_@NAME@,
+    @APPROACH@_ubyte_@NAME@,
+    @APPROACH@_short_@NAME@,
+    @APPROACH@_ushort_@NAME@,
+
+    @APPROACH@_int_@NAME@,
+    @APPROACH@_uint_@NAME@,
+    @APPROACH@_long_@NAME@,
+    @APPROACH@_ulong_@NAME@,
+
+    @APPROACH@_longlong_@NAME@,
+    @APPROACH@_ulonglong_@NAME@,
+    @APPROACH@_float_@NAME@,
+    @APPROACH@_double_@NAME@,
+
+    @APPROACH@_longdouble_@NAME@,
+    @APPROACH@_cfloat_@NAME@,
+    @APPROACH@_cdouble_@NAME@,
+    @APPROACH@_clongdouble_@NAME@
+};
+/**end repeat**/
 
 /* -------------------------------------------------------------------------- */
 /* Create type arrays for each gufunc, which are all identical*/
@@ -140,14 +238,17 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
                        NPY_SHORT, NPY_SHORT, NPY_BOOL,
                        NPY_USHORT, NPY_USHORT, NPY_BOOL,
+
                        NPY_INT, NPY_INT, NPY_BOOL,
                        NPY_UINT, NPY_UINT, NPY_BOOL,
                        NPY_LONG, NPY_LONG, NPY_BOOL,
                        NPY_ULONG, NPY_ULONG, NPY_BOOL,
+
                        NPY_LONGLONG, NPY_LONGLONG, NPY_BOOL,
                        NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
                        NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
                        NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
+
                        NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL,
                        NPY_CFLOAT, NPY_CFLOAT, NPY_BOOL,
                        NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
@@ -155,75 +256,27 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
 
 
 /* -------------------------------------------------------------------------- */
-/* define name and docstring for each gufunc*/
+/* define name pointer to functions for each gufunc*/
 
 typedef struct gufunc_descriptor_struct {
     char *name;
-    char *doc;
     PyUFuncGenericFunction *funcs;
 } GUFUNC_DESCRIPTOR_t;
 
 static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
-    {
-        "all_equal",
-        "all equal docstring",
-        FUNC_ARRAY_NAME(all_eq)
-    },
-    {
-        "all_not_equal",
-        "all not equal docstring",
-        FUNC_ARRAY_NAME(all_ne)
-    },
-    {
-        "all_less",
-        "all less than docstring",
-        FUNC_ARRAY_NAME(all_lt)
-    },
-    {
-        "all_less_equal",
-        "all less than or equal docstring",
-        FUNC_ARRAY_NAME(all_le)
-    },
-    {
-        "all_greater",
-        "all greater than docstring",
-        FUNC_ARRAY_NAME(all_gt)
-    },
-    {
-        "all_greater_equal",
-        "all greater than or equal docstring",
-        FUNC_ARRAY_NAME(all_ge)
-    },
-    {
-        "any_equal",
-        "any equal docstring",
-        FUNC_ARRAY_NAME(any_eq)
-    },
-    {
-        "any_not_equal",
-        "any not equal docstring",
-        FUNC_ARRAY_NAME(any_ne)
-    },
-    {
-        "any_less",
-        "any less than docstring",
-        FUNC_ARRAY_NAME(any_lt)
-    },
-    {
-        "any_less_equal",
-        "any less than or equal docstring",
-        FUNC_ARRAY_NAME(any_le)
-    },
-    {
-        "any_greater",
-        "any greater than docstring",
-        FUNC_ARRAY_NAME(any_gt)
-    },
-    {
-        "any_greater_equal",
-        "any greater than or equal docstring",
-        FUNC_ARRAY_NAME(any_ge)
-    }
+    {"all_equal",           all_eq_funcs},
+    {"all_not_equal",       all_ne_funcs},
+    {"all_less",            all_lt_funcs},
+    {"all_less_equal",      all_le_funcs},
+    {"all_greater",         all_gt_funcs},
+    {"all_greater_equal",   all_ge_funcs},
+
+    {"any_equal",           any_eq_funcs},
+    {"any_not_equal",       any_ne_funcs},
+    {"any_less",            any_lt_funcs},
+    {"any_less_equal",      any_le_funcs},
+    {"any_greater",         any_gt_funcs},
+    {"any_greater_equal",   any_ge_funcs}
 };
 
 
@@ -271,7 +324,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
                           1,              // number of outputs
                           PyUFunc_None,
                           d->name,
-                          d->doc,
+                          "temp docstring",
                           0,              // unused
                           "(i),(i)->()");
 

From 92ac19e9350fd782f12fa24475d866f525c4f072 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Sun, 5 Mar 2017 19:40:42 -0500
Subject: [PATCH 10/26] use consistent name for funcs

---
 numpy/core/src/umath/logical_gufuncs.c.src | 40 +++++++++++-----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index c6fc72ece33d..447f691609ba 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -57,7 +57,7 @@
 
 /**begin repeat1
 * #OP = EQ,NE,LT,LE,GT,GE#
-* #OPNAME = eq,ne,lt,le,gt,ge#
+* #OPNAME = equal,not_equal,less,less_equal,greater,greater_equal#
 */
 
 /**begin repeat2
@@ -108,7 +108,7 @@ static void base_@TYPE@_@ALL_OR_ANY@_@OPNAME@
 /**begin repeat
 * #TYPE = float,double,int#
 **/
-void simd_core_contiguous_@TYPE@_all_eq(
+void simd_core_contiguous_@TYPE@_all_equal(
     char **args, npy_intp *dimensions, npy_intp* steps, void* data)
 {
     npy_intp i, n, j;
@@ -178,9 +178,9 @@ void simd_core_contiguous_@TYPE@_all_eq(
 /**begin repeat
 * #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
 *         double,longdouble,cfloat,cdouble,clongdouble#
-* #IS_VECTD = 0*4,1,0,0*4,1,1,0*4#
+* #IS_VECTD = 0*4,1,0*5,1,1,0*4#
 */
-void dispatch_@TYPE@_all_eq(
+void dispatch_@TYPE@_all_equal(
     char **args, npy_intp *dimensions, npy_intp* steps, void* data)
 {
 
@@ -190,7 +190,7 @@ void dispatch_@TYPE@_all_eq(
         if ((I > (2*BLOCK_SIZE)) & (a_I == sizeof(@TYPE@)) 
                 & (b_I == sizeof(@TYPE@)))
         {
-            simd_core_contiguous_@TYPE@_all_eq(args, dimensions, steps, data);
+            simd_core_contiguous_@TYPE@_all_equal(args, dimensions, steps, data);
             return;
         }
     #endif
@@ -204,8 +204,8 @@ void dispatch_@TYPE@_all_eq(
 /* create arrays of type specific functions for each gufunc  */
 
 /**begin repeat
-* #NAME = all_eq,all_ne,all_lt,all_le,all_gt,all_ge,
-*         any_eq,any_ne,any_lt,any_le,any_gt,any_ge#
+* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,all_greater_equal,
+*         any_equal,any_not_equal,any_less,any_less_equal,any_greater,any_greater_equal#
 * #APPROACH = dispatch,base*11#
 */
 
@@ -264,19 +264,19 @@ typedef struct gufunc_descriptor_struct {
 } GUFUNC_DESCRIPTOR_t;
 
 static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
-    {"all_equal",           all_eq_funcs},
-    {"all_not_equal",       all_ne_funcs},
-    {"all_less",            all_lt_funcs},
-    {"all_less_equal",      all_le_funcs},
-    {"all_greater",         all_gt_funcs},
-    {"all_greater_equal",   all_ge_funcs},
-
-    {"any_equal",           any_eq_funcs},
-    {"any_not_equal",       any_ne_funcs},
-    {"any_less",            any_lt_funcs},
-    {"any_less_equal",      any_le_funcs},
-    {"any_greater",         any_gt_funcs},
-    {"any_greater_equal",   any_ge_funcs}
+    {"all_equal",           all_equal_funcs},
+    {"all_not_equal",       all_not_equal_funcs},
+    {"all_less",            all_less_funcs},
+    {"all_less_equal",      all_less_equal_funcs},
+    {"all_greater",         all_greater_funcs},
+    {"all_greater_equal",   all_greater_equal_funcs},
+
+    {"any_equal",           any_equal_funcs},
+    {"any_not_equal",       any_not_equal_funcs},
+    {"any_less",            any_less_funcs},
+    {"any_less_equal",      any_less_equal_funcs},
+    {"any_greater",         any_greater_funcs},
+    {"any_greater_equal",   any_greater_equal_funcs}
 };
 
 

From 12e20c81be5af53000937b8e94c4420926fb0c68 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Mon, 6 Mar 2017 05:47:11 -0500
Subject: [PATCH 11/26] refactor ufunc registration to use src template more
 directly

---
 numpy/core/src/umath/logical_gufuncs.c.src | 146 +++++++--------------
 1 file changed, 48 insertions(+), 98 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 447f691609ba..d8b25e55a735 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -195,42 +195,11 @@ void dispatch_@TYPE@_all_equal(
         }
     #endif
     
-    base_@TYPE@_all_eq(args, dimensions, steps, data);
+    base_@TYPE@_all_equal(args, dimensions, steps, data);
 
 }
 /**end repeat**/
 
-/* -------------------------------------------------------------------------- */
-/* create arrays of type specific functions for each gufunc  */
-
-/**begin repeat
-* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,all_greater_equal,
-*         any_equal,any_not_equal,any_less,any_less_equal,any_greater,any_greater_equal#
-* #APPROACH = dispatch,base*11#
-*/
-
-static PyUFuncGenericFunction @NAME@_funcs[] = { 
-    @APPROACH@_byte_@NAME@,
-    @APPROACH@_ubyte_@NAME@,
-    @APPROACH@_short_@NAME@,
-    @APPROACH@_ushort_@NAME@,
-
-    @APPROACH@_int_@NAME@,
-    @APPROACH@_uint_@NAME@,
-    @APPROACH@_long_@NAME@,
-    @APPROACH@_ulong_@NAME@,
-
-    @APPROACH@_longlong_@NAME@,
-    @APPROACH@_ulonglong_@NAME@,
-    @APPROACH@_float_@NAME@,
-    @APPROACH@_double_@NAME@,
-
-    @APPROACH@_longdouble_@NAME@,
-    @APPROACH@_cfloat_@NAME@,
-    @APPROACH@_cdouble_@NAME@,
-    @APPROACH@_clongdouble_@NAME@
-};
-/**end repeat**/
 
 /* -------------------------------------------------------------------------- */
 /* Create type arrays for each gufunc, which are all identical*/
@@ -255,80 +224,61 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
 
 
-/* -------------------------------------------------------------------------- */
-/* define name pointer to functions for each gufunc*/
-
-typedef struct gufunc_descriptor_struct {
-    char *name;
-    PyUFuncGenericFunction *funcs;
-} GUFUNC_DESCRIPTOR_t;
-
-static GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
-    {"all_equal",           all_equal_funcs},
-    {"all_not_equal",       all_not_equal_funcs},
-    {"all_less",            all_less_funcs},
-    {"all_less_equal",      all_less_equal_funcs},
-    {"all_greater",         all_greater_funcs},
-    {"all_greater_equal",   all_greater_equal_funcs},
-
-    {"any_equal",           any_equal_funcs},
-    {"any_not_equal",       any_not_equal_funcs},
-    {"any_less",            any_less_funcs},
-    {"any_less_equal",      any_less_equal_funcs},
-    {"any_greater",         any_greater_funcs},
-    {"any_greater_equal",   any_greater_equal_funcs}
-};
-
-
 /* -------------------------------------------------------------------------- */
 /* create array of nulls for "data" for each gufunc type*/
+
 static void *array_of_nulls[] = {
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL,
-    (void *)NULL
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL
 };
 
+
 /* -------------------------------------------------------------------------- */
 /* function to create and register all gufuncs*/
+
 void InitLogicalGufuncs(PyObject *dictionary, 
                         PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc) {
     PyObject *f;
-    int i;
-    const int gufunc_count = sizeof(gufunc_descriptors)/
-        sizeof(gufunc_descriptors[0]);
-
-    for (i=0; i < gufunc_count; i++) {
-        GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i];
-
-        f = createPyUFunc(d->funcs,
-                          array_of_nulls,
-                          types,
-                          sizeof(types),  // number of types
-                          2,              // number of inputs
-                          1,              // number of outputs
-                          PyUFunc_None,
-                          d->name,
-                          "temp docstring",
-                          0,              // unused
-                          "(i),(i)->()");
-
-        PyDict_SetItemString(dictionary, d->name, f);
-        Py_DECREF(f);
-    }
+
+/**begin repeat
+* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,all_greater_equal,
+*         any_equal,any_not_equal,any_less,any_less_equal,any_greater,any_greater_equal#
+* #APPROACH = dispatch,base*11#
+*/
+
+    static PyUFuncGenericFunction @NAME@_funcs[] = { 
+
+/**begin repeat1
+* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
+*         double,longdouble,cfloat,cdouble,clongdouble#
+*/
+
+        @APPROACH@_@TYPE@_@NAME@,
+
+/**end repeat1**/
+
+    }; // close array of funcs
+
+
+
+    f = createPyUFunc(@NAME@_funcs,
+                      array_of_nulls,
+                      types,
+                      sizeof(types) / 3,  // number of loops: 3 type codes (2 in, 1 out) each
+                      2,              // number of inputs
+                      1,              // number of outputs
+                      PyUFunc_None,
+                      "@NAME@",
+                      "temp docstring",
+                      0,              // unused
+                      "(i),(i)->()");
+
+    PyDict_SetItemString(dictionary, "@NAME@", f);
+    Py_DECREF(f);
+
+/**end repeat**/
+
+
 }

From b9b0b5f63f29abe10350f9046144bb7e319c5246 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Wed, 8 Mar 2017 05:40:06 -0500
Subject: [PATCH 12/26] added docstrings

---
 numpy/core/src/umath/logical_gufuncs.c.src | 322 ++++++++++++++++++++-
 1 file changed, 321 insertions(+), 1 deletion(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index d8b25e55a735..1b5ce21d4b20 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -104,6 +104,10 @@ static void base_@TYPE@_@ALL_OR_ANY@_@OPNAME@
 /*
 *  The core array must be contiguous, ie the core dimension steps must
 *  be equal to the sizeof the type
+*
+*  TODO: all contiguous signed and unsigned integer types could be handled by
+*  a single vectorized all_equal function.  Besides there should be at least 
+*  one simple C function without src templating.
 */
 /**begin repeat
 * #TYPE = float,double,int#
@@ -235,6 +239,322 @@ static void *array_of_nulls[] = {
 };
 
 
+/* -------------------------------------------------------------------------- */
+/* define docstrings*/
+
+static char const * const all_equal_doc =
+    "Return True if x1 == x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1==x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "equal, all, any_equal, all_not_equal\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.all_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
+    "array([ True, False,  True], dtype=bool)";
+
+static char const * const all_not_equal_doc =
+    "Return True if x1 != x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1!=x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "not_equal, all, any_not_equal, all_equal\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_not_equal(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.all_not_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
+    "array([False,  True, False], dtype=bool)";
+
+static char const * const all_less_doc =
+    "Return True if x1 < x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1<x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "less, all, any_less, all_greater\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_less(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.all_less([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
+    "array([False,  True, False], dtype=bool)";
+
+static char const * const all_less_equal_doc =
+    "Return True if x1 <= x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1<=x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "less_equal, all, any_less_equal, all_greater_equal\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_less_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.all_less_equal([[1, 2], [0, 0], [1, 3]], [1, 2])\n"
+    "array([ True,  True, False], dtype=bool)";
+
+static char const * const all_greater_doc =
+    "Return True if x1 > x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1>x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "greater, all, any_less_equal, all_less_equal\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_greater(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.all_greater([[1, 2], [0, 0], [1, 3]], [1, 2])\n"
+    "array([False, False, False], dtype=bool)";
+
+static char const * const all_greater_equal_doc =
+    "Return True if x1 >= x2 for all elements along the last axis, False\n"
+    "otherwise.  Similar to (x1>=x2).all(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "greater_equal, all, any_greater, all_less\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.all_greater_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.all_greater_equal([[1, 2], [0, 0], [2, 3]], [1, 2])\n"
+    "array([ True, False,  True], dtype=bool)";
+
+static char const * const any_equal_doc =
+    "Return True if x1 == x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1==x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "    Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "--------\n"
+    "equal, any, all_equal, any_not_equal\n"
+    "\n"
+    "Examples\n"
+    "--------\n"
+    ">>> np.any_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.any_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([ True, False,  True], dtype=bool)";
+
+static char const * const any_not_equal_doc =
+    "Return True if x1 != x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1!=x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "-------\n"
+    "not_equal, any, all_not_equal, any_equal\n"
+    "\n"
+    "Examples\n"
+    "-------\n"
+    ">>> np.any_not_equal(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.any_not_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([ True,  True,  True], dtype=bool)";
+
+static char const * const any_less_doc =
+    "Return True if x1 < x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1<x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "-------\n"
+    "less, any, all_less, any_greater_equal\n"
+    "\n"
+    "Examples\n"
+    "-------\n"
+    ">>> np.any_less(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.any_less([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([ True,  True, False], dtype=bool)";
+
+static char const * const any_less_equal_doc =
+    "Return True if x1 <= x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1<=x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "-------\n"
+    "less_equal, any, all_less_equal, any_greater\n"
+    "\n"
+    "Examples\n"
+    "-------\n"
+    ">>> np.any_less_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.any_less_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([ True,  True,  True], dtype=bool)";
+
+static char const * const any_greater_doc =
+    "Return True if x1 > x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1>x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "-------\n"
+    "greater, any, all_greater, any_less_equal\n"
+    "\n"
+    "Examples\n"
+    "-------\n"
+    ">>> np.any_greater(np.arange(3), np.arange(3))\n"
+    "False\n"
+    ">>> np.any_greater([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([False, False,  True], dtype=bool)";
+
+static char const * const any_greater_equal_doc =
+    "Return True if x1 >= x2 for any elements along the last axis, False\n"
+    "otherwise.  Similar to (x1>=x2).any(axis=-1), except the last dimension\n"
+    "of x1 and x2 must be equal and greater than 1.\n"
+    "\n"
+    "Parameters\n"
+    "----------\n"
+    "x1, x2 : array_like\n"
+    "    Input arrays of the same shape.\n"
+    "\n"
+    "Returns\n"
+    "-------\n"
+    "out : ndarray or bool\n"
+    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
+    "\n"
+    "See Also\n"
+    "-------\n"
+    "greater_equal, any, all_greater_equal, any_less\n"
+    "\n"
+    "Examples\n"
+    "-------\n"
+    ">>> np.any_greater_equal(np.arange(3), np.arange(3))\n"
+    "True\n"
+    ">>> np.any_greater_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
+    "array([ True, False,  True], dtype=bool)";
+
+
 /* -------------------------------------------------------------------------- */
 /* function to create and register all gufuncs*/
 
@@ -271,7 +591,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
                       1,              // number of outputs
                       PyUFunc_None,
                       "@NAME@",
-                      "temp docstring",
+                      (char*) @NAME@_doc,
                       0,              // unused
                       "(i),(i)->()");
 

From 094fa03d01194348bf35621fe0d4623ee29c5948 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Thu, 9 Mar 2017 07:16:21 -0500
Subject: [PATCH 13/26] fix build issue

---
 .gitignore                                 | 1 +
 numpy/core/src/umath/logical_gufuncs.c.src | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 964910c26524..9b32ffe6b5a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,6 +140,7 @@ numpy/core/src/umath/simd.inc
 numpy/core/src/umath/struct_ufunc_test.c
 numpy/core/src/umath/test_rational.c
 numpy/core/src/umath/umath_tests.c
+numpy/core/src/umath/logical_gufuncs.c
 numpy/distutils/__config__.py
 numpy/linalg/umath_linalg.c
 doc/source/reference/generated
diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 1b5ce21d4b20..c4f22a9b2710 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -568,7 +568,8 @@ void InitLogicalGufuncs(PyObject *dictionary,
 * #APPROACH = dispatch,base*11#
 */
 
-    static PyUFuncGenericFunction @NAME@_funcs[] = { 
+{
+    static PyUFuncGenericFunction @NAME@_funcs[] = {
 
 /**begin repeat1
 * #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
@@ -582,7 +583,6 @@ void InitLogicalGufuncs(PyObject *dictionary,
     }; // close array of funcs
 
 
-
     f = createPyUFunc(@NAME@_funcs,
                       array_of_nulls,
                       types,
@@ -598,6 +598,8 @@ void InitLogicalGufuncs(PyObject *dictionary,
     PyDict_SetItemString(dictionary, "@NAME@", f);
     Py_DECREF(f);
 
+    }; // close bracket surrounding inner repeat
+
 /**end repeat**/
 
 

From 64fc879bb984c1b928c7581be78084cfc17a8605 Mon Sep 17 00:00:00 2001
From: MattHarrigan <harrigan.matthew@gmail.com>
Date: Thu, 9 Mar 2017 07:57:52 -0500
Subject: [PATCH 14/26] incorp PR comment

---
 numpy/core/src/umath/logical_gufuncs.c.src | 41 +++++++++++-----------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index c4f22a9b2710..9e0fb760c73d 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -563,44 +563,45 @@ void InitLogicalGufuncs(PyObject *dictionary,
     PyObject *f;
 
 /**begin repeat
-* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,all_greater_equal,
-*         any_equal,any_not_equal,any_less,any_less_equal,any_greater,any_greater_equal#
+* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,
+          all_greater_equal,any_equal,any_not_equal,any_less,any_less_equal,
+          any_greater,any_greater_equal#
 * #APPROACH = dispatch,base*11#
 */
 
-{
-    static PyUFuncGenericFunction @NAME@_funcs[] = {
+    { // open bracket surrounding inner repeat
+
+        static PyUFuncGenericFunction @NAME@_funcs[] = {
 
 /**begin repeat1
 * #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
 *         double,longdouble,cfloat,cdouble,clongdouble#
 */
 
-        @APPROACH@_@TYPE@_@NAME@,
+            @APPROACH@_@TYPE@_@NAME@,
 
 /**end repeat1**/
 
-    }; // close array of funcs
+        }; // close array of funcs
 
 
-    f = createPyUFunc(@NAME@_funcs,
-                      array_of_nulls,
-                      types,
-                      sizeof(types),  // number of types
-                      2,              // number of inputs
-                      1,              // number of outputs
-                      PyUFunc_None,
-                      "@NAME@",
-                      (char*) @NAME@_doc,
-                      0,              // unused
-                      "(i),(i)->()");
+        f = createPyUFunc(@NAME@_funcs,
+                          array_of_nulls,
+                          types,
+                          sizeof(types),  // number of types
+                          2,              // number of inputs
+                          1,              // number of outputs
+                          PyUFunc_None,
+                          "@NAME@",
+                          (char*) @NAME@_doc,
+                          0,              // unused
+                          "(i),(i)->()");
 
-    PyDict_SetItemString(dictionary, "@NAME@", f);
-    Py_DECREF(f);
+        PyDict_SetItemString(dictionary, "@NAME@", f);
+        Py_DECREF(f);
 
     }; // close bracket surrounding inner repeat
 
 /**end repeat**/
 
-
 }

From 06d00623ea112363f68d02ad9e7381ab53d960f4 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Mon, 20 Mar 2017 17:31:43 +0100
Subject: [PATCH 15/26] rewrite to vectorize all types

---
 numpy/core/src/umath/logical_gufuncs.c.src | 184 +++++----------------
 1 file changed, 39 insertions(+), 145 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 9e0fb760c73d..2eff6b3025c9 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -5,6 +5,7 @@
 #include "numpy/ufuncobject.h"
 #include "numpy/npy_3kcompat.h"
 #include "logical_gufuncs.h"
+#include "lowlevel_strided_loops.h"
 
 
 // define the basic real version similar to the macro for complex numbers
@@ -15,43 +16,15 @@
 #define RGT(a,b) (a>b)
 #define RGE(a,b) (a>=b)
 
-/* -------------------------------------------------------------------------- */
-/* define the inner loops  */
-
-#define all_INNER_LOOP(OP, TYPE)                \
-    *((npy_bool *)c_n) = NPY_TRUE;              \
-    for (i = 0; i < I; i++){                    \
-        a = *(TYPE *)a_i;                       \
-        b = *(TYPE *)b_i;                       \
-        if (OP(a,b)){                           \
-            a_i += a_I;                         \
-            b_i += b_I;                         \
-        } else {                                \
-            *((npy_bool *)c_n) = NPY_FALSE;     \
-            break;                              \
-        }                                       \
-    }
-
-#define any_INNER_LOOP(OP, TYPE)                \
-    *((npy_bool *)c_n) = NPY_FALSE;             \
-    for (i = 0; i < I; i++){                    \
-        a = *(TYPE *)a_i;                       \
-        b = *(TYPE *)b_i;                       \
-        if (OP(a,b)){                           \
-            *((npy_bool *)c_n) = NPY_TRUE;      \
-            break;                              \
-        } else {                                \
-            a_i += a_I;                         \
-            b_i += b_I;                         \
-        }                                       \
-    }
-
 /* -------------------------------------------------------------------------- */
 /* create the family of functions using a template  */
 
+#define BLOCK_SIZE 1024
+
 /**begin repeat
-* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
-*         double,longdouble,cfloat,cdouble,clongdouble#
+* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
 * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
 */
 
@@ -61,10 +34,12 @@
 */
 
 /**begin repeat2
-* #ALL_OR_ANY = all,any#
+* #fname = all,any#
+* #ALL_OR_ANY = NPY_TRUE,NPY_FALSE#
+* #INV = ,!#
 */
 
-static void base_@TYPE@_@ALL_OR_ANY@_@OPNAME@
+static void NPY_GCC_OPT_3 base_@TYPE@_@fname@_@OPNAME@
         (char **args, npy_intp *dimensions,
          npy_intp* steps, void* data)
 {
@@ -75,96 +50,41 @@ static void base_@TYPE@_@ALL_OR_ANY@_@OPNAME@
     npy_intp a_I = steps[3], b_I = steps[4];
 
     char *a_i, *b_i;
-    npy_@TYPE@ a, b;
 
     for (n = 0; n < N; n++) {
         a_i = a_n;
         b_i = b_n;
 
-        @ALL_OR_ANY@_INNER_LOOP(@OP_PREFIX@@OP@, npy_@TYPE@)
-
-        a_n += a_N;
-        b_n += b_N;
-        c_n += c_N;
-    }
-}
+        *((npy_bool *)c_n) = @ALL_OR_ANY@;
 
-/**end repeat2**/
-/**end repeat1**/
-/**end repeat**/
-
-
-/* -------------------------------------------------------------------------- */
-/* vectorized versions*/
-
-#define BLOCK_SIZE 1024
-#define ALIGNMENT_SIZE 32
-#define ALIGNED(p) ((unsigned long)p & ALIGNMENT_SIZE)
-
-/*
-*  The core array must be contiguous, ie the core dimension steps must
-*  be equal to the sizeof the type
-*
-*  TODO: all contiguous signed and unsigned integer types could be handled by
-*  a single vectorized all_equal function.  Besides there should be at least 
-*  one simple C function without src templating.
-*/
-/**begin repeat
-* #TYPE = float,double,int#
-**/
-void simd_core_contiguous_@TYPE@_all_equal(
-    char **args, npy_intp *dimensions, npy_intp* steps, void* data)
-{
-    npy_intp i, n, j;
-    npy_intp N = dimensions[0], I = dimensions[1];
-    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
-    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
-
-    unsigned int true_count;
-    @TYPE@ *x, *y;
-
-    for (n = 0; n < N; n++) {
-        *((npy_bool *)c_n) = NPY_TRUE;
-        x = (@TYPE@ *)a_n;
-        y = (@TYPE@ *)b_n;
-
-        // get x aligned, helps compiler vectorize.  y can use unaligned
-        // loads. do while so at least the first element is checked
-        // before checking an entire chunk
         i = 0;
-        do {
-            if ((*x) == (*y)) {
-                x++;
-                y++;
-                i++;
-            } else {
-                *((npy_bool *)c_n) = NPY_FALSE;
-                i = I;
-            }
-        } while (!ALIGNED(x) & (i<I));
-
-        // main loop in chunks with auto vectorize simd instructions
-        for ( ; i<(I-BLOCK_SIZE) ; i+=BLOCK_SIZE) {
-            true_count = 0;
-            for (j=0 ; j<BLOCK_SIZE ; j++){
-                true_count += ((*x) == (*y));
-                x++;
-                y++;
-            }
-            if (true_count != BLOCK_SIZE) {
-                *((npy_bool *)c_n) = NPY_FALSE;
-                i = I;
-                break;
+        /* main loop in chunks with auto vectorize simd instructions */
+        if (a_I == sizeof(@TYPE@) && b_I == sizeof(@TYPE@)) {
+            for (i=0; i < npy_blocked_end(0, 1, BLOCK_SIZE, I);
+                 i+=BLOCK_SIZE) {
+                unsigned int true_count = 0, j;
+                for (j=0 ; j<BLOCK_SIZE ; j++){
+                    @TYPE@ a = *(@TYPE@ *)a_i;
+                    @TYPE@ b = *(@TYPE@ *)b_i;
+                    true_count += @INV@@OP_PREFIX@@OP@(a, b);
+                    a_i += a_I;
+                    b_i += b_I;
+                }
+                if (true_count != BLOCK_SIZE) {
+                    *((npy_bool *)c_n) = !@ALL_OR_ANY@;
+                    i = I;
+                    break;
+                }
             }
-        };
+        }
 
         // check remaining elements
         for ( ; i<I ; i++) {
-            if ((*x) == (*y)) {
-                x++;
-                y++;
+            if (@INV@@OP_PREFIX@@OP@(*(@TYPE@ *)a_i, *(@TYPE@ *)b_i)) {
+                a_i += a_I;
+                b_i += b_I;
             } else {
-                *((npy_bool *)c_n) = NPY_FALSE;
+                *((npy_bool *)c_n) = !@ALL_OR_ANY@;
                 break;
             } 
         }
@@ -174,37 +94,11 @@ void simd_core_contiguous_@TYPE@_all_equal(
         c_n += c_N;
     }
 }
-/**end repeat**/
 
-/* -------------------------------------------------------------------------- */
-/* dispatchers to select optimal function */
-
-/**begin repeat
-* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
-*         double,longdouble,cfloat,cdouble,clongdouble#
-* #IS_VECTD = 0*4,1,0*5,1,1,0*4#
-*/
-void dispatch_@TYPE@_all_equal(
-    char **args, npy_intp *dimensions, npy_intp* steps, void* data)
-{
-
-    #if @IS_VECTD@
-        npy_intp I = dimensions[1];
-        npy_intp a_I = steps[3], b_I = steps[4];
-        if ((I > (2*BLOCK_SIZE)) & (a_I == sizeof(@TYPE@)) 
-                & (b_I == sizeof(@TYPE@)))
-        {
-            simd_core_contiguous_@TYPE@_all_equal(args, dimensions, steps, data);
-            return;
-        }
-    #endif
-    
-    base_@TYPE@_all_equal(args, dimensions, steps, data);
-
-}
+/**end repeat2**/
+/**end repeat1**/
 /**end repeat**/
 
-
 /* -------------------------------------------------------------------------- */
 /* Create type arrays for each gufunc, which are all identical*/
 static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
@@ -566,7 +460,6 @@ void InitLogicalGufuncs(PyObject *dictionary,
 * #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,
           all_greater_equal,any_equal,any_not_equal,any_less,any_less_equal,
           any_greater,any_greater_equal#
-* #APPROACH = dispatch,base*11#
 */
 
     { // open bracket surrounding inner repeat
@@ -574,11 +467,12 @@ void InitLogicalGufuncs(PyObject *dictionary,
         static PyUFuncGenericFunction @NAME@_funcs[] = {
 
 /**begin repeat1
-* #TYPE = byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,
-*         double,longdouble,cfloat,cdouble,clongdouble#
+* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
 */
 
-            @APPROACH@_@TYPE@_@NAME@,
+            base_@TYPE@_@NAME@,
 
 /**end repeat1**/
 

From 2dc6492147145e81b5d12fd5eca7b103e42228b5 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Mon, 20 Mar 2017 17:49:55 +0100
Subject: [PATCH 16/26] add avx support

---
 numpy/core/src/umath/logical_gufuncs.c.src | 35 ++++++++++++++++++++--
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 2eff6b3025c9..7b126c349688 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -39,7 +39,15 @@
 * #INV = ,!#
 */
 
-static void NPY_GCC_OPT_3 base_@TYPE@_@fname@_@OPNAME@
+/**begin repeat3
+ * #isa = , _avx#
+ * #ISA = , AVX#
+ * #CHK = 1, HAVE_ATTRIBUTE_TARGET_AVX#
+ * #ATTR = , NPY_GCC_TARGET_AVX#
+ */
+
+#if @CHK@
+static NPY_GCC_OPT_3 @ATTR@ void base_@TYPE@_@fname@_@OPNAME@@isa@
         (char **args, npy_intp *dimensions,
          npy_intp* steps, void* data)
 {
@@ -94,7 +102,9 @@ static void NPY_GCC_OPT_3 base_@TYPE@_@fname@_@OPNAME@
         c_n += c_N;
     }
 }
+#endif
 
+/**end repeat3**/
 /**end repeat2**/
 /**end repeat1**/
 /**end repeat**/
@@ -464,7 +474,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
 
     { // open bracket surrounding inner repeat
 
-        static PyUFuncGenericFunction @NAME@_funcs[] = {
+        static PyUFuncGenericFunction @NAME@_funcs_base[] = {
 
 /**begin repeat1
 * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
@@ -477,9 +487,28 @@ void InitLogicalGufuncs(PyObject *dictionary,
 /**end repeat1**/
 
         }; // close array of funcs
+        static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
+/**begin repeat1
+* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
+*/
+
+            base_@TYPE@_@NAME@_avx,
 
-        f = createPyUFunc(@NAME@_funcs,
+/**end repeat1**/
+
+        }; // close array of funcs
+
+        PyUFuncGenericFunction * funcs = @NAME@_funcs_base;
+
+#ifdef HAVE_ATTRIBUTE_TARGET_AVX
+        if (NPY_CPU_SUPPORTS_AVX) {
+            funcs = @NAME@_funcs_avx;
+        }
+#endif
+        f = createPyUFunc(funcs,
                           array_of_nulls,
                           types,
                           sizeof(types),  // number of types

From 066c46f36d05615311f9c1c0ccc008e07d316c47 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Wed, 22 Mar 2017 13:58:15 +0100
Subject: [PATCH 17/26] cleanup

---
 numpy/core/src/umath/logical_gufuncs.c.src | 462 +++++----------------
 numpy/core/src/umath/logical_gufuncs.h     |  29 +-
 numpy/core/tests/test_logical_gufuncs.py   |  13 +-
 3 files changed, 126 insertions(+), 378 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 7b126c349688..b52a1e3b8922 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -1,43 +1,43 @@
+#define _UMATHMODULE
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
-#include "Python.h"
-#include "math.h"
+#define NO_IMPORT_ARRAY
+#include "npy_config.h"
 #include "numpy/ndarraytypes.h"
 #include "numpy/ufuncobject.h"
-#include "numpy/npy_3kcompat.h"
+#include "ufunc_type_resolution.h"
 #include "logical_gufuncs.h"
 #include "lowlevel_strided_loops.h"
 
 
-// define the basic real version similar to the macro for complex numbers
-#define REQ(a,b) (a==b)
-#define RNE(a,b) (a!=b)
-#define RLT(a,b) (a<b)
-#define RLE(a,b) (a<=b)
-#define RGT(a,b) (a>b)
-#define RGE(a,b) (a>=b)
+/* define the basic real version similar to the macro for complex numbers */
+#define REQ(a,b) ((a) == (b))
+#define RNE(a,b) ((a) != (b))
+#define RLT(a,b) ((a) < (b))
+#define RLE(a,b) ((a) <= (b))
+#define RGT(a,b) ((a) > (b))
+#define RGE(a,b) ((a) >= (b))
 
-/* -------------------------------------------------------------------------- */
 /* create the family of functions using a template  */
 
-#define BLOCK_SIZE 1024
+#define BLOCK_SIZE 32
 
 /**begin repeat
-* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
-*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
-*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
-* #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
-*/
+ * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
+ * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
+ */
 
 /**begin repeat1
-* #OP = EQ,NE,LT,LE,GT,GE#
-* #OPNAME = equal,not_equal,less,less_equal,greater,greater_equal#
-*/
+ * #OP = EQ,NE,LT,LE,GT,GE#
+ * #OPNAME = equal,not_equal,less,less_equal,greater,greater_equal#
+ */
 
 /**begin repeat2
-* #fname = all,any#
-* #ALL_OR_ANY = NPY_TRUE,NPY_FALSE#
-* #INV = ,!#
-*/
+ * #fname = all,any#
+ * #ALL_OR_ANY = NPY_TRUE,NPY_FALSE#
+ * #INV = ,!#
+ */
 
 /**begin repeat3
  * #isa = , _avx#
@@ -47,28 +47,27 @@
  */
 
 #if @CHK@
-static NPY_GCC_OPT_3 @ATTR@ void base_@TYPE@_@fname@_@OPNAME@@isa@
-        (char **args, npy_intp *dimensions,
-         npy_intp* steps, void* data)
+static NPY_GCC_OPT_3 @ATTR@ void
+@TYPE@_@fname@_@OPNAME@@isa@(char **args, npy_intp *dimensions,
+                             npy_intp* steps, void* data)
 {
-    npy_intp i, n;
-    npy_intp N = dimensions[0], I = dimensions[1];
+    npy_intp n;
+    npy_intp N = dimensions[0], nI = dimensions[1];
     char *a_n = args[0], *b_n = args[1], *c_n = args[2];
     npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
     npy_intp a_I = steps[3], b_I = steps[4];
 
-    char *a_i, *b_i;
-
     for (n = 0; n < N; n++) {
-        a_i = a_n;
-        b_i = b_n;
+        npy_intp i;
+        char * a_i = a_n;
+        char * b_i = b_n;
 
         *((npy_bool *)c_n) = @ALL_OR_ANY@;
 
         i = 0;
         /* main loop in chunks with auto vectorize simd instructions */
         if (a_I == sizeof(@TYPE@) && b_I == sizeof(@TYPE@)) {
-            for (i=0; i < npy_blocked_end(0, 1, BLOCK_SIZE, I);
+            for (i=0; i < npy_blocked_end(0, 1, BLOCK_SIZE, nI);
                  i+=BLOCK_SIZE) {
                 unsigned int true_count = 0, j;
                 for (j=0 ; j<BLOCK_SIZE ; j++){
@@ -80,21 +79,21 @@ static NPY_GCC_OPT_3 @ATTR@ void base_@TYPE@_@fname@_@OPNAME@@isa@
                 }
                 if (true_count != BLOCK_SIZE) {
                     *((npy_bool *)c_n) = !@ALL_OR_ANY@;
-                    i = I;
+                    i = nI;
                     break;
                 }
             }
         }
 
-        // check remaining elements
-        for ( ; i<I ; i++) {
+        /* check remaining elements */
+        for ( ; i < nI; i++) {
             if (@INV@@OP_PREFIX@@OP@(*(@TYPE@ *)a_i, *(@TYPE@ *)b_i)) {
                 a_i += a_I;
                 b_i += b_I;
             } else {
                 *((npy_bool *)c_n) = !@ALL_OR_ANY@;
                 break;
-            } 
+            }
         }
 
         a_n += a_N;
@@ -109,8 +108,7 @@ static NPY_GCC_OPT_3 @ATTR@ void base_@TYPE@_@fname@_@OPNAME@@isa@
 /**end repeat1**/
 /**end repeat**/
 
-/* -------------------------------------------------------------------------- */
-/* Create type arrays for each gufunc, which are all identical*/
+/* create type arrays for each gufunc, which are all identical */
 static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
                        NPY_SHORT, NPY_SHORT, NPY_BOOL,
@@ -132,8 +130,7 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
 
 
-/* -------------------------------------------------------------------------- */
-/* create array of nulls for "data" for each gufunc type*/
+/* create array of nulls for "data" for each gufunc type */
 
 static void *array_of_nulls[] = {
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
@@ -143,298 +140,34 @@ static void *array_of_nulls[] = {
 };
 
 
-/* -------------------------------------------------------------------------- */
-/* define docstrings*/
-
-static char const * const all_equal_doc =
-    "Return True if x1 == x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1==x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "equal, all, any_equal, all_not_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.all_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
-    "array([ True, False,  True], dtype=bool)";
-
-static char const * const all_not_equal_doc =
-    "Return True if x1 != x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1!=x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "not_equal, all, any_not_equal, all_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_not_equal(np.arange(3), np.arange(3))\n"
-    "False\n"
-    ">>> np.all_not_equal([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
-    "array([ False, True,  False], dtype=bool)";
-
-static char const * const all_less_doc =
-    "Return True if x1 < x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1<x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "less, all, any_less, all_greater\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_less(np.arange(3), np.arange(3))\n"
-    "False\n"
-    ">>> np.all_less([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
-    "array([ False, True,  False], dtype=bool)";
-
-static char const * const all_less_equal_doc =
-    "Return True if x1 <= x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1<=x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "less_equal, all, any_less_equal, all_greater_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_less_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.all_less_equal([[1, 2], [0, 0], [1, 3]], [1, 2])\n"
-    "array([ True, True,  False], dtype=bool)";
-
-static char const * const all_greater_doc =
-    "Return True if x1 > x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1>x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "greater, all, any_less_equal, all_less_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_greater(np.arange(3), np.arange(3))\n"
-    "False\n"
-    ">>> np.all_greater([[1, 2], [0, 0], [1, 3]], [1, 2])\n"
-    "array([ False, False,  False], dtype=bool)";
-
-static char const * const all_greater_equal_doc =
-    "Return True if x1 >= x2 for all elements along the last axis, False\n"
-    "otherwise.  Similar to (x1<=x2).all(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "greater_equal, all, any_greater, all_less\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.all_greater_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.all_greater_equal([[1, 2], [0, 0], [2, 3]], [1, 2])\n"
-    "array([ True, False,  True], dtype=bool)";
-
-static char const * const any_equal_doc =
-    "Return True if x1 == x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1==x2).any(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "equal, any, all_equal, any_not_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.any_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.any_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ True, False,  True], dtype=bool)";
-
-static char const * const any_not_equal_doc =
-    "Return True if x1 != x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1!=x2).any(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "not_equal, any, all_not_equal, any_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.any_not_equal(np.arange(3), np.arange(3))\n"
-    "False\n"
-    ">>> np.any_not_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ True, True,  True], dtype=bool)";
+/* define docstrings */
 
-static char const * const any_less_doc =
-    "Return True if x1 < x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1<x2).any(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "less, any, all_less, any_greater_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.any_less(np.arange(3), np.arange(3))\n"
-    "False\n"
-    ">>> np.any_less([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ True, True,  False], dtype=bool)";
+/**begin repeat
+* #name = equal,not_equal,less,less_equal,greater,greater_equal,
+*         equal,not_equal,less,less_equal,greater,greater_equal#
+* #prefix = all*6, any*6#
+* #op = ==,!=,<,<=,>,>=,==,!=,<,<=,>,>=#
+* #res1 = True,False,False,True,False,True,
+*         True,False,False,True,False,True#
+* #res2 = array([ True False  True]),
+*         array([False  True False]),
+*         array([False  True False]),
+*         array([ True  True  True]),
+*         array([False False False]),
+*         array([ True False  True]),
+*         array([ True False  True]),
+*         array([False  True False]),
+*         array([False  True False]),
+*         array([ True  True  True]),
+*         array([False False False]),
+*         array([ True False  True])#
+*/
 
-static char const * const any_less_equal_doc =
-    "Return True if x1 <= x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1<=x2).any(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "less_equal, any, all_less_equal, any_greater\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.any_less_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.any_less_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ True, True,  True], dtype=bool)";
-
-static char const * const any_greater_doc =
-    "Return True if x1 > x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1>x2).any(axis=-1), except the last dimension\n"
-    "of x1 and x2 must be equal and greater than 1.\n"
-    "\n"
-    "Parameters\n"
-    "----------\n"
-    "x1, x2 : array_like\n"
-    "    Input arrays of the same shape.\n"
-    "\n"
-    "Returns\n"
-    "-------\n"
-    "out : ndarray or bool\n"
-    "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
-    "\n"
-    "See Also\n"
-    "-------\n"
-    "greater, any, all_greater, any_less_equal\n"
-    "\n"
-    "Examples\n"
-    "-------\n"
-    ">>> np.any_greater(np.arange(3), np.arange(3))\n"
+static char const * const @prefix@_@name@_doc =
+    "Return True if x1 @op@ x2 for @prefix@ elements along the last axis, "
     "False\n"
-    ">>> np.any_greater([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ False, False,  True], dtype=bool)";
-
-static char const * const any_greater_equal_doc =
-    "Return True if x1 >= x2 for any elements along the last axis, False\n"
-    "otherwise.  Similar to (x1>=x2).any(axis=-1), except the last dimension\n"
+    "otherwise.  Similar to (x1 @op@ x2).@prefix@(axis=-1), except the last "
+    "dimension\n"
     "of x1 and x2 must be equal and greater than 1.\n"
     "\n"
     "Parameters\n"
@@ -447,59 +180,57 @@ static char const * const any_greater_equal_doc =
     "out : ndarray or bool\n"
     "   Output array of bools, or a single bool if x1 and x2 are 1D.\n"
     "\n"
-    "See Also\n"
-    "-------\n"
-    "greater_equal, any, all_greater_equal, any_less\n"
     "\n"
     "Examples\n"
     "-------\n"
-    ">>> np.any_greater_equal(np.arange(3), np.arange(3))\n"
-    "True\n"
-    ">>> np.any_greater_equal([[1, 2], [0, 0], [2, 3]], [1, 3])\n"
-    "array([ True, False,  True], dtype=bool)";
+    ">>> np.@prefix@_@name@(np.arange(3), np.arange(3))\n"
+    "@res1@\n"
+    ">>> np.@prefix@_@name@([[1, 2], [0, 0], [1, 2]], [1, 2])\n"
+    "@res2@";
 
+/**end repeat**/
 
-/* -------------------------------------------------------------------------- */
-/* function to create and register all gufuncs*/
+/* function to create and register all gufuncs */
 
-void InitLogicalGufuncs(PyObject *dictionary, 
-                        PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc) {
+void InitLogicalGufuncs(PyObject *dictionary,
+                        PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc)
+{
     PyObject *f;
 
 /**begin repeat
-* #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,
-          all_greater_equal,any_equal,any_not_equal,any_less,any_less_equal,
-          any_greater,any_greater_equal#
-*/
+ * #NAME = all_equal,all_not_equal,all_less,all_less_equal,all_greater,
+ *         all_greater_equal,any_equal,any_not_equal,any_less,any_less_equal,
+ *         any_greater,any_greater_equal#
+ */
 
-    { // open bracket surrounding inner repeat
+    { /* open bracket surrounding inner repeat */
 
         static PyUFuncGenericFunction @NAME@_funcs_base[] = {
 
 /**begin repeat1
-* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
-*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
-*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
-*/
+ * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
+ */
 
-            base_@TYPE@_@NAME@,
+            @TYPE@_@NAME@,
 
 /**end repeat1**/
 
-        }; // close array of funcs
+        };
         static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
 /**begin repeat1
-* #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
-*         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
-*         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
-*/
+ * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
+ */
 
-            base_@TYPE@_@NAME@_avx,
+            @TYPE@_@NAME@_avx,
 
 /**end repeat1**/
 
-        }; // close array of funcs
+        };
 
         PyUFuncGenericFunction * funcs = @NAME@_funcs_base;
 
@@ -511,20 +242,21 @@ void InitLogicalGufuncs(PyObject *dictionary,
         f = createPyUFunc(funcs,
                           array_of_nulls,
                           types,
-                          sizeof(types),  // number of types
-                          2,              // number of inputs
-                          1,              // number of outputs
+                          sizeof(types) / 3,  /* number of types */
+                          2,                  /* number of inputs */
+                          1,                  /* number of outputs */
                           PyUFunc_None,
                           "@NAME@",
                           (char*) @NAME@_doc,
-                          0,              // unused
+                          0,              /* unused */
                           "(i),(i)->()");
 
+        ((PyUFuncObject *)f)->type_resolver =
+            &PyUFunc_SimpleBinaryComparisonTypeResolver;
         PyDict_SetItemString(dictionary, "@NAME@", f);
         Py_DECREF(f);
 
-    }; // close bracket surrounding inner repeat
+    }; /* close bracket surrounding inner repeat */
 
 /**end repeat**/
-
 }
diff --git a/numpy/core/src/umath/logical_gufuncs.h b/numpy/core/src/umath/logical_gufuncs.h
index ab3aa429dc02..e51d918f9335 100644
--- a/numpy/core/src/umath/logical_gufuncs.h
+++ b/numpy/core/src/umath/logical_gufuncs.h
@@ -1,13 +1,18 @@
-typedef PyObject* (*PyUFunc_FromFuncAndDataAndSignature_t)(PyUFuncGenericFunction*,
-                                                           void**,
-                                                           char*,
-                                                           int,
-                                                           int,
-                                                           int,
-                                                           int,
-                                                           const char*,
-                                                           const char*,
-                                                           int,
-                                                           const char*);
+#ifndef _NPY_LOGICAL_GUFUNCS_H_
+#define _NPY_LOGICAL_GUFUNCS_H_
+typedef PyObject*
+(*PyUFunc_FromFuncAndDataAndSignature_t)(PyUFuncGenericFunction*,
+                                         void**,
+                                         char*,
+                                         int,
+                                         int,
+                                         int,
+                                         int,
+                                         const char*,
+                                         const char*,
+                                         int,
+                                         const char*);
 
-void InitLogicalGufuncs(PyObject *dictionary, PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc);
+void InitLogicalGufuncs(PyObject *dictionary,
+                        PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc);
+#endif
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index c5b5a944cc39..42e4aa736c91 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from numpy.testing import (
-    run_module_suite, assert_equal
+    TestCase, run_module_suite, assert_equal
 )
 
 float_types = [np.float32, np.float64, np.longdouble]
@@ -52,5 +52,16 @@ def test_complex():
                     yield x
 
 
+class TestLogicalGUFuncs(TestCase):
+    def test_structure(self):
+        """Check types/nin/nout of every all_*/any_* comparison gufunc."""
+        ops = ['equal', 'not_equal', 'less', 'less_equal', 'greater',
+               'greater_equal']
+        for op in [getattr(np, p + o) for p in ('all_', 'any_') for o in ops]:
+            self.assertGreater(len(op.types), 0)
+            self.assertEqual(op.nin, 2)
+            self.assertEqual(op.nout, 1)
+
+
 if __name__ == "__main__":
     run_module_suite()

From e9d2ec1565ba072121d81e5b797a79dfa08eb83f Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Wed, 22 Mar 2017 19:30:36 +0100
Subject: [PATCH 18/26] add datetime loops

---
 numpy/core/src/umath/logical_gufuncs.c.src | 134 ++++++++++++++++++++-
 numpy/core/src/umath/loops.c.src           |   2 +
 numpy/core/tests/test_datetime.py          |  30 +++--
 3 files changed, 151 insertions(+), 15 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index b52a1e3b8922..9af8888e4f71 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -108,6 +108,126 @@ static NPY_GCC_OPT_3 @ATTR@ void
 /**end repeat1**/
 /**end repeat**/
 
+/**begin repeat
+ * #TYPE = npy_datetime, npy_timedelta#
+ */
+
+/**begin repeat1
+ * #NAME = all,any#
+ * #ALL_OR_ANY = NPY_TRUE,NPY_FALSE#
+ * #INV = ,!#
+ */
+
+/**begin repeat2
+ * #OPNAME = equal,less,less_equal,greater,greater_equal#
+ * #OP = ==,<,<=,>,>=#
+ */
+
+static  void
+@TYPE@_@NAME@_@OPNAME@(char **args, npy_intp *dimensions,
+                       npy_intp* steps, void* data)
+{
+    npy_intp n;
+    npy_intp N = dimensions[0], nI = dimensions[1];
+    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
+    npy_intp a_I = steps[3], b_I = steps[4];
+    /* N outer iterations, each reducing nI element pairs to one npy_bool */
+    for (n = 0; n < N; n++) {
+        npy_intp i;
+        char * a_i = a_n;
+        char * b_i = b_n;
+        /* default result; flipped when the scan below stops early */
+        *((npy_bool *)c_n) = @ALL_OR_ANY@;
+
+        for (i=0; i < nI; i++) {
+            @TYPE@ a = *(@TYPE@ *)a_i;
+            @TYPE@ b = *(@TYPE@ *)b_i;
+            npy_bool res = @INV@(a @OP@ b);
+            /* warn if a comparison involving NaT evaluates true */
+            if ((a == NPY_DATETIME_NAT || b == NPY_DATETIME_NAT) && @INV@res) {
+                NPY_ALLOW_C_API_DEF;
+                NPY_ALLOW_C_API;
+                /* 2016-01-18, 1.11 */
+                if (DEPRECATE_FUTUREWARNING(
+                                "In the future, 'NAT @OP@ x' and 'x @OP@ NAT' "
+                                "will always be False.") < 0) {
+                    NPY_DISABLE_C_API;
+                    return;
+                }
+                NPY_DISABLE_C_API;
+            }
+            /* res true: keep scanning; otherwise the result is decided */
+            if (res) {
+                a_i += a_I;
+                b_i += b_I;
+            } else {
+                *((npy_bool *)c_n) = !@ALL_OR_ANY@;
+                break;
+            }
+        }
+
+        a_n += a_N;
+        b_n += b_N;
+        c_n += c_N;
+    }
+}
+
+/**end repeat2**/
+
+static  void
+@TYPE@_@NAME@_not_equal(char **args, npy_intp *dimensions,
+                        npy_intp* steps, void* data)
+{
+    npy_intp n;
+    npy_intp N = dimensions[0], nI = dimensions[1];
+    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
+    npy_intp a_I = steps[3], b_I = steps[4];
+    /* N outer iterations, each reducing nI element pairs to one npy_bool */
+    for (n = 0; n < N; n++) {
+        npy_intp i;
+        char * a_i = a_n;
+        char * b_i = b_n;
+        /* default result; flipped when the scan below stops early */
+        *((npy_bool *)c_n) = @ALL_OR_ANY@;
+
+        for (i=0; i < nI; i++) {
+            @TYPE@ a = *(@TYPE@ *)a_i;
+            @TYPE@ b = *(@TYPE@ *)b_i;
+            npy_bool res = @INV@(a != b);
+            /* warn only when both operands are NaT (see message below) */
+            if (a == NPY_DATETIME_NAT && b == NPY_DATETIME_NAT) {
+                NPY_ALLOW_C_API_DEF
+                NPY_ALLOW_C_API;
+                /* 2016-01-18, 1.11 */
+                if (DEPRECATE_FUTUREWARNING(
+                                    "In the future, NAT != NAT will be True "
+                                    "rather than False.") < 0) {
+                    NPY_DISABLE_C_API;
+                    return;
+                }
+                NPY_DISABLE_C_API;
+            }
+            /* res true: keep scanning; otherwise the result is decided */
+            if (res) {
+                a_i += a_I;
+                b_i += b_I;
+            } else {
+                *((npy_bool *)c_n) = !@ALL_OR_ANY@;
+                break;
+            }
+        }
+
+        a_n += a_N;
+        b_n += b_N;
+        c_n += c_N;
+    }
+}
+
+/**end repeat1**/
+/**end repeat**/
+
 /* create type arrays for each gufunc, which are all identical */
 static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
@@ -127,7 +247,10 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_BOOL,
                        NPY_CFLOAT, NPY_CFLOAT, NPY_BOOL,
                        NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
-                       NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL};
+                       NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL,
+
+                       NPY_DATETIME, NPY_DATETIME, NPY_BOOL,
+                       NPY_TIMEDELTA, NPY_TIMEDELTA, NPY_BOOL};
 
 
 /* create array of nulls for "data" for each gufunc type */
@@ -136,7 +259,8 @@ static void *array_of_nulls[] = {
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+    (void *)NULL, (void *)NULL
 };
 
 
@@ -216,7 +340,8 @@ void InitLogicalGufuncs(PyObject *dictionary,
             @TYPE@_@NAME@,
 
 /**end repeat1**/
-
+            npy_datetime_@NAME@,
+            npy_timedelta_@NAME@
         };
         static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
@@ -229,7 +354,8 @@ void InitLogicalGufuncs(PyObject *dictionary,
             @TYPE@_@NAME@_avx,
 
 /**end repeat1**/
-
+            npy_datetime_@NAME@,
+            npy_timedelta_@NAME@
         };
 
         PyUFuncGenericFunction * funcs = @NAME@_funcs_base;
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 107d525fcacb..c364eaec39d8 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1196,6 +1196,7 @@ NPY_NO_EXPORT void
 NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
+    /* NOTE logical_gufuncs.c.src implements the same code */
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
@@ -1221,6 +1222,7 @@ NPY_NO_EXPORT void
 NPY_NO_EXPORT void
 @TYPE@_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
+    /* NOTE logical_gufuncs.c.src implements the same code */
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 94391f84c880..5e600f8cecf1 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1097,24 +1097,32 @@ def test_datetime_compare_nat(self):
         dt_other = np.datetime64('2000-01-01')
         td_nat = np.timedelta64('NaT', 'h')
         td_other = np.timedelta64(1, 'h')
+        dt_nat = np.array([np.datetime64('NaT', 'D')]*2)
+        dt_other = np.array([np.datetime64('2000-01-01')]*2)
+        td_nat = np.array([np.timedelta64('NaT', 'h')]*2)
+        td_other = np.array([np.timedelta64(1, 'h')]*2)
 
         with suppress_warnings() as sup:
             # The assert warns contexts will again see the warning:
             sup.filter(FutureWarning, ".*NAT")
 
             for op in [np.equal, np.less, np.less_equal,
-                       np.greater, np.greater_equal]:
-                if op(dt_nat, dt_nat):
+                       np.greater, np.greater_equal,
+                       np.all_equal, np.all_less, np.all_less_equal,
+                       np.all_greater, np.all_greater_equal,
+                       np.any_equal, np.any_less, np.any_less_equal,
+                       np.any_greater, np.any_greater_equal]:
+                if op(dt_nat, dt_nat).all():
                     assert_warns(FutureWarning, op, dt_nat, dt_nat)
-                if op(dt_nat, dt_other):
+                if op(dt_nat, dt_other).all():
                     assert_warns(FutureWarning, op, dt_nat, dt_other)
-                if op(dt_other, dt_nat):
+                if op(dt_other, dt_nat).all():
                     assert_warns(FutureWarning, op, dt_other, dt_nat)
-                if op(td_nat, td_nat):
+                if op(td_nat, td_nat).all():
                     assert_warns(FutureWarning, op, td_nat, td_nat)
-                if op(td_nat, td_other):
+                if op(td_nat, td_other).all():
                     assert_warns(FutureWarning, op, td_nat, td_other)
-                if op(td_other, td_nat):
+                if op(td_other, td_nat).all():
                     assert_warns(FutureWarning, op, td_other, td_nat)
 
             assert_warns(FutureWarning, np.not_equal, dt_nat, dt_nat)
@@ -1122,10 +1130,10 @@ def test_datetime_compare_nat(self):
 
         with suppress_warnings() as sup:
             sup.record(FutureWarning)
-            assert_(np.not_equal(dt_nat, dt_other))
-            assert_(np.not_equal(dt_other, dt_nat))
-            assert_(np.not_equal(td_nat, td_other))
-            assert_(np.not_equal(td_other, td_nat))
+            assert_(np.not_equal(dt_nat, dt_other).all())
+            assert_(np.not_equal(dt_other, dt_nat).all())
+            assert_(np.not_equal(td_nat, td_other).all())
+            assert_(np.not_equal(td_other, td_nat).all())
             self.assertEqual(len(sup.log), 0)
 
     def test_datetime_minmax(self):

From 88446e170470c9577244c4178d105112f3be4074 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Thu, 23 Mar 2017 13:05:49 +0100
Subject: [PATCH 19/26] add object loops

---
 numpy/core/src/umath/logical_gufuncs.c.src | 73 +++++++++++++++++++++-
 numpy/core/src/umath/loops.c.src           |  1 +
 numpy/core/tests/test_datetime.py          | 12 ++--
 numpy/core/tests/test_logical_gufuncs.py   | 23 ++++++-
 numpy/core/tests/test_umath.py             | 12 ++++
 5 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 9af8888e4f71..487a463f17df 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -228,6 +228,74 @@ static  void
 /**end repeat1**/
 /**end repeat**/
 
+/**begin repeat
+ * #NAME = all,any#
+ * #ALL_OR_ANY = NPY_TRUE,NPY_FALSE#
+ * #INV = ,!#
+ */
+
+/**begin repeat1
+ * #OPNAME = equal, not_equal, greater, greater_equal, less, less_equal#
+ * #OP = EQ, NE, GT, GE, LT, LE#
+ */
+NPY_NO_EXPORT void
+OBJECT_@NAME@_@OPNAME@(char **args, npy_intp *dimensions,
+                       npy_intp *steps, void *NPY_UNUSED(func))
+{
+    npy_intp n;
+    npy_intp N = dimensions[0], nI = dimensions[1];
+    char *a_n = args[0], *b_n = args[1], *c_n = args[2];
+    npy_intp a_N = steps[0], b_N = steps[1], c_N = steps[2];
+    npy_intp a_I = steps[3], b_I = steps[4];
+    /* N outer iterations, each reducing nI object pairs to one npy_bool */
+    for (n = 0; n < N; n++) {
+        npy_intp i;
+        char * a_i = a_n;
+        char * b_i = b_n;
+        /* default result; flipped when the scan below stops early */
+        *((npy_bool *)c_n) = @ALL_OR_ANY@;
+
+        for (i=0; i < nI; i++) {
+            int res;
+            PyObject *ret_obj;
+            PyObject * a = *(PyObject **)a_i;
+            PyObject * b = *(PyObject **)b_i;
+            /* NULL slots are compared as None */
+            a = a ? a : Py_None;
+            b = b ? b : Py_None;
+
+            /*
+             * Do not use RichCompareBool because it includes an identity check
+             * for == and !=. This is wrong for elementwise behaviour, since it
+             * means that NaN can be equal to NaN and an array is equal to
+             * itself.
+             */
+            ret_obj = PyObject_RichCompare(a, b, Py_@OP@);
+            if (ret_obj == NULL) {
+                return;
+            }
+            res = PyObject_IsTrue(ret_obj);
+            Py_DECREF(ret_obj);
+            if (res == -1) {
+                return;
+            }
+            /* keep scanning while undecided; otherwise store the result */
+            if (@INV@res) {
+                a_i += a_I;
+                b_i += b_I;
+            } else {
+                *((npy_bool *)c_n) = !@ALL_OR_ANY@;
+                break;
+            }
+        }
+
+        a_n += a_N;
+        b_n += b_N;
+        c_n += c_N;
+    }
+}
+/**end repeat1**/
+/**end repeat**/
 /* create type arrays for each gufunc, which are all identical */
 static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
@@ -249,6 +317,7 @@ static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_CDOUBLE, NPY_CDOUBLE, NPY_BOOL,
                        NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_BOOL,
 
+                       NPY_OBJECT, NPY_OBJECT, NPY_BOOL,
                        NPY_DATETIME, NPY_DATETIME, NPY_BOOL,
                        NPY_TIMEDELTA, NPY_TIMEDELTA, NPY_BOOL};
 
@@ -260,7 +329,7 @@ static void *array_of_nulls[] = {
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-    (void *)NULL, (void *)NULL
+    (void *)NULL, (void *)NULL, (void *)NULL
 };
 
 
@@ -340,6 +409,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
             @TYPE@_@NAME@,
 
 /**end repeat1**/
+            OBJECT_@NAME@,
             npy_datetime_@NAME@,
             npy_timedelta_@NAME@
         };
@@ -354,6 +424,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
             @TYPE@_@NAME@_avx,
 
 /**end repeat1**/
+            OBJECT_@NAME@,
             npy_datetime_@NAME@,
             npy_timedelta_@NAME@
         };
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index c364eaec39d8..9cc14f56e538 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -2663,6 +2663,7 @@ NPY_NO_EXPORT void
  */
 NPY_NO_EXPORT void
 OBJECT_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) {
+    /* NOTE: the OBJECT loops in logical_gufuncs.c.src duplicate this logic */
     BINARY_LOOP {
         int ret;
         PyObject *ret_obj;
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 5e600f8cecf1..c74969d907b0 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1093,14 +1093,10 @@ def test_datetime_compare(self):
         assert_equal(np.greater_equal(a, b), [1, 1, 0, 1, 0])
 
     def test_datetime_compare_nat(self):
-        dt_nat = np.datetime64('NaT', 'D')
-        dt_other = np.datetime64('2000-01-01')
-        td_nat = np.timedelta64('NaT', 'h')
-        td_other = np.timedelta64(1, 'h')
-        dt_nat = np.array([np.datetime64('NaT', 'D')]*2)
-        dt_other = np.array([np.datetime64('2000-01-01')]*2)
-        td_nat = np.array([np.timedelta64('NaT', 'h')]*2)
-        td_other = np.array([np.timedelta64(1, 'h')]*2)
+        dt_nat = np.array([np.datetime64('NaT', 'D')])
+        dt_other = np.array([np.datetime64('2000-01-01')])
+        td_nat = np.array([np.timedelta64('NaT', 'h')])
+        td_other = np.array([np.timedelta64(1, 'h')])
 
         with suppress_warnings() as sup:
             # The assert warns contexts will again see the warning:
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index 42e4aa736c91..09db10a2cdf5 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -7,8 +7,9 @@
 
 float_types = [np.float32, np.float64, np.longdouble]
 complex_types = [np.cfloat, np.cdouble, np.clongdouble]
-int_types = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, 
-          np.uint64, np.longlong, np.ulonglong]
+int_types = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
+             np.int64, np.uint64, np.longlong, np.ulonglong]
+datetime = ['M8[s]', 'm8[h]']
 
 # helper functions
 def check(f, x1, x2, expected):
@@ -34,7 +35,8 @@ def test_real():
     inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
     for i in range(inputs.shape[0]):
         for j in range(inputs.shape[0]):
-            for dtype in int_types + float_types + complex_types:
+            for dtype in (int_types + float_types + complex_types +
+                          [object] + datetime):
                 x1 = inputs[i, :].astype(dtype)
                 x2 = inputs[j, :].astype(dtype)
                 for x in check_all(x1, x2):
@@ -52,6 +54,21 @@ def test_complex():
                     yield x
 
 
+def test_simd():
+    for dtype in [np.float32, np.float64, np.int32]:
+        x1 = np.arange(4000, dtype=dtype)
+        x2 = x1.copy()
+        yield check, np.all_equal, x1, x2, (x1==x2).all()
+        x2[-1] = -1
+        yield check, np.all_equal, x1, x2, (x1==x2).all()
+        x2 = x1.copy()
+        x2[500] = -2
+        yield check, np.all_equal, x1, x2, (x1==x2).all()
+        x2 = x1.copy()
+        x2[0] = -3
+        yield check, np.all_equal, x1, x2, (x1==x2).all()
+
+
 class TestLogicalGUFuncs(TestCase):
     def test_structure(self):
         for op in [np.all_equal, np.all_less, np.all_less_equal,
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 965166934bb9..1e219f9608bf 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -179,6 +179,8 @@ def test_ignore_object_identity_in_equal(self):
         # is not a simple boolean, e.g., arrays that are compared elementwise.
         a = np.array([np.array([1, 2, 3]), None], dtype=object)
         assert_raises(ValueError, np.equal, a, a)
+        assert_raises(ValueError, np.all_equal, a, a)
+        assert_raises(ValueError, np.any_equal, a, a)
 
         # Check error raised when comparing identical non-comparable objects.
         class FunkyType(object):
@@ -187,16 +189,22 @@ def __eq__(self, other):
 
         a = np.array([FunkyType()])
         assert_raises(TypeError, np.equal, a, a)
+        assert_raises(TypeError, np.all_equal, a, a)
+        assert_raises(TypeError, np.any_equal, a, a)
 
         # Check identity doesn't override comparison mismatch.
         a = np.array([np.nan], dtype=object)
         assert_equal(np.equal(a, a), [False])
+        assert_equal(np.all_equal(a, a), False)
+        assert_equal(np.any_equal(a, a), False)
 
     def test_ignore_object_identity_in_not_equal(self):
         # Check error raised when comparing identical objects whose comparison
         # is not a simple boolean, e.g., arrays that are compared elementwise.
         a = np.array([np.array([1, 2, 3]), None], dtype=object)
         assert_raises(ValueError, np.not_equal, a, a)
+        assert_raises(ValueError, np.all_not_equal, a, a)
+        assert_raises(ValueError, np.any_not_equal, a, a)
 
         # Check error raised when comparing identical non-comparable objects.
         class FunkyType(object):
@@ -205,10 +213,14 @@ def __ne__(self, other):
 
         a = np.array([FunkyType()])
         assert_raises(TypeError, np.not_equal, a, a)
+        assert_raises(TypeError, np.all_not_equal, a, a)
+        assert_raises(TypeError, np.any_not_equal, a, a)
 
         # Check identity doesn't override comparison mismatch.
         a = np.array([np.nan], dtype=object)
         assert_equal(np.not_equal(a, a), [True])
+        assert_equal(np.all_not_equal(a, a), True)
+        assert_equal(np.any_not_equal(a, a), True)
 
 
 class TestDivision(TestCase):

From d86fb1eb99dced10d3bbf46843c9f751cd0686c5 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Thu, 23 Mar 2017 13:11:56 +0100
Subject: [PATCH 20/26] add bool loops

---
 numpy/core/src/umath/logical_gufuncs.c.src | 7 +++++--
 numpy/core/tests/test_logical_gufuncs.py   | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 487a463f17df..f953e004513b 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -297,7 +297,8 @@ OBJECT_@NAME@_@OPNAME@(char **args, npy_intp *dimensions,
 /**end repeat**/
 
 /* create type arrays for each gufunc, which are all identical */
-static char types[] = {NPY_BYTE, NPY_BYTE, NPY_BOOL,
+static char types[] = {NPY_BOOL, NPY_BOOL, NPY_BOOL,
+                       NPY_BYTE, NPY_BYTE, NPY_BOOL,
                        NPY_UBYTE, NPY_UBYTE, NPY_BOOL,
                        NPY_SHORT, NPY_SHORT, NPY_BOOL,
                        NPY_USHORT, NPY_USHORT, NPY_BOOL,
@@ -329,7 +330,7 @@ static void *array_of_nulls[] = {
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-    (void *)NULL, (void *)NULL, (void *)NULL
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL
 };
 
 
@@ -400,6 +401,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
 
         static PyUFuncGenericFunction @NAME@_funcs_base[] = {
 
+            npy_ubyte_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
@@ -415,6 +417,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
         };
         static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
+            npy_ubyte_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index 09db10a2cdf5..284db8f442de 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -7,8 +7,8 @@
 
 float_types = [np.float32, np.float64, np.longdouble]
 complex_types = [np.cfloat, np.cdouble, np.clongdouble]
-int_types = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
-             np.int64, np.uint64, np.longlong, np.ulonglong]
+int_types = [np.bool, np.int8, np.uint8, np.int16, np.uint16, np.int32,
+             np.uint32, np.int64, np.uint64, np.longlong, np.ulonglong]
 datetime = ['M8[s]', 'm8[h]']
 
 # helper functions

From fefce3d18a92ffa3ce1f5fd4d2103dde03220b56 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Thu, 23 Mar 2017 13:22:41 +0100
Subject: [PATCH 21/26] add half loops

---
 numpy/core/src/umath/logical_gufuncs.c.src | 24 +++++++++++++++++-----
 numpy/core/tests/test_logical_gufuncs.py   |  4 ++--
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index f953e004513b..5335a962c6d4 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -4,6 +4,7 @@
 #include "npy_config.h"
 #include "numpy/ndarraytypes.h"
 #include "numpy/ufuncobject.h"
+#include "numpy/halffloat.h"
 #include "ufunc_type_resolution.h"
 #include "logical_gufuncs.h"
 #include "lowlevel_strided_loops.h"
@@ -17,15 +18,22 @@
 #define RGT(a,b) ((a) > (b))
 #define RGE(a,b) ((a) >= (b))
 
+#define HEQ(a,b) (npy_half_eq(a, b))
+#define HNE(a,b) (npy_half_ne(a, b))
+#define HLT(a,b) (npy_half_lt(a, b))
+#define HLE(a,b) (npy_half_le(a, b))
+#define HGT(a,b) (npy_half_gt(a, b))
+#define HGE(a,b) (npy_half_ge(a, b))
+
 /* create the family of functions using a template  */
 
 #define BLOCK_SIZE 32
 
 /**begin repeat
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
- * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,R,R,R,PyArray_C,PyArray_C,PyArray_C#
+ * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
  */
 
 /**begin repeat1
@@ -310,6 +318,7 @@ static char types[] = {NPY_BOOL, NPY_BOOL, NPY_BOOL,
 
                        NPY_LONGLONG, NPY_LONGLONG, NPY_BOOL,
                        NPY_ULONGLONG, NPY_ULONGLONG, NPY_BOOL,
+                       NPY_HALF, NPY_HALF, NPY_BOOL,
                        NPY_FLOAT, NPY_FLOAT, NPY_BOOL,
                        NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL,
 
@@ -330,7 +339,8 @@ static void *array_of_nulls[] = {
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL
+    (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+    (void *)NULL
 };
 
 
@@ -404,7 +414,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
             npy_ubyte_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
 
@@ -420,7 +430,7 @@ void InitLogicalGufuncs(PyObject *dictionary,
             npy_ubyte_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_float,npy_double,
+ *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
 
@@ -439,6 +449,10 @@ void InitLogicalGufuncs(PyObject *dictionary,
             funcs = @NAME@_funcs_avx;
         }
 #endif
+
+        assert(sizeof(array_of_nulls) / sizeof(void*) == sizeof(types) / 3);
+        assert(sizeof(@NAME@_funcs_base) / sizeof(void*) == sizeof(types) / 3);
+        assert(sizeof(@NAME@_funcs_avx) / sizeof(void*) == sizeof(types) / 3);
         f = createPyUFunc(funcs,
                           array_of_nulls,
                           types,
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index 284db8f442de..ab4bd2dfbb6e 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -5,7 +5,7 @@
     TestCase, run_module_suite, assert_equal
 )
 
-float_types = [np.float32, np.float64, np.longdouble]
+float_types = [np.float16, np.float32, np.float64, np.longdouble]
 complex_types = [np.cfloat, np.cdouble, np.clongdouble]
 int_types = [np.bool, np.int8, np.uint8, np.int16, np.uint16, np.int32,
              np.uint32, np.int64, np.uint64, np.longlong, np.ulonglong]
@@ -75,7 +75,7 @@ def test_structure(self):
                    np.all_greater, np.all_greater_equal,
                    np.any_equal, np.any_less, np.any_less_equal,
                    np.any_greater, np.any_greater_equal]:
-            self.assertGreater(len(op.types), 0)
+            assert_equal(op.types, np.equal.types)
             self.assertEqual(op.nin, 2)
             self.assertEqual(op.nout, 1)
 

From ece072c0e1d5757b75f0a467343613b80d638e3e Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Fri, 24 Mar 2017 13:03:13 +0100
Subject: [PATCH 22/26] skip some duplicates for non vectorizable types

---
 numpy/core/src/umath/logical_gufuncs.c.src | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index 5335a962c6d4..ebf9535d1fb3 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -34,6 +34,7 @@
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
+ * #VECTORIZE = 1*10,0,1,1,0,1,1,0#
  */
 
 /**begin repeat1
@@ -50,11 +51,15 @@
 /**begin repeat3
  * #isa = , _avx#
  * #ISA = , AVX#
- * #CHK = 1, HAVE_ATTRIBUTE_TARGET_AVX#
+ * #CHK = 1, defined HAVE_ATTRIBUTE_TARGET_AVX && defined DO_VECTORIZE#
  * #ATTR = , NPY_GCC_TARGET_AVX#
  */
+#if @VECTORIZE@
+#define DO_VECTORIZE
+#endif
 
 #if @CHK@
+#define HAVE_@TYPE@_@fname@_@OPNAME@@isa@
 static NPY_GCC_OPT_3 @ATTR@ void
 @TYPE@_@fname@_@OPNAME@@isa@(char **args, npy_intp *dimensions,
                              npy_intp* steps, void* data)
@@ -74,7 +79,7 @@ static NPY_GCC_OPT_3 @ATTR@ void
 
         i = 0;
         /* main loop in chunks with auto vectorize simd instructions */
-        if (a_I == sizeof(@TYPE@) && b_I == sizeof(@TYPE@)) {
+        if (@VECTORIZE@ && a_I == sizeof(@TYPE@) && b_I == sizeof(@TYPE@)) {
             for (i=0; i < npy_blocked_end(0, 1, BLOCK_SIZE, nI);
                  i+=BLOCK_SIZE) {
                 unsigned int true_count = 0, j;
@@ -111,6 +116,8 @@ static NPY_GCC_OPT_3 @ATTR@ void
 }
 #endif
 
+#undef DO_VECTORIZE
+
 /**end repeat3**/
 /**end repeat2**/
 /**end repeat1**/
@@ -433,8 +440,11 @@ void InitLogicalGufuncs(PyObject *dictionary,
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
-
+#ifdef HAVE_@TYPE@_@NAME@_avx
             @TYPE@_@NAME@_avx,
+#else
+            @TYPE@_@NAME@,
+#endif
 
 /**end repeat1**/
             OBJECT_@NAME@,

From b46d6e774beaf2c6976ede0a3f8488e9501fd308 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Fri, 24 Mar 2017 14:01:31 +0100
Subject: [PATCH 23/26] remove long/longlong duplication

---
 numpy/core/src/umath/logical_gufuncs.c.src | 59 ++++++++++++++++++----
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index ebf9535d1fb3..cbf2c47cea3f 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -30,11 +30,11 @@
 #define BLOCK_SIZE 32
 
 /**begin repeat
- * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
+ * #TYPE = npy_int8,npy_uint8,npy_int16,npy_uint16,npy_int32,npy_uint32,
+ *         npy_int64,npy_uint64,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
- * #OP_PREFIX = R,R,R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
- * #VECTORIZE = 1*10,0,1,1,0,1,1,0#
+ * #OP_PREFIX = R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
+ * #VECTORIZE = 1*8,0,1,1,0,1,1,0#
  */
 
 /**begin repeat1
@@ -403,6 +403,46 @@ static char const * const @prefix@_@name@_doc =
 
 /* function to create and register all gufuncs */
 
+/*
+ * Redefine the platform-dependent integer type names (npy_long, ...) as the
+ * fixed-width names of the same size, so that e.g. long and longlong both map
+ * to the int64 functions (or int and long to int32); avoids code duplication.
+ */
+#define NPY_FUNCNAME__(type, name, suffix) type##_##name##suffix
+#define NPY_FUNCNAME_(type, name, suffix) NPY_FUNCNAME__(type, name, suffix)
+#define NPY_FUNCNAME(type, name, suffix) NPY_FUNCNAME_(type, name, suffix)
+
+/**begin repeat
+ * #name = byte, short, int, long, longlong#
+ * #NAME = BYTE, SHORT, INT, LONG, LONGLONG#
+ */
+/**begin repeat1
+ * #SIZE = 1, 2, 4, 8#
+ * #BITS = 8, 16, 32, 64#
+ */
+#if NPY_SIZEOF_@NAME@ == @SIZE@
+#define npy_@name@ npy_int@BITS@
+#define npy_u@name@ npy_uint@BITS@
+
+/**begin repeat2
+ * #OPNAME = equal,not_equal,less,less_equal,greater,greater_equal#
+ */
+/**begin repeat3
+ * #fname = all,any#
+ */
+#ifdef HAVE_npy_int@BITS@_@fname@_@OPNAME@_avx
+#define HAVE_npy_@name@_@fname@_@OPNAME@_avx
+#endif
+#ifdef HAVE_npy_uint@BITS@_@fname@_@OPNAME@_avx
+#define HAVE_npy_u@name@_@fname@_@OPNAME@_avx
+#endif
+/**end repeat3**/
+/**end repeat2**/
+
+#endif
+/**end repeat1**/
+/**end repeat**/
+
 void InitLogicalGufuncs(PyObject *dictionary,
                         PyUFunc_FromFuncAndDataAndSignature_t createPyUFunc)
 {
@@ -418,14 +458,13 @@ void InitLogicalGufuncs(PyObject *dictionary,
 
         static PyUFuncGenericFunction @NAME@_funcs_base[] = {
 
-            npy_ubyte_@NAME@, /* equal to npy_bool */
+            npy_uint8_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
-
-            @TYPE@_@NAME@,
+            NPY_FUNCNAME(@TYPE@,@NAME@,),
 
 /**end repeat1**/
             OBJECT_@NAME@,
@@ -434,16 +473,16 @@ void InitLogicalGufuncs(PyObject *dictionary,
         };
         static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
-            npy_ubyte_@NAME@, /* equal to npy_bool */
+            npy_uint8_@NAME@, /* equal to npy_bool */
 /**begin repeat1
  * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
  *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
 #ifdef HAVE_@TYPE@_@NAME@_avx
-            @TYPE@_@NAME@_avx,
+            NPY_FUNCNAME(@TYPE@,@NAME@,_avx),
 #else
-            @TYPE@_@NAME@,
+            NPY_FUNCNAME(@TYPE@,@NAME@,),
 #endif
 
 /**end repeat1**/

From fdcf7ed2d97547e6848e2fc499cb0e965fa79cfc Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Fri, 24 Mar 2017 14:22:43 +0100
Subject: [PATCH 24/26] bool needs to convert the bytes to boolean first

---
 numpy/core/src/umath/logical_gufuncs.c.src | 32 ++++++++++++----------
 numpy/core/tests/test_logical_gufuncs.py   | 10 +++++++
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index cbf2c47cea3f..fe9b1cae6b39 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -30,11 +30,13 @@
 #define BLOCK_SIZE 32
 
 /**begin repeat
- * #TYPE = npy_int8,npy_uint8,npy_int16,npy_uint16,npy_int32,npy_uint32,
- *         npy_int64,npy_uint64,npy_half,npy_float,npy_double,
- *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
- * #OP_PREFIX = R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
- * #VECTORIZE = 1*8,0,1,1,0,1,1,0#
+ * #TYPE = npy_bool,npy_int8,npy_uint8,npy_int16,npy_uint16,
+ *         npy_int32,npy_uint32,npy_int64,npy_uint64,
+ *         npy_half,npy_float,npy_double,npy_longdouble,
+ *         npy_cfloat,npy_cdouble,npy_clongdouble#
+ * #OP_PREFIX = R,R,R,R,R,R,R,R,R,H,R,R,R,PyArray_C,PyArray_C,PyArray_C#
+ * #ISBOOL = != 0,,,,,,,,,,,,,,,#
+ * #VECTORIZE = 1*9,0,1,1,0,1,1,0#
  */
 
 /**begin repeat1
@@ -84,8 +86,8 @@ static NPY_GCC_OPT_3 @ATTR@ void
                  i+=BLOCK_SIZE) {
                 unsigned int true_count = 0, j;
                 for (j=0 ; j<BLOCK_SIZE ; j++){
-                    @TYPE@ a = *(@TYPE@ *)a_i;
-                    @TYPE@ b = *(@TYPE@ *)b_i;
+                    @TYPE@ a = (*(@TYPE@ *)a_i) @ISBOOL@;
+                    @TYPE@ b = (*(@TYPE@ *)b_i) @ISBOOL@;
                     true_count += @INV@@OP_PREFIX@@OP@(a, b);
                     a_i += a_I;
                     b_i += b_I;
@@ -100,7 +102,9 @@ static NPY_GCC_OPT_3 @ATTR@ void
 
         /* check remaining elements */
         for ( ; i < nI; i++) {
-            if (@INV@@OP_PREFIX@@OP@(*(@TYPE@ *)a_i, *(@TYPE@ *)b_i)) {
+            @TYPE@ a = (*(@TYPE@ *)a_i) @ISBOOL@;
+            @TYPE@ b = (*(@TYPE@ *)b_i) @ISBOOL@;
+            if (@INV@@OP_PREFIX@@OP@(a, b)) {
                 a_i += a_I;
                 b_i += b_I;
             } else {
@@ -458,10 +462,10 @@ void InitLogicalGufuncs(PyObject *dictionary,
 
         static PyUFuncGenericFunction @NAME@_funcs_base[] = {
 
-            npy_uint8_@NAME@, /* equal to npy_bool */
 /**begin repeat1
- * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
+ * #TYPE = npy_bool,npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,
+ *         npy_long,npy_ulong,npy_longlong,npy_ulonglong,
+ *         npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
             NPY_FUNCNAME(@TYPE@,@NAME@,),
@@ -473,10 +477,10 @@ void InitLogicalGufuncs(PyObject *dictionary,
         };
         static PyUFuncGenericFunction @NAME@_funcs_avx[] = {
 
-            npy_uint8_@NAME@, /* equal to npy_bool */
 /**begin repeat1
- * #TYPE = npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,npy_long,
- *         npy_ulong,npy_longlong,npy_ulonglong,npy_half,npy_float,npy_double,
+ * #TYPE = npy_bool,npy_byte,npy_ubyte,npy_short,npy_ushort,npy_int,npy_uint,
+ *         npy_long,npy_ulong,npy_longlong,npy_ulonglong,
+ *         npy_half,npy_float,npy_double,
  *         npy_longdouble,npy_cfloat,npy_cdouble,npy_clongdouble#
  */
 #ifdef HAVE_@TYPE@_@NAME@_avx
diff --git a/numpy/core/tests/test_logical_gufuncs.py b/numpy/core/tests/test_logical_gufuncs.py
index ab4bd2dfbb6e..1e4dc0325fd0 100644
--- a/numpy/core/tests/test_logical_gufuncs.py
+++ b/numpy/core/tests/test_logical_gufuncs.py
@@ -43,6 +43,16 @@ def test_real():
                     yield x
 
 
+def test_bool():
+    inputs = np.array([[0,0],[0,12],[1,0],[1,16]])
+    for i in range(inputs.shape[0]):
+        for j in range(inputs.shape[0]):
+            x1 = inputs[i, :].astype(np.int8).view(np.bool)
+            x2 = inputs[j, :].astype(np.int8).view(np.bool)
+            for x in check_all(x1, x2):
+                yield x
+
+
 def test_complex():
     j = 1j
     for m in range(-1, 2):

From 99c13c0734f885ab93ef9e14028eda0c343ae7d7 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Fri, 24 Mar 2017 18:19:01 +0100
Subject: [PATCH 25/26] remove unnecessary template parameter

---
 numpy/core/src/umath/logical_gufuncs.c.src | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index fe9b1cae6b39..b1eda4d9ed25 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -52,7 +52,6 @@
 
 /**begin repeat3
  * #isa = , _avx#
- * #ISA = , AVX#
  * #CHK = 1, defined HAVE_ATTRIBUTE_TARGET_AVX && defined DO_VECTORIZE#
  * #ATTR = , NPY_GCC_TARGET_AVX#
  */

From ba27857b8dfda57c6bbb5f026211df53e9a7e11c Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Fri, 24 Mar 2017 18:19:24 +0100
Subject: [PATCH 26/26] fix wrong not_equal NAT conditional

---
 numpy/core/src/umath/logical_gufuncs.c.src |  2 +-
 numpy/core/tests/test_datetime.py          | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/numpy/core/src/umath/logical_gufuncs.c.src b/numpy/core/src/umath/logical_gufuncs.c.src
index b1eda4d9ed25..baab72984021 100644
--- a/numpy/core/src/umath/logical_gufuncs.c.src
+++ b/numpy/core/src/umath/logical_gufuncs.c.src
@@ -215,7 +215,7 @@ static  void
             @TYPE@ b = *(@TYPE@ *)b_i;
             npy_bool res = @INV@(a != b);
 
-            if (a == NPY_DATETIME_NAT && a == NPY_DATETIME_NAT) {
+            if (a == NPY_DATETIME_NAT && b == NPY_DATETIME_NAT) {
                 NPY_ALLOW_C_API_DEF
                 NPY_ALLOW_C_API;
                 /* 2016-01-18, 1.11 */
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index c74969d907b0..d0f57f7ee911 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1123,13 +1123,16 @@ def test_datetime_compare_nat(self):
 
             assert_warns(FutureWarning, np.not_equal, dt_nat, dt_nat)
             assert_warns(FutureWarning, np.not_equal, td_nat, td_nat)
+            assert_warns(FutureWarning, np.all_not_equal, dt_nat, dt_nat)
+            assert_warns(FutureWarning, np.any_not_equal, td_nat, td_nat)
 
         with suppress_warnings() as sup:
             sup.record(FutureWarning)
-            assert_(np.not_equal(dt_nat, dt_other).all())
-            assert_(np.not_equal(dt_other, dt_nat).all())
-            assert_(np.not_equal(td_nat, td_other).all())
-            assert_(np.not_equal(td_other, td_nat).all())
+            for op in [np.not_equal, np.all_not_equal, np.any_not_equal]:
+                assert_(op(dt_nat, dt_other).all())
+                assert_(op(dt_other, dt_nat).all())
+                assert_(op(td_nat, td_other).all())
+                assert_(op(td_other, td_nat).all())
             self.assertEqual(len(sup.log), 0)
 
     def test_datetime_minmax(self):