numpy · mattip · May 16, 2019 · May 9, 2019 · May 10, 2019 · May 11, 2019
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
@@ -1621,21 +1621,23 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
 {
+    if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
+        UNARY_LOOP {
+            /*
+             * We use the AVX function to compute exp/log for scalar elements as well.
+             * This is needed to ensure the output of strided and non-strided
+             * cases match. But this worsens the performance of strided arrays.
+             * There is a plan to fix this in a subsequent patch by using gather
+             * instructions for strided arrays in the AVX function.
+             */
 #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
-    @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+            @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1);
 #else
-    /*
-     * This is the path it would take if ISA was runtime detected, but not
-     * compiled for. It fixes the error on clang6.0 which fails to compile
-     * AVX512F version. Not sure if I like this idea, if during runtime it
-     * detects AXV512F, it will end up running the scalar version instead
-     * of AVX2.
-     */
-    UNARY_LOOP {
-	const npy_float in1 = *(npy_float *)ip1;
-	*(npy_float *)op1 = @scalarf@(in1);
-    }
+            const npy_float in1 = *(npy_float *)ip1;
+            *(npy_float *)op1 = @scalarf@(in1);
 #endif
+        }
+    }
 }
 
 /**end repeat1**/

diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
@@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b)
 
 /**begin repeat
  * #ISA = AVX2, AVX512F#
+ * #isa = avx2, avx512f#
+ * #REGISTER_SIZE = 32, 64#
  */
 
 /* prototypes */
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
 
 /**begin repeat1
  * #func = exp, log#
  */
 
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
 static NPY_INLINE void
 @ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n);
+#endif
 
-/**end repeat1**/
+static NPY_INLINE int
+run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
+    if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) {
+        @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+        return 1;
+    }
+    else
+        return 0;
 #endif
+    return 0;
+}
+
+/**end repeat1**/
 
 /**end repeat**/
 

diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
@@ -1964,3 +1964,28 @@ def test_ufunc_types(ufunc):
                 assert r.dtype == np.dtype(t)
         else:
             assert res.dtype == np.dtype(out)
+
+@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np)
+                                if isinstance(getattr(np, x), np.ufunc)])
+def test_ufunc_noncontiguous(ufunc):
+    '''
+    Check that contiguous and non-contiguous calls to ufuncs
+    have the same results for values in range(1, 7)
+    '''
+    for typ in ufunc.types:
+        # types is a list of strings like ii->i
+        if any(set('O?mM') & set(typ)):
+            # bool, object, datetime, timedelta are too irregular for this test
+            continue
+        inp, out = typ.split('->')
+        args_c = [np.empty(6, t) for t in inp]
+        args_n = [np.empty(18, t)[::3] for t in inp]
+        for a in args_c:
+            a.flat = range(1,7)
+        for a in args_n:
+            a.flat = range(1,7)
+        with warnings.catch_warnings(record=True):
+            warnings.filterwarnings("always")
+            res_c = ufunc(*args_c)
+            res_n = ufunc(*args_n)
+        assert_equal(res_c, res_n)