From 56201bb0cadbf36806aed14391d399f6a1cb6152 Mon Sep 17 00:00:00 2001 From: mattip Date: Thu, 9 May 2019 11:16:01 -0700 Subject: [PATCH 1/3] TEST: add test for non-contiguous input to ufuncs --- numpy/core/tests/test_ufunc.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 4b26c2208638..8790a8c3efe8 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1964,3 +1964,28 @@ def test_ufunc_types(ufunc): assert r.dtype == np.dtype(t) else: assert res.dtype == np.dtype(out) + +@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np) + if isinstance(getattr(np, x), np.ufunc)]) +def test_ufunc_noncontiguous(ufunc): + ''' + Check that contiguous and non-contiguous calls to ufuncs + have the same results for a few small integer inputs + ''' + for typ in ufunc.types: + # types is a list of strings like ii->i + if any(set('O?mM') & set(typ)): + # bool, object, datetime are too irregular for this simple test + continue + inp, out = typ.split('->') + args_c = [np.empty(6, t) for t in inp] + args_n = [np.empty(18, t)[::3] for t in inp] + for a in args_c: + a.flat = range(6) + for a in args_n: + a.flat = range(6) + with warnings.catch_warnings(record=True): + warnings.filterwarnings("always") + res_c = ufunc(*args_c) + res_n = ufunc(*args_n) + assert_equal(res_c, res_n) From 1afc95d44a6322ea2ad690e71a96e7b990eed3ad Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 9 May 2019 18:19:07 -0700 Subject: [PATCH 2/3] BUG: exp, log AVX loops do not use steps --- numpy/core/src/umath/loops.c.src | 19 +++++++------------ numpy/core/src/umath/simd.inc.src | 20 ++++++++++++++++++-- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 89eeb0c47302..e6d65b307d1e 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1621,21 +1621,16 
@@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE NPY_NO_EXPORT NPY_GCC_OPT_3 void FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { + if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) { + UNARY_LOOP { #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS - @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); + @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1); #else - /* - * This is the path it would take if ISA was runtime detected, but not - * compiled for. It fixes the error on clang6.0 which fails to compile - * AVX512F version. Not sure if I like this idea, if during runtime it - * detects AXV512F, it will end up running the scalar version instead - * of AVX2. - */ - UNARY_LOOP { - const npy_float in1 = *(npy_float *)ip1; - *(npy_float *)op1 = @scalarf@(in1); - } + const npy_float in1 = *(npy_float *)ip1; + *(npy_float *)op1 = @scalarf@(in1); #endif + } + } } /**end repeat1**/ diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 72493e3084f3..1c6ac44260f5 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b) /**begin repeat * #ISA = AVX2, AVX512F# + * #isa = avx2, avx512f# + * #REGISTER_SIZE = 32, 64# */ /* prototypes */ -#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS /**begin repeat1 * #func = exp, log# */ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS static NPY_INLINE void @ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n); +#endif -/**end repeat1**/ +static NPY_INLINE int +run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS + if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) { + 
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]); + return 1; + } + else + return 0; #endif + return 0; +} + +/**end repeat1**/ /**end repeat**/ From 4b4d2ab1f4be70467313975422b668596338c9b7 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Sat, 11 May 2019 08:52:29 -0700 Subject: [PATCH 3/3] TEST: changing range(6) to range(1,7) to avoid failure in reciprocal test --- numpy/core/src/umath/loops.c.src | 7 +++++++ numpy/core/tests/test_ufunc.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index e6d65b307d1e..a2649ed93d8d 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1623,6 +1623,13 @@ FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY { if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) { UNARY_LOOP { + /* + * We use the AVX function to compute exp/log for scalar elements as well. + * This is needed to ensure the output of strided and non-strided + * cases match. But this worsens the performance of strided arrays. + * There is a plan to fix this in a subsequent patch by using gather + * instructions for strided arrays in the AVX function. + */ #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1); #else diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 8790a8c3efe8..caeea39f4bf9 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1981,9 +1981,9 @@ def test_ufunc_noncontiguous(ufunc): args_c = [np.empty(6, t) for t in inp] args_n = [np.empty(18, t)[::3] for t in inp] for a in args_c: - a.flat = range(6) + a.flat = range(1,7) for a in args_n: - a.flat = range(6) + a.flat = range(1,7) with warnings.catch_warnings(record=True): warnings.filterwarnings("always") res_c = ufunc(*args_c)