BUG: exp, log AVX loops do not use steps #13520

Merged 3 commits on May 16, 2019.
26 changes: 14 additions & 12 deletions numpy/core/src/umath/loops.c.src
@@ -1621,21 +1621,23 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
UNARY_LOOP {
/*
* We use the AVX function to compute exp/log for scalar elements as well.
* This is needed to ensure the output of strided and non-strided
* cases match. But this worsens the performance of strided arrays.
* There is plan to fix this in a subsequent patch by using gather
* instructions for strided arrays in the AVX function.
*/
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
@ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1);
Review comment from juliantaylor (Contributor), May 12, 2019:

This is not correct: the existing logic is to dispatch to the vectorized function if appropriate, and otherwise return false so the caller can run the fallback code (a minimal sketch of that pattern follows this hunk).

The logic may be unnecessarily convoluted; a dispatcher that just handles both variants itself would be nicer. The SIMD code is in need of refactoring, but that is a different issue. To fix your code, just remove this line and the preprocessor directives around it.

The NPY_GCC_OPT_3 macro can probably also be removed: the UNARY_LOOP macro does not allow for much optimization, and vectorizing it with UNARY_LOOP_FAST would require gcc's -ffast-math, which we intentionally do not use because it has unwanted side effects (mostly in the handling of special float values and exceptions).

#else
/*
* This is the path it would take if ISA was runtime detected, but not
* compiled for. It fixes the error on clang6.0 which fails to compile
* AVX512F version. Not sure if I like this idea, if during runtime it
detects AVX512F, it will end up running the scalar version instead
* of AVX2.
*/
UNARY_LOOP {
const npy_float in1 = *(npy_float *)ip1;
*(npy_float *)op1 = @scalarf@(in1);
}
const npy_float in1 = *(npy_float *)ip1;
*(npy_float *)op1 = @scalarf@(in1);
#endif
}
}
}

/**end repeat1**/
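
The dispatch pattern described in the review comment above, as a minimal self-contained C sketch. This is not NumPy's actual templated code: avx2_exp_FLOAT, run_unary_avx2_exp_FLOAT and FLOAT_exp_avx2 stand in for the @ISA@/@func@ expansions, ptrdiff_t stands in for npy_intp, the "vectorized" kernel body is a plain loop, and a simple step check replaces the real IS_BLOCKABLE_UNARY macro. The point is the control flow: the runner returns 1 only when it handled the call; otherwise the caller falls through to a scalar loop that honours the input and output steps.

#include <math.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for the @ISA@_@func@_FLOAT kernel: exp over n contiguous
 * floats.  The real kernel uses AVX2/AVX512 intrinsics. */
static void
avx2_exp_FLOAT(float *op, const float *ip, ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++) {
        op[i] = expf(ip[i]);
    }
}

/* Stand-in for run_unary_@isa@_@func@_FLOAT: use the vectorized kernel
 * only when both steps equal the element size (contiguous data), and
 * report via the return value whether the call was handled. */
static int
run_unary_avx2_exp_FLOAT(char **args, const ptrdiff_t *dimensions,
                         const ptrdiff_t *steps)
{
    if (steps[0] == (ptrdiff_t)sizeof(float) &&
        steps[1] == (ptrdiff_t)sizeof(float)) {
        avx2_exp_FLOAT((float *)args[1], (const float *)args[0],
                       dimensions[0]);
        return 1;
    }
    return 0;  /* caller must run the fallback loop */
}

/* Stand-in for FLOAT_@func@_@isa@: the ufunc inner loop.  When the
 * dispatcher declines (strided data), fall back to a scalar loop that
 * advances by the given steps. */
static void
FLOAT_exp_avx2(char **args, const ptrdiff_t *dimensions,
               const ptrdiff_t *steps)
{
    if (!run_unary_avx2_exp_FLOAT(args, dimensions, steps)) {
        char *ip = args[0], *op = args[1];
        for (ptrdiff_t i = 0; i < dimensions[0];
             i++, ip += steps[0], op += steps[1]) {
            *(float *)op = expf(*(const float *)ip);
        }
    }
}

int
main(void)
{
    /* Strided call: read every second input element.  Build with
     * `cc sketch.c -lm`. */
    float in[6] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
    float out[3];
    char *args[2] = {(char *)in, (char *)out};
    ptrdiff_t dims[1] = {3};
    ptrdiff_t steps[2] = {2 * (ptrdiff_t)sizeof(float),
                          (ptrdiff_t)sizeof(float)};
    FLOAT_exp_avx2(args, dims, steps);
    printf("%f %f %f\n", out[0], out[1], out[2]);  /* exp(0) exp(2) exp(4) */
    return 0;
}

In NumPy itself the fallback is the UNARY_LOOP macro and the contiguity test is the IS_BLOCKABLE_UNARY check visible in the simd.inc.src hunk below, but the control flow is the same.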
20 changes: 18 additions & 2 deletions numpy/core/src/umath/simd.inc.src
@@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b)

/**begin repeat
* #ISA = AVX2, AVX512F#
* #isa = avx2, avx512f#
* #REGISTER_SIZE = 32, 64#
*/

/* prototypes */
#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS

/**begin repeat1
* #func = exp, log#
*/

#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
Review comment (Contributor):

Why are SSE2 intrinsics needed in this AVX2/AVX512 section?

static NPY_INLINE void
@ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n);
#endif

/**end repeat1**/
static NPY_INLINE int
run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps)
{
#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) {
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
return 1;
}
else
return 0;
#endif
return 0;
}

/**end repeat1**/

/**end repeat**/
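
The new comment in loops.c.src above mentions a plan to handle strided arrays inside the AVX function itself using gather instructions. As a rough, standalone illustration of what that means (this is not the follow-up patch; the array, stride and values are made up, and it must be built with -mavx2 on an AVX2-capable machine), AVX2's _mm256_i32gather_ps can load eight floats that sit a fixed number of elements apart in a single instruction:

#include <immintrin.h>
#include <stdio.h>

int
main(void)
{
    /* 24 floats; we want every third one, as in arr[::3]. */
    float data[24];
    for (int i = 0; i < 24; i++) {
        data[i] = (float)i;
    }

    const int stride = 3;  /* element stride, not bytes */
    __m256i idx = _mm256_setr_epi32(0 * stride, 1 * stride, 2 * stride,
                                    3 * stride, 4 * stride, 5 * stride,
                                    6 * stride, 7 * stride);
    /* scale = 4 bytes per float: gathers data[0], data[3], ..., data[21] */
    __m256 v = _mm256_i32gather_ps(data, idx, 4);

    float out[8];
    _mm256_storeu_ps(out, v);
    for (int i = 0; i < 8; i++) {
        printf("%g ", out[i]);   /* 0 3 6 9 12 15 18 21 */
    }
    printf("\n");
    return 0;
}

Note that AVX2 only provides gathers; the matching scatter stores appear in AVX-512, so an AVX2 kernel would still have to write strided output element by element.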

25 changes: 25 additions & 0 deletions numpy/core/tests/test_ufunc.py
@@ -1964,3 +1964,28 @@ def test_ufunc_types(ufunc):
                assert r.dtype == np.dtype(t)
        else:
            assert res.dtype == np.dtype(out)

@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np)
                                   if isinstance(getattr(np, x), np.ufunc)])
def test_ufunc_noncontiguous(ufunc):
    '''
    Check that contiguous and non-contiguous calls to ufuncs
    have the same results for values in range(1, 7)
    '''
    for typ in ufunc.types:
        # types is a list of strings like ii->i
        if any(set('O?mM') & set(typ)):
            # bool, object, datetime are too irregular for this simple test
            continue
        inp, out = typ.split('->')
        args_c = [np.empty(6, t) for t in inp]
        args_n = [np.empty(18, t)[::3] for t in inp]
        for a in args_c:
            a.flat = range(1, 7)
        for a in args_n:
            a.flat = range(1, 7)
        with warnings.catch_warnings(record=True):
            warnings.filterwarnings("always")
            res_c = ufunc(*args_c)
            res_n = ufunc(*args_n)
        assert_equal(res_c, res_n)