Thanks to visit codestin.com
Credit goes to github.com

Skip to content

BUG: Fix bug in AVX-512F np.maximum and np.minimum #15612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 44 additions & 9 deletions numpy/core/src/umath/simd.inc.src
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,37 @@ abs_ptrdiff(char *a, char *b)
return (a > b) ? (a - b) : (b - a);
}

/*
 * nomemoverlap - returns true when the two strided arrays do NOT share any
 * region of memory, and false when they may overlap.
 *
 * ip, op           = base pointers of the two arrays
 * ip_size, op_size = signed extent of each array, added directly to the
 *                    char* base pointer; a negative value indicates negative
 *                    steps, i.e. the array extends downwards from its base.
 *
 * The comparisons are strict, so ranges that merely touch at an end point
 * are conservatively reported as overlapping.
 */
static NPY_INLINE npy_bool
nomemoverlap(char *ip,
             npy_intp ip_size,
             char *op,
             npy_intp op_size)
{
    /* Normalise each array to a (low, high) address pair with low <= high. */
    char *ip_lo = (ip_size < 0) ? (ip + ip_size) : ip;
    char *ip_hi = (ip_size < 0) ? ip : (ip + ip_size);
    char *op_lo = (op_size < 0) ? (op + op_size) : op;
    char *op_hi = (op_size < 0) ? op : (op + op_size);

    /* The ranges are disjoint iff one lies strictly above the other. */
    if (ip_lo > op_hi) {
        return 1;
    }
    return op_lo > ip_hi;
}

#define IS_BINARY_STRIDE_ONE(esize, vsize) \
((steps[0] == esize) && \
(steps[1] == esize) && \
Expand Down Expand Up @@ -83,21 +114,25 @@ abs_ptrdiff(char *a, char *b)
* cross page boundaries.
*
* We instead rely on i32gather/scatter_ps instructions which use a 32-bit index
* element. The index needs to be < INT_MAX to avoid overflow. MAX_STEP_SIZE ensures this.
* element. The index needs to be < INT_MAX to avoid overflow. MAX_STEP_SIZE
* ensures this. The condition also requires that the input and output arrays
* should have no overlap in memory.
*/
#define IS_BINARY_SMALL_STEPS \
#define IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP \
((abs(steps[0]) < MAX_STEP_SIZE) && \
(abs(steps[1]) < MAX_STEP_SIZE) && \
(abs(steps[2]) < MAX_STEP_SIZE))
(abs(steps[2]) < MAX_STEP_SIZE) && \
(nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
(nomemoverlap(args[1], steps[1] * dimensions[0], args[2], steps[2] * dimensions[0])))

/*
* output should be contiguous, can handle strided input data
* Input step should be smaller than MAX_STEP_SIZE for performance
* 1) Output should be contiguous, can handle strided input data
* 2) Input step should be smaller than MAX_STEP_SIZE for performance
* 3) Input and output arrays should have no overlap in memory
*/
#define IS_OUTPUT_BLOCKABLE_UNARY(esize, vsize) \
(steps[1] == (esize) && abs(steps[0]) < MAX_STEP_SIZE && \
((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \
((abs_ptrdiff(args[1], args[0]) == 0))))
(nomemoverlap(args[1], steps[1] * dimensions[0], args[0], steps[0] * dimensions[0])))

#define IS_BLOCKABLE_REDUCE(esize, vsize) \
(steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \
Expand Down Expand Up @@ -252,7 +287,7 @@ static NPY_INLINE int
run_binary_avx512f_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS && @EXISTS@
if (IS_BINARY_SMALL_STEPS) {
if (IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP) {
AVX512F_@func@_@TYPE@(args, dimensions, steps);
return 1;
}
Expand Down Expand Up @@ -1942,7 +1977,7 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s
/*
* Note: while generally indices are npy_intp, we ensure that our maximum index
* will fit in an int32 as a precondition for this function via
* IS_BINARY_SMALL_STEPS
* IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP
*/

npy_int32 index_ip1[@num_lanes@], index_ip2[@num_lanes@], index_op[@num_lanes@];
Expand Down
8 changes: 8 additions & 0 deletions numpy/core/tests/test_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -3157,6 +3157,14 @@ def test_rint_big_int():
# Rint should not change the value
assert_equal(val, np.rint(val))

@pytest.mark.parametrize('ftype', [np.float32, np.float64])
def test_memoverlap_accumulate(ftype):
    # Regression test for https://github.com/numpy/numpy/issues/15597:
    # the running maximum / minimum of a short float array must match the
    # hand-computed expected values for both float32 and float64.
    data = np.array([0.61, 0.60, 0.77, 0.41, 0.19], dtype=ftype)
    cases = (
        (np.maximum, [0.61, 0.61, 0.77, 0.77, 0.77]),
        (np.minimum, [0.61, 0.60, 0.60, 0.41, 0.19]),
    )
    for ufunc, expected in cases:
        assert_equal(ufunc.accumulate(data), np.array(expected, dtype=ftype))

def test_signaling_nan_exceptions():
with assert_no_warnings():
Expand Down