Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

BUG: Fix FP overflow error in division when the divisor is scalar #25129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions numpy/_core/src/umath/loops_arithm_fp.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,29 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
#endif
return;
}
#if @VECTOR@
#if @is_div@ && defined(NPY_HAVE_NEON) && !NPY_SIMD_F64
/**
* The SIMD branch is disabled on armhf(armv7) due to the absence of native SIMD
* support for single-precision floating-point division. Only scalar division is
* supported natively, and without hardware for performance and accuracy comparison,
* it's challenging to evaluate the benefits of emulated SIMD intrinsic versus
* native scalar division.
*
* The `npyv_div_f32` universal intrinsic emulates the division operation using an
* approximate reciprocal combined with 3 Newton-Raphson iterations for enhanced
* precision. However, this approach has limitations:
*
* - It can cause unexpected floating-point overflows in special cases, such as when
* the divisor is subnormal (refer: https://github.com/numpy/numpy/issues/25097).
*
* - The precision may vary between the emulated SIMD and scalar division due to
* non-uniform branches (non-contiguous) in the code, leading to precision
* inconsistencies.
*
* - Considering the necessity of multiple Newton-Raphson iterations, the performance
* gain may not sufficiently offset these drawbacks.
*/
#elif @VECTOR@
if (len > npyv_nlanes_@sfx@*2 &&
!is_mem_overlap(src0, ssrc0, dst, sdst, len) &&
!is_mem_overlap(src1, ssrc1, dst, sdst, len)
Expand Down Expand Up @@ -138,8 +160,10 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
npyv_store_@sfx@((@type@*)(dst + vstep), r1);
}
for (; len > 0; len -= hstep, src0 += vstep, dst += vstep) {
#if @is_div@ || @is_mul@
#if @is_mul@
npyv_@sfx@ a = npyv_load_till_@sfx@((const @type@*)src0, len, 1.0@c@);
#elif @is_div@
npyv_@sfx@ a = npyv_load_till_@sfx@((const @type@*)src0, len, NPY_NAN@C@);
#else
npyv_@sfx@ a = npyv_load_tillz_@sfx@((const @type@*)src0, len);
#endif
Expand Down
18 changes: 15 additions & 3 deletions numpy/_core/tests/test_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
assert_, assert_equal, assert_raises, assert_raises_regex,
assert_array_equal, assert_almost_equal, assert_array_almost_equal,
assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings,
_gen_alignment_data, assert_array_almost_equal_nulp, IS_WASM, IS_MUSL,
_gen_alignment_data, assert_array_almost_equal_nulp, IS_WASM, IS_MUSL,
IS_PYPY
)
from numpy.testing._private.utils import _glibc_older_than
Expand Down Expand Up @@ -1818,6 +1818,18 @@ def test_unary_spurious_fpexception(self, ufunc, dtype, data, escape):
with assert_no_warnings():
ufunc(array)

@pytest.mark.parametrize("dtype", ('e', 'f', 'd'))
def test_divide_spurious_fpexception(self, dtype):
    # Regression test for https://github.com/numpy/numpy/issues/25097:
    # dividing by a subnormal scalar must not emit a spurious warning.
    # The unused lanes of a partially loaded dividend SIMD vector used to
    # be filled with ones, and dividing those ones by a denormal divisor
    # raised an overflow warning.
    float_type = np.dtype(dtype)
    tiny_divisor = np.finfo(float_type).smallest_subnormal
    with assert_no_warnings():
        np.zeros(128 + 1, dtype=float_type) / tiny_divisor

class TestFPClass:
@pytest.mark.parametrize("stride", [-5, -4, -3, -2, -1, 1,
2, 4, 5, 6, 7, 8, 9, 10])
Expand Down Expand Up @@ -4218,7 +4230,7 @@ def test_against_cmath(self):
a = complex(func(np.complex128(p)))
b = cfunc(p)
assert_(
abs(a - b) < atol,
abs(a - b) < atol,
"%s %s: %s; cmath: %s" % (fname, p, a, b)
)

Expand Down Expand Up @@ -4788,7 +4800,7 @@ def test_different_docstring_fails(self):
# test for attributes (which are C-level defined)
with assert_raises(RuntimeError):
ncu.add_docstring(np.ndarray.flat, "different docstring")

# And typical functions:
def func():
"""docstring"""
Expand Down