From 823c9a85f3c33052beb0292d66319faa1139f431 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 30 Oct 2025 11:25:07 +0100 Subject: [PATCH 1/6] ENH: Make FPE blas check a runtime check for all arm systems If an ARM system uses SME, some BLAS versions just set FPEs spuriously. The culprit is really Accelerate, so we might limit this to macOS as well (and then with an SME check also -- see changes prior to this commit). However, some OpenBLAS versions also caused this, although OpenBLAS is likely to clear the FPEs on their side. Closes gh-29820 --- numpy/_core/src/common/blas_utils.c | 108 ++++++------------ numpy/_core/src/common/blas_utils.h | 10 +- numpy/_core/src/multiarray/multiarraymodule.c | 19 ++- numpy/testing/_private/utils.py | 9 +- 4 files changed, 59 insertions(+), 87 deletions(-) diff --git a/numpy/_core/src/common/blas_utils.c b/numpy/_core/src/common/blas_utils.c index 409d3818ae0f..1c2ffdebbcac 100644 --- a/numpy/_core/src/common/blas_utils.c +++ b/numpy/_core/src/common/blas_utils.c @@ -1,6 +1,9 @@ +#include + #include "numpy/npy_math.h" // npy_get_floatstatus_barrier #include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN #include "blas_utils.h" +#include "npy_cblas.h" #include #include @@ -11,92 +14,50 @@ #endif #if NPY_BLAS_CHECK_FPE_SUPPORT - -/* Return whether we're running on macOS 15.4 or later +/* + * Static variable to cache runtime check of BLAS FPE support. 
*/ -static inline bool -is_macOS_version_15_4_or_later(void){ -#if !defined(__APPLE__) - return false; -#else - char *osProductVersion = NULL; - size_t size = 0; - bool ret = false; - - // Query how large OS version string should be - if(-1 == sysctlbyname("kern.osproductversion", NULL, &size, NULL, 0)){ - goto cleanup; - } - - osProductVersion = malloc(size + 1); - - // Get the OS version string - if(-1 == sysctlbyname("kern.osproductversion", osProductVersion, &size, NULL, 0)){ - goto cleanup; - } - - osProductVersion[size] = '\0'; - - // Parse the version string - int major = 0, minor = 0; - if(2 > sscanf(osProductVersion, "%d.%d", &major, &minor)) { - goto cleanup; - } - - if (major > 15 || (major == 15 && minor >= 4)) { - ret = true; - } + static bool blas_supports_fpe = true; -cleanup: - if(osProductVersion){ - free(osProductVersion); - } - - return ret; -#endif -} - -/* ARM Scalable Matrix Extension (SME) raises all floating-point error flags +/* + * ARM Scalable Matrix Extension (SME) raises all floating-point error flags * when it's used regardless of values or operations. As a consequence, * when SME is used, all FPE state is lost and special handling is needed. * * For NumPy, SME is not currently used directly, but can be used via * BLAS / LAPACK libraries. This function does a runtime check for whether * BLAS / LAPACK can use SME and special handling around FPE is required. + * + * This may be an Accelerate bug (at least OpenBLAS consider it that way) + * but when we find an ARM system with SVE we do a runtime check for whether + * FPEs are spuriously given. */ -static inline bool -BLAS_can_use_ARM_SME(void) +static inline int +set_BLAS_causes_spurious_FPEs(void) { -#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) - // ARM SME can be used by Apple's Accelerate framework for BLAS / LAPACK - // - macOS 15.4+ - // - Apple silicon M4+ - - // Does OS / Accelerate support ARM SME? 
- if(!is_macOS_version_15_4_or_later()){ - return false; + // These are all small, so just work on stack to not worry about error + // handling. + double *x = PyMem_Malloc(20*20*3*sizeof(double)); + if (x == NULL) { + PyErr_NoMemory(); + return -1; } + double *y = x + 20*20; + double *res = y + 20*20; - // Does hardware support SME? - int has_SME = 0; - size_t size = sizeof(has_SME); - if(-1 == sysctlbyname("hw.optional.arm.FEAT_SME", &has_SME, &size, NULL, 0)){ - return false; - } + npy_clear_floatstatus_barrier((char *)x); - if(has_SME){ - return true; - } -#endif + CBLAS_FUNC(cblas_dgemm)( + CblasRowMajor, CblasNoTrans, CblasNoTrans, 20, 20, 20, 1., + x, 20, y, 20, 0., res, 20); + PyMem_Free(x); - // default assume SME is not used - return false; + int fpe_status = npy_get_floatstatus_barrier((char *)x); + // Entries were all zero, so we shouldn't see any FPEs + blas_supports_fpe = fpe_status != 0; + return 0; } -/* Static variable to cache runtime check of BLAS FPE support. - */ -static bool blas_supports_fpe = true; - #endif // NPY_BLAS_CHECK_FPE_SUPPORT @@ -110,19 +71,20 @@ npy_blas_supports_fpe(void) #endif } -NPY_VISIBILITY_HIDDEN void +NPY_VISIBILITY_HIDDEN int npy_blas_init(void) { #if NPY_BLAS_CHECK_FPE_SUPPORT - blas_supports_fpe = !BLAS_can_use_ARM_SME(); + return set_BLAS_causes_spurious_FPEs(); #endif + return 0; } NPY_VISIBILITY_HIDDEN int npy_get_floatstatus_after_blas(void) { #if NPY_BLAS_CHECK_FPE_SUPPORT - if(!blas_supports_fpe){ + if (!blas_supports_fpe){ // BLAS does not support FPE and we need to return FPE state. // Instead of clearing and then grabbing state, just return // that no flags are set. 
diff --git a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h index 34d6321c2920..115e60576557 100644 --- a/numpy/_core/src/common/blas_utils.h +++ b/numpy/_core/src/common/blas_utils.h @@ -2,10 +2,14 @@ #include -/* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check +/* + * NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check * for floating-point error (FPE) support in BLAS. + * The known culprit right now is SME, likely only on mac, but that is not + * quite clear. + * This checks always on all ARM (it is a small check overall). */ -#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) +#if defined(__aarch64__) #define NPY_BLAS_CHECK_FPE_SUPPORT 1 #else #define NPY_BLAS_CHECK_FPE_SUPPORT 0 @@ -13,7 +17,7 @@ /* Initialize BLAS environment, if needed */ -NPY_VISIBILITY_HIDDEN void +NPY_VISIBILITY_HIDDEN int npy_blas_init(void); /* Runtime check if BLAS supports floating-point errors. diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 4ab3f5bae02c..44ac8a678bbb 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -4448,6 +4448,17 @@ _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg) } +static PyObject * +_blas_supports_fpe(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) { + if (npy_blas_supports_fpe()) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} + + static PyObject * _reload_guard(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) { #if !defined(PYPY_VERSION) @@ -4688,6 +4699,8 @@ static struct PyMethodDef array_module_methods[] = { METH_NOARGS, NULL}, {"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage, METH_O, NULL}, + {"_blas_supports_fpe", (PyCFunction)_blas_supports_fpe, + METH_NOARGS, NULL}, {"_reload_guard", (PyCFunction)_reload_guard, METH_NOARGS, "Give a warning on reload and big warning in 
sub-interpreters."}, @@ -4904,9 +4917,9 @@ _multiarray_umath_exec(PyObject *m) { return -1; } -#if NPY_BLAS_CHECK_FPE_SUPPORT - npy_blas_init(); -#endif + if (npy_blas_init() < 0) { + return -1; + } #if defined(MS_WIN64) && defined(__GNUC__) PyErr_WarnEx(PyExc_Warning, diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 9be98f9d2fbe..967d67e14a13 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -90,14 +90,7 @@ class KnownFailureException(Exception): IS_PYPY = sys.implementation.name == 'pypy' IS_PYSTON = hasattr(sys, "pyston_version_info") HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON -BLAS_SUPPORTS_FPE = True -if platform.system() == 'Darwin' or platform.machine() == 'arm64': - try: - blas = np.__config__.CONFIG['Build Dependencies']['blas'] - if blas['name'] == 'accelerate': - BLAS_SUPPORTS_FPE = False - except KeyError: - pass +BLAS_SUPPORTS_FPE = np._core._multiarray_umath._blas_supports_fpe() HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64 From 7bc099dc9f3ce73d08caf001af57cb825691fa81 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 30 Oct 2025 11:48:53 +0100 Subject: [PATCH 2/6] Guard for HAVE_CBLAS and always compile the helpers --- numpy/_core/meson.build | 2 +- numpy/_core/src/common/blas_utils.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index dc07586bcf8e..6dcbaea0cf1a 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1109,6 +1109,7 @@ endforeach # ------------------------------ src_multiarray_umath_common = [ 'src/common/array_assign.c', + 'src/common/blas_utils.c', 'src/common/gil_utils.c', 'src/common/mem_overlap.c', 'src/common/npy_argparse.c', @@ -1123,7 +1124,6 @@ src_multiarray_umath_common = [ ] if have_blas src_multiarray_umath_common += [ - 'src/common/blas_utils.c', 'src/common/cblasfuncs.c', 'src/common/python_xerbla.c', ] diff --git 
a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h index 115e60576557..3f5bb735281d 100644 --- a/numpy/_core/src/common/blas_utils.h +++ b/numpy/_core/src/common/blas_utils.h @@ -9,7 +9,7 @@ * quite clear. * This checks always on all ARM (it is a small check overall). */ -#if defined(__aarch64__) +#if defined(__aarch64__) && defined(HAVE_CBLAS) #define NPY_BLAS_CHECK_FPE_SUPPORT 1 #else #define NPY_BLAS_CHECK_FPE_SUPPORT 0 From fbd08f47676bea39aae58fe7b89dd72642be4644 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 30 Oct 2025 12:31:19 +0100 Subject: [PATCH 3/6] ooops, needs to be a calloc of course. --- numpy/_core/src/common/blas_utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numpy/_core/src/common/blas_utils.c b/numpy/_core/src/common/blas_utils.c index 1c2ffdebbcac..365289067b88 100644 --- a/numpy/_core/src/common/blas_utils.c +++ b/numpy/_core/src/common/blas_utils.c @@ -37,13 +37,13 @@ set_BLAS_causes_spurious_FPEs(void) { // These are all small, so just work on stack to not worry about error // handling. 
- double *x = PyMem_Malloc(20*20*3*sizeof(double)); + double *x = PyMem_Calloc(20 * 20 * 3, sizeof(double)); if (x == NULL) { PyErr_NoMemory(); return -1; } - double *y = x + 20*20; - double *res = y + 20*20; + double *y = x + 20 * 20; + double *res = y + 20 * 20; npy_clear_floatstatus_barrier((char *)x); From 7c8625e6d56f6938529346b6d24dc34a7d9f5446 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 13 Nov 2025 20:24:56 +0100 Subject: [PATCH 4/6] Move test to Python --- numpy/__init__.py | 17 ++++++ numpy/_core/src/common/blas_utils.c | 52 +++---------------- numpy/_core/src/common/blas_utils.h | 9 ++-- numpy/_core/src/multiarray/multiarraymodule.c | 23 ++++---- numpy/testing/_private/utils.py | 2 +- 5 files changed, 41 insertions(+), 62 deletions(-) diff --git a/numpy/__init__.py b/numpy/__init__.py index a0178b211258..ef7c1ed7678a 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -870,6 +870,23 @@ def _mac_os_check(): del w del _mac_os_check + def blas_fpe_check(): + # Check if BLAS adds spurious FPEs, mostly seen on M4 arms with Accelerate. + with errstate(all='raise'): + x = ones((20, 20)) + try: + x @ x + except FloatingPointError: + res = _core._multiarray_umath._blas_supports_fpe(False) + if res: # res was not modified (hardcoded to True for now) + warnings.warn( + "Spurious warnings given by blas but suppression not " + "set up on this platform. 
Please open a NumPy issue.", + UserWarning, stacklevel=2) + + blas_fpe_check() + del blas_fpe_check + def hugepage_setup(): """ We usually use madvise hugepages support, but on some old kernels it diff --git a/numpy/_core/src/common/blas_utils.c b/numpy/_core/src/common/blas_utils.c index 365289067b88..cbf8e0dc05c5 100644 --- a/numpy/_core/src/common/blas_utils.c +++ b/numpy/_core/src/common/blas_utils.c @@ -1,5 +1,3 @@ -#include - #include "numpy/npy_math.h" // npy_get_floatstatus_barrier #include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN #include "blas_utils.h" @@ -17,46 +15,7 @@ /* * Static variable to cache runtime check of BLAS FPE support. */ - static bool blas_supports_fpe = true; - -/* - * ARM Scalable Matrix Extension (SME) raises all floating-point error flags - * when it's used regardless of values or operations. As a consequence, - * when SME is used, all FPE state is lost and special handling is needed. - * - * For NumPy, SME is not currently used directly, but can be used via - * BLAS / LAPACK libraries. This function does a runtime check for whether - * BLAS / LAPACK can use SME and special handling around FPE is required. - * - * This may be an Accelerate bug (at least OpenBLAS consider it that way) - * but when we find an ARM system with SVE we do a runtime check for whether - * FPEs are spuriously given. - */ -static inline int -set_BLAS_causes_spurious_FPEs(void) -{ - // These are all small, so just work on stack to not worry about error - // handling. 
- double *x = PyMem_Calloc(20 * 20 * 3, sizeof(double)); - if (x == NULL) { - PyErr_NoMemory(); - return -1; - } - double *y = x + 20 * 20; - double *res = y + 20 * 20; - - npy_clear_floatstatus_barrier((char *)x); - - CBLAS_FUNC(cblas_dgemm)( - CblasRowMajor, CblasNoTrans, CblasNoTrans, 20, 20, 20, 1., - x, 20, y, 20, 0., res, 20); - PyMem_Free(x); - - int fpe_status = npy_get_floatstatus_barrier((char *)x); - // Entries were all zero, so we shouldn't see any FPEs - blas_supports_fpe = fpe_status != 0; - return 0; -} +static bool blas_supports_fpe = true; #endif // NPY_BLAS_CHECK_FPE_SUPPORT @@ -71,13 +30,14 @@ npy_blas_supports_fpe(void) #endif } -NPY_VISIBILITY_HIDDEN int -npy_blas_init(void) +NPY_VISIBILITY_HIDDEN bool +npy_set_blas_supports_fpe(bool value) { #if NPY_BLAS_CHECK_FPE_SUPPORT - return set_BLAS_causes_spurious_FPEs(); + blas_supports_fpe = (bool)value; + return blas_supports_fpe; #endif - return 0; + return true; // ignore input not set up on this platform } NPY_VISIBILITY_HIDDEN int diff --git a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h index 3f5bb735281d..840c4940a87e 100644 --- a/numpy/_core/src/common/blas_utils.h +++ b/numpy/_core/src/common/blas_utils.h @@ -15,11 +15,6 @@ #define NPY_BLAS_CHECK_FPE_SUPPORT 0 #endif -/* Initialize BLAS environment, if needed - */ -NPY_VISIBILITY_HIDDEN int -npy_blas_init(void); - /* Runtime check if BLAS supports floating-point errors. * true - BLAS supports FPE and one can rely on them to indicate errors * false - BLAS does not support FPE. Special handling needed for FPE state @@ -27,6 +22,10 @@ npy_blas_init(void); NPY_VISIBILITY_HIDDEN bool npy_blas_supports_fpe(void); +/* Allow setting the BLAS FPE flag from Python.*/ +NPY_VISIBILITY_HIDDEN bool +npy_set_blas_supports_fpe(bool value); + /* If BLAS supports FPE, exactly the same as npy_get_floatstatus_barrier(). * Otherwise, we can't rely on FPE state and need special handling. 
*/ diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 44ac8a678bbb..73ef0760d979 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -4429,7 +4429,6 @@ _populate_finfo_constants(PyObject *NPY_UNUSED(self), PyObject *args) } - static PyObject * _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg) { @@ -4449,12 +4448,20 @@ _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg) static PyObject * -_blas_supports_fpe(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) { - if (npy_blas_supports_fpe()) { - Py_RETURN_TRUE; +_blas_supports_fpe(PyObject *NPY_UNUSED(self), PyObject *arg) { + if (arg == Py_None) { + return PyBool_FromLong(npy_blas_supports_fpe()); + } + else if (arg == Py_True) { + return PyBool_FromLong(npy_set_blas_supports_fpe(true)); + } + else if (arg == Py_False) { + return PyBool_FromLong(npy_set_blas_supports_fpe(false)); } else { - Py_RETURN_FALSE; + PyErr_SetString(PyExc_TypeError, + "BLAS FPE support must be None, True, or False"); + return NULL; } } @@ -4700,7 +4707,7 @@ static struct PyMethodDef array_module_methods[] = { {"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage, METH_O, NULL}, {"_blas_supports_fpe", (PyCFunction)_blas_supports_fpe, - METH_NOARGS, NULL}, + METH_O, "BLAS FPE support pass None, True, or False and returns new value"}, {"_reload_guard", (PyCFunction)_reload_guard, METH_NOARGS, "Give a warning on reload and big warning in sub-interpreters."}, @@ -4917,10 +4924,6 @@ _multiarray_umath_exec(PyObject *m) { return -1; } - if (npy_blas_init() < 0) { - return -1; - } - #if defined(MS_WIN64) && defined(__GNUC__) PyErr_WarnEx(PyExc_Warning, "Numpy built with MINGW-W64 on Windows 64 bits is experimental, " \ diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 967d67e14a13..ed928a5ec7b4 100644 --- 
a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -90,7 +90,7 @@ class KnownFailureException(Exception): IS_PYPY = sys.implementation.name == 'pypy' IS_PYSTON = hasattr(sys, "pyston_version_info") HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON -BLAS_SUPPORTS_FPE = np._core._multiarray_umath._blas_supports_fpe() +BLAS_SUPPORTS_FPE = np._core._multiarray_umath._blas_supports_fpe(None) HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64 From 7e17ceb3384facaad9cc7d3ba93ec995e9c9219a Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 14 Nov 2025 10:58:26 +0100 Subject: [PATCH 5/6] Report FPE ignoring in matmul in `show_runtime()` --- numpy/lib/_utils_impl.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/numpy/lib/_utils_impl.py b/numpy/lib/_utils_impl.py index 2e1ee23d7d58..164aa4ee3d8c 100644 --- a/numpy/lib/_utils_impl.py +++ b/numpy/lib/_utils_impl.py @@ -61,6 +61,11 @@ def show_runtime(): "not_found": features_not_found } }) + config_found.append({ + "ignore_floating_point_errors_in_matmul": + not np._core._multiarray_umath._blas_supports_fpe(None), + }) + try: from threadpoolctl import threadpool_info config_found.extend(threadpool_info()) From 467897cd5eee996f8552c7d5ab10823242350954 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Sun, 16 Nov 2025 07:06:25 +0100 Subject: [PATCH 6/6] Update numpy/_core/src/common/blas_utils.h Co-authored-by: Matti Picus --- numpy/_core/src/common/blas_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h index 840c4940a87e..79d1e5ce274c 100644 --- a/numpy/_core/src/common/blas_utils.h +++ b/numpy/_core/src/common/blas_utils.h @@ -9,7 +9,7 @@ * quite clear. * This checks always on all ARM (it is a small check overall). 
*/ -#if defined(__aarch64__) && defined(HAVE_CBLAS) +#if defined(__APPLE__) && defined(__aarch64__) && defined(HAVE_CBLAS) #define NPY_BLAS_CHECK_FPE_SUPPORT 1 #else #define NPY_BLAS_CHECK_FPE_SUPPORT 0