diff --git a/numpy/__init__.py b/numpy/__init__.py index a0178b211258..ef7c1ed7678a 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -870,6 +870,23 @@ def _mac_os_check(): del w del _mac_os_check + def blas_fpe_check(): + # Check if BLAS adds spurious FPEs, mostly seen on M4 arms with Accelerate. + with errstate(all='raise'): + x = ones((20, 20)) + try: + x @ x + except FloatingPointError: + res = _core._multiarray_umath._blas_supports_fpe(False) + if res: # res was not modified (hardcoded to True for now) + warnings.warn( + "Spurious warnings given by blas but suppression not " + "set up on this platform. Please open a NumPy issue.", + UserWarning, stacklevel=2) + + blas_fpe_check() + del blas_fpe_check + def hugepage_setup(): """ We usually use madvise hugepages support, but on some old kernels it diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index dc07586bcf8e..6dcbaea0cf1a 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1109,6 +1109,7 @@ endforeach # ------------------------------ src_multiarray_umath_common = [ 'src/common/array_assign.c', + 'src/common/blas_utils.c', 'src/common/gil_utils.c', 'src/common/mem_overlap.c', 'src/common/npy_argparse.c', @@ -1123,7 +1124,6 @@ src_multiarray_umath_common = [ ] if have_blas src_multiarray_umath_common += [ - 'src/common/blas_utils.c', 'src/common/cblasfuncs.c', 'src/common/python_xerbla.c', ] diff --git a/numpy/_core/src/common/blas_utils.c b/numpy/_core/src/common/blas_utils.c index 409d3818ae0f..cbf8e0dc05c5 100644 --- a/numpy/_core/src/common/blas_utils.c +++ b/numpy/_core/src/common/blas_utils.c @@ -1,6 +1,7 @@ #include "numpy/npy_math.h" // npy_get_floatstatus_barrier #include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN #include "blas_utils.h" +#include "npy_cblas.h" #include #include @@ -11,89 +12,8 @@ #endif #if NPY_BLAS_CHECK_FPE_SUPPORT - -/* Return whether we're running on macOS 15.4 or later - */ -static inline bool -is_macOS_version_15_4_or_later(void){ 
-#if !defined(__APPLE__) - return false; -#else - char *osProductVersion = NULL; - size_t size = 0; - bool ret = false; - - // Query how large OS version string should be - if(-1 == sysctlbyname("kern.osproductversion", NULL, &size, NULL, 0)){ - goto cleanup; - } - - osProductVersion = malloc(size + 1); - - // Get the OS version string - if(-1 == sysctlbyname("kern.osproductversion", osProductVersion, &size, NULL, 0)){ - goto cleanup; - } - - osProductVersion[size] = '\0'; - - // Parse the version string - int major = 0, minor = 0; - if(2 > sscanf(osProductVersion, "%d.%d", &major, &minor)) { - goto cleanup; - } - - if (major > 15 || (major == 15 && minor >= 4)) { - ret = true; - } - -cleanup: - if(osProductVersion){ - free(osProductVersion); - } - - return ret; -#endif -} - -/* ARM Scalable Matrix Extension (SME) raises all floating-point error flags - * when it's used regardless of values or operations. As a consequence, - * when SME is used, all FPE state is lost and special handling is needed. - * - * For NumPy, SME is not currently used directly, but can be used via - * BLAS / LAPACK libraries. This function does a runtime check for whether - * BLAS / LAPACK can use SME and special handling around FPE is required. - */ -static inline bool -BLAS_can_use_ARM_SME(void) -{ -#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) - // ARM SME can be used by Apple's Accelerate framework for BLAS / LAPACK - // - macOS 15.4+ - // - Apple silicon M4+ - - // Does OS / Accelerate support ARM SME? - if(!is_macOS_version_15_4_or_later()){ - return false; - } - - // Does hardware support SME? - int has_SME = 0; - size_t size = sizeof(has_SME); - if(-1 == sysctlbyname("hw.optional.arm.FEAT_SME", &has_SME, &size, NULL, 0)){ - return false; - } - - if(has_SME){ - return true; - } -#endif - - // default assume SME is not used - return false; -} - -/* Static variable to cache runtime check of BLAS FPE support. 
+/* + * Static variable to cache runtime check of BLAS FPE support. */ static bool blas_supports_fpe = true; @@ -110,19 +30,21 @@ npy_blas_supports_fpe(void) #endif } -NPY_VISIBILITY_HIDDEN void -npy_blas_init(void) +NPY_VISIBILITY_HIDDEN bool +npy_set_blas_supports_fpe(bool value) { #if NPY_BLAS_CHECK_FPE_SUPPORT - blas_supports_fpe = !BLAS_can_use_ARM_SME(); + blas_supports_fpe = (bool)value; + return blas_supports_fpe; #endif + return true; // ignore input not set up on this platform } NPY_VISIBILITY_HIDDEN int npy_get_floatstatus_after_blas(void) { #if NPY_BLAS_CHECK_FPE_SUPPORT - if(!blas_supports_fpe){ + if (!blas_supports_fpe){ // BLAS does not support FPE and we need to return FPE state. // Instead of clearing and then grabbing state, just return // that no flags are set. diff --git a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h index 34d6321c2920..79d1e5ce274c 100644 --- a/numpy/_core/src/common/blas_utils.h +++ b/numpy/_core/src/common/blas_utils.h @@ -2,20 +2,19 @@ #include -/* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check +/* + * NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check * for floating-point error (FPE) support in BLAS. + * The known culprit right now is SME, likely only on mac, but that is not + * quite clear. + * This check always runs on Apple ARM builds (it is a small check overall). */ -#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) +#if defined(__APPLE__) && defined(__aarch64__) && defined(HAVE_CBLAS) #define NPY_BLAS_CHECK_FPE_SUPPORT 1 #else #define NPY_BLAS_CHECK_FPE_SUPPORT 0 #endif -/* Initialize BLAS environment, if needed - */ -NPY_VISIBILITY_HIDDEN void -npy_blas_init(void); - /* Runtime check if BLAS supports floating-point errors. * true - BLAS supports FPE and one can rely on them to indicate errors * false - BLAS does not support FPE. 
Special handling needed for FPE state @@ -23,6 +22,10 @@ npy_blas_init(void); NPY_VISIBILITY_HIDDEN bool npy_blas_supports_fpe(void); +/* Allow setting the BLAS FPE flag from Python.*/ +NPY_VISIBILITY_HIDDEN bool +npy_set_blas_supports_fpe(bool value); + /* If BLAS supports FPE, exactly the same as npy_get_floatstatus_barrier(). * Otherwise, we can't rely on FPE state and need special handling. */ diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 4ab3f5bae02c..73ef0760d979 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -4429,7 +4429,6 @@ _populate_finfo_constants(PyObject *NPY_UNUSED(self), PyObject *args) } - static PyObject * _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg) { @@ -4448,6 +4447,25 @@ _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg) } +static PyObject * +_blas_supports_fpe(PyObject *NPY_UNUSED(self), PyObject *arg) { + if (arg == Py_None) { + return PyBool_FromLong(npy_blas_supports_fpe()); + } + else if (arg == Py_True) { + return PyBool_FromLong(npy_set_blas_supports_fpe(true)); + } + else if (arg == Py_False) { + return PyBool_FromLong(npy_set_blas_supports_fpe(false)); + } + else { + PyErr_SetString(PyExc_TypeError, + "BLAS FPE support must be None, True, or False"); + return NULL; + } +} + + static PyObject * _reload_guard(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) { #if !defined(PYPY_VERSION) @@ -4688,6 +4706,8 @@ static struct PyMethodDef array_module_methods[] = { METH_NOARGS, NULL}, {"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage, METH_O, NULL}, + {"_blas_supports_fpe", (PyCFunction)_blas_supports_fpe, + METH_O, "BLAS FPE support pass None, True, or False and returns new value"}, {"_reload_guard", (PyCFunction)_reload_guard, METH_NOARGS, "Give a warning on reload and big warning in sub-interpreters."}, @@ -4904,10 +4924,6 @@ 
_multiarray_umath_exec(PyObject *m) { return -1; } -#if NPY_BLAS_CHECK_FPE_SUPPORT - npy_blas_init(); -#endif - #if defined(MS_WIN64) && defined(__GNUC__) PyErr_WarnEx(PyExc_Warning, "Numpy built with MINGW-W64 on Windows 64 bits is experimental, " \ diff --git a/numpy/lib/_utils_impl.py b/numpy/lib/_utils_impl.py index 2e1ee23d7d58..164aa4ee3d8c 100644 --- a/numpy/lib/_utils_impl.py +++ b/numpy/lib/_utils_impl.py @@ -61,6 +61,11 @@ def show_runtime(): "not_found": features_not_found } }) + config_found.append({ + "ignore_floating_point_errors_in_matmul": + not np._core._multiarray_umath._blas_supports_fpe(None), + }) + try: from threadpoolctl import threadpool_info config_found.extend(threadpool_info()) diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 9be98f9d2fbe..ed928a5ec7b4 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -90,14 +90,7 @@ class KnownFailureException(Exception): IS_PYPY = sys.implementation.name == 'pypy' IS_PYSTON = hasattr(sys, "pyston_version_info") HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON -BLAS_SUPPORTS_FPE = True -if platform.system() == 'Darwin' or platform.machine() == 'arm64': - try: - blas = np.__config__.CONFIG['Build Dependencies']['blas'] - if blas['name'] == 'accelerate': - BLAS_SUPPORTS_FPE = False - except KeyError: - pass +BLAS_SUPPORTS_FPE = np._core._multiarray_umath._blas_supports_fpe(None) HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64