Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,23 @@ def _mac_os_check():
del w
del _mac_os_check

def blas_fpe_check():
# Check if BLAS adds spurious FPEs, mostly seen on M4 arms with Accelerate.
with errstate(all='raise'):
x = ones((20, 20))
try:
x @ x
except FloatingPointError:
res = _core._multiarray_umath._blas_supports_fpe(False)
if res: # res was not modified (hardcoded to True for now)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

res is not modified?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_blas_supports_fpe

I made it _blas_supports_fpe(None) just to keep the C code a tiny bit simpler; it didn't seem useful enough to care about?

res is the state after changing it. If we are not on an ARM machine, it will still be True.

add the current state of _blas_supports_fpe() to show_runtime().

Hmmm, yeah that makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the show runtime

I would prefer we not blanket-disable FPEs on ARM64, and use the import test to disable it if needed.

Sorry, but what do you mean? On non ARM64, it's blanket enabled. On ARM64 it's disabled based on this test. I have not bothered to skip this test on non-ARM64, instead it might give that warning when things seem off.

warnings.warn(
"Spurious warnings given by blas but suppression not "
"set up on this platform. Please open a NumPy issue.",
UserWarning, stacklevel=2)

blas_fpe_check()
del blas_fpe_check

def hugepage_setup():
"""
We usually use madvise hugepages support, but on some old kernels it
Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,7 @@ endforeach
# ------------------------------
src_multiarray_umath_common = [
'src/common/array_assign.c',
'src/common/blas_utils.c',
'src/common/gil_utils.c',
'src/common/mem_overlap.c',
'src/common/npy_argparse.c',
Expand All @@ -1123,7 +1124,6 @@ src_multiarray_umath_common = [
]
if have_blas
src_multiarray_umath_common += [
'src/common/blas_utils.c',
'src/common/cblasfuncs.c',
'src/common/python_xerbla.c',
]
Expand Down
96 changes: 9 additions & 87 deletions numpy/_core/src/common/blas_utils.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "numpy/npy_math.h" // npy_get_floatstatus_barrier
#include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN
#include "blas_utils.h"
#include "npy_cblas.h"

#include <stdbool.h>
#include <stdio.h>
Expand All @@ -11,89 +12,8 @@
#endif

#if NPY_BLAS_CHECK_FPE_SUPPORT

/*
 * Return whether we're running on macOS 15.4 or later.
 *
 * On non-Apple builds this is always false. On Apple builds the
 * "kern.osproductversion" sysctl string is read and parsed as
 * "<major>.<minor>". Any failure along the way (sysctl error, failed
 * allocation, unparsable string) yields false.
 */
static inline bool
is_macOS_version_15_4_or_later(void){
#if !defined(__APPLE__)
    return false;
#else
    char *osProductVersion = NULL;
    size_t size = 0;
    int major = 0, minor = 0;
    bool ret = false;

    // Query how large OS version string should be
    if(-1 == sysctlbyname("kern.osproductversion", NULL, &size, NULL, 0)){
        goto cleanup;
    }

    // One extra byte so the buffer can always be NUL-terminated below.
    osProductVersion = malloc(size + 1);
    if(osProductVersion == NULL){
        // Bail out here: otherwise the terminator write below would go
        // through a NULL pointer when the allocation fails.
        goto cleanup;
    }

    // Get the OS version string
    if(-1 == sysctlbyname("kern.osproductversion", osProductVersion, &size, NULL, 0)){
        goto cleanup;
    }

    osProductVersion[size] = '\0';

    // Parse the version string; both major and minor must be present
    if(2 > sscanf(osProductVersion, "%d.%d", &major, &minor)) {
        goto cleanup;
    }

    if (major > 15 || (major == 15 && minor >= 4)) {
        ret = true;
    }

cleanup:
    // free(NULL) is a no-op, so no guard is needed on the early-exit paths.
    free(osProductVersion);

    return ret;
#endif
}

/*
 * Runtime probe: can the BLAS / LAPACK library in use run on ARM SME?
 *
 * The ARM Scalable Matrix Extension (SME) raises every floating-point
 * error flag whenever it is used, independent of the values or the
 * operation, so FPE state is lost and callers need special handling.
 * NumPy does not use SME directly, but BLAS / LAPACK may.
 *
 * Returns true only when built against Apple's new Accelerate LAPACK on
 * aarch64 and both the OS version check and the hardware feature query
 * succeed; false in every other case.
 */
static inline bool
BLAS_can_use_ARM_SME(void)
{
#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK)
    // Apple's Accelerate framework can use ARM SME for BLAS / LAPACK on
    // - macOS 15.4+
    // - Apple silicon M4+
    int sme_flag = 0;
    size_t flag_size = sizeof(sme_flag);

    // Short-circuit order matters: OS / Accelerate support first, then the
    // hardware sysctl (a -1 return means "unknown", treated as no SME),
    // and finally the reported feature bit itself.
    if (is_macOS_version_15_4_or_later()
            && -1 != sysctlbyname("hw.optional.arm.FEAT_SME",
                                  &sme_flag, &flag_size, NULL, 0)
            && sme_flag) {
        return true;
    }
#endif

    // default assume SME is not used
    return false;
}

/* Static variable to cache runtime check of BLAS FPE support.
/*
* Static variable to cache runtime check of BLAS FPE support.
*/
static bool blas_supports_fpe = true;

Expand All @@ -110,19 +30,21 @@ npy_blas_supports_fpe(void)
#endif
}

NPY_VISIBILITY_HIDDEN void
npy_blas_init(void)
NPY_VISIBILITY_HIDDEN bool
npy_set_blas_supports_fpe(bool value)
{
#if NPY_BLAS_CHECK_FPE_SUPPORT
blas_supports_fpe = !BLAS_can_use_ARM_SME();
blas_supports_fpe = (bool)value;
return blas_supports_fpe;
#endif
return true; // ignore input not set up on this platform
}

NPY_VISIBILITY_HIDDEN int
npy_get_floatstatus_after_blas(void)
{
#if NPY_BLAS_CHECK_FPE_SUPPORT
if(!blas_supports_fpe){
if (!blas_supports_fpe){
// BLAS does not support FPE and we need to return FPE state.
// Instead of clearing and then grabbing state, just return
// that no flags are set.
Expand Down
17 changes: 10 additions & 7 deletions numpy/_core/src/common/blas_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,30 @@

#include <stdbool.h>

/* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check
/*
* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check
* for floating-point error (FPE) support in BLAS.
* The known culprit right now is SME, likely only on Mac, but that is not
* quite clear.
* This check always runs on all ARM (it is a small check overall).
*/
#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK)
#if defined(__APPLE__) && defined(__aarch64__) && defined(HAVE_CBLAS)
#define NPY_BLAS_CHECK_FPE_SUPPORT 1
#else
#define NPY_BLAS_CHECK_FPE_SUPPORT 0
#endif

/* Initialize BLAS environment, if needed
*/
NPY_VISIBILITY_HIDDEN void
npy_blas_init(void);

/* Runtime check if BLAS supports floating-point errors.
* true - BLAS supports FPE and one can rely on them to indicate errors
* false - BLAS does not support FPE. Special handling needed for FPE state
*/
NPY_VISIBILITY_HIDDEN bool
npy_blas_supports_fpe(void);

/* Allow setting the BLAS FPE flag from Python.*/
NPY_VISIBILITY_HIDDEN bool
npy_set_blas_supports_fpe(bool value);

/* If BLAS supports FPE, exactly the same as npy_get_floatstatus_barrier().
* Otherwise, we can't rely on FPE state and need special handling.
*/
Expand Down
26 changes: 21 additions & 5 deletions numpy/_core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -4429,7 +4429,6 @@ _populate_finfo_constants(PyObject *NPY_UNUSED(self), PyObject *args)
}



static PyObject *
_set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg)
{
Expand All @@ -4448,6 +4447,25 @@ _set_numpy_warn_if_no_mem_policy(PyObject *NPY_UNUSED(self), PyObject *arg)
}


/*
 * Query or set the "BLAS supports FPE" flag from Python.
 *
 * `arg` selects the operation:
 *   None  - return npy_blas_supports_fpe()
 *   True  - return npy_set_blas_supports_fpe(true)
 *   False - return npy_set_blas_supports_fpe(false)
 * Any other object sets TypeError and returns NULL.
 *
 * On success the result is a new Python bool built from that value.
 */
static PyObject *
_blas_supports_fpe(PyObject *NPY_UNUSED(self), PyObject *arg) {
    bool state;

    if (arg == Py_None) {
        /* Pure query: leave the flag untouched. */
        state = npy_blas_supports_fpe();
    }
    else if (arg == Py_True || arg == Py_False) {
        /* Only the two bool singletons are accepted as new values. */
        state = npy_set_blas_supports_fpe(arg == Py_True);
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                "BLAS FPE support must be None, True, or False");
        return NULL;
    }

    return PyBool_FromLong(state);
}


static PyObject *
_reload_guard(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) {
#if !defined(PYPY_VERSION)
Expand Down Expand Up @@ -4688,6 +4706,8 @@ static struct PyMethodDef array_module_methods[] = {
METH_NOARGS, NULL},
{"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage,
METH_O, NULL},
{"_blas_supports_fpe", (PyCFunction)_blas_supports_fpe,
METH_O, "BLAS FPE support pass None, True, or False and returns new value"},
{"_reload_guard", (PyCFunction)_reload_guard,
METH_NOARGS,
"Give a warning on reload and big warning in sub-interpreters."},
Expand Down Expand Up @@ -4904,10 +4924,6 @@ _multiarray_umath_exec(PyObject *m) {
return -1;
}

#if NPY_BLAS_CHECK_FPE_SUPPORT
npy_blas_init();
#endif

#if defined(MS_WIN64) && defined(__GNUC__)
PyErr_WarnEx(PyExc_Warning,
"Numpy built with MINGW-W64 on Windows 64 bits is experimental, " \
Expand Down
5 changes: 5 additions & 0 deletions numpy/lib/_utils_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def show_runtime():
"not_found": features_not_found
}
})
config_found.append({
"ignore_floating_point_errors_in_matmul":
not np._core._multiarray_umath._blas_supports_fpe(None),
})

try:
from threadpoolctl import threadpool_info
config_found.extend(threadpool_info())
Expand Down
9 changes: 1 addition & 8 deletions numpy/testing/_private/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,7 @@ class KnownFailureException(Exception):
IS_PYPY = sys.implementation.name == 'pypy'
IS_PYSTON = hasattr(sys, "pyston_version_info")
HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON
BLAS_SUPPORTS_FPE = True
if platform.system() == 'Darwin' or platform.machine() == 'arm64':
try:
blas = np.__config__.CONFIG['Build Dependencies']['blas']
if blas['name'] == 'accelerate':
BLAS_SUPPORTS_FPE = False
except KeyError:
pass
BLAS_SUPPORTS_FPE = np._core._multiarray_umath._blas_supports_fpe(None)

HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64

Expand Down
Loading