Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions scipy/stats/_hypotests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from . import distributions
from ._common import ConfidenceInterval
from ._continuous_distns import norm
from scipy._lib._array_api import xp_capabilities, array_namespace, xp_size
from scipy._lib._array_api import xp_capabilities, array_namespace, xp_size, xp_promote
from scipy._lib._util import _apply_over_batch
import scipy._lib.array_api_extra as xpx
from scipy.special import gamma, kv, gammaln
Expand All @@ -29,7 +29,7 @@
('statistic', 'pvalue'))


# remove when array-api-extra#502 is resolved
# remove when array-api-extra#502 is resolved
@_apply_over_batch(('x', 2))
def cov(x):
    """Return ``xpx.cov(x)``.

    Thin wrapper that forwards `x` unchanged to ``xpx.cov`` and returns its
    result. The wrapper exists so that the ``_apply_over_batch`` decorator can
    be applied to the call.
    """
    # NOTE(review): the ('x', 2) argument presumably declares that `x` has two
    # core dimensions, with any leading batch dimensions handled by the
    # decorator - confirm against `_apply_over_batch` before relying on this.
    return xpx.cov(x)
Expand All @@ -54,7 +54,7 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0):
to be evaluated. It should be positive distinct numbers. The default
value (0.4, 0.8) is proposed in [1]_. Input must not have more than
one dimension.
axis : int or tuple of ints, default: None
axis : int or tuple of ints, default: 0
If an int or tuple of ints, the axis or axes of the input along which
to compute the statistic. The statistic of each axis-slice (e.g. row)
of the input will appear in a corresponding element of the output.
Expand Down Expand Up @@ -112,17 +112,13 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0):
np = array_namespace(x, y)
# x and y are converted to arrays by the decorator
# and `axis` is guaranteed to be -1.
t = np.asarray(t)
x, y = xp_promote(x, y, force_floating=True, xp=np)
t = np.asarray(t, dtype=x.dtype)
Comment on lines +115 to +116
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not worth rerunning CI for, but if we are coming back here in the near future, then:

Suggested change
x, y = xp_promote(x, y, force_floating=True, xp=np)
t = np.asarray(t, dtype=x.dtype)
x, y, t = xp_promote(x, y, t, force_floating=True, xp=np)

Copy link
Contributor Author

@mdhaber mdhaber Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote it this way intentionally because even if t is treated as an array internally, I think the user may think of it like a sequence of separate floats. It is 1D and doesn't follow broadcasting rules with the samples. I have similar thoughts about rng in iqr.

I should double-check, but I didn't think we wrote `xp_promote` to accept non-array array-likes, and we don't want to coerce `t` to an array before dtype determination because that could upcast everything to the default float.

That said, I am a bit conflicted about it. I'll think some more.

# check if x and y are valid inputs
nx, ny = x.shape[-1], y.shape[-1]
if (nx < 5) or (ny < 5): # only used by test_axis_nan_policy
raise ValueError('x and y should have at least 5 elements, but len(x) '
f'= {nx} and len(y) = {ny}.')
# should replace this behavior by returning NaN
if not np.isfinite(x).all():
raise ValueError('x must not contain nonfinite values.')
if not np.isfinite(y).all():
raise ValueError('y must not contain nonfinite values.')
n = nx + ny

# check if t is valid
Expand All @@ -131,6 +127,15 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0):
if np.any(t <= 0):
raise ValueError('t must contain positive elements only.')

# Previously, non-finite input caused an error in linalg functions.
# To prevent an issue in one slice from halting the calculation, replace non-finite
# values with a harmless one, and replace results with NaN at the end.
i_x = ~np.isfinite(x)
i_y = ~np.isfinite(y)
x = xpx.at(x)[i_x].set(1)
y = xpx.at(y)[i_y].set(1)
invalid_result = np.any(i_x, axis=-1) | np.any(i_y, axis=-1)

# rescale t with semi-iqr as proposed in [1]; import iqr here to avoid
# circular import
from scipy.stats import iqr
Expand Down Expand Up @@ -165,6 +170,8 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0):
chi2 = _stats_py._SimpleChi2(r)
p = _stats_py._get_pvalue(w, chi2, alternative='greater', symmetric=False, xp=np)

w = xpx.at(w)[invalid_result].set(np.nan)
p = xpx.at(p)[invalid_result].set(np.nan)
return Epps_Singleton_2sampResult(w[()], p[()])


Expand Down
2 changes: 1 addition & 1 deletion scipy/stats/_stats_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -8433,7 +8433,7 @@ def kruskal(*samples, nan_policy='propagate', axis=0):
* 'propagate': returns nan
* 'raise': throws an error
* 'omit': performs the calculations ignoring nan values
axis : int or tuple of ints, default: None
axis : int or tuple of ints, default: 0
If an int or tuple of ints, the axis or axes of the input along which
to compute the statistic. The statistic of each axis-slice (e.g. row)
of the input will appear in a corresponding element of the output.
Expand Down
19 changes: 16 additions & 3 deletions scipy/stats/tests/test_hypotests.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,22 @@ def test_epps_singleton_size(self):
assert_equal(res.pvalue, np.nan)

def test_epps_singleton_nonfinite(self):
# raise error if there are non-finite values
x, y = (1, 2, 3, 4, 5, np.inf), np.arange(10)
assert_raises(ValueError, epps_singleton_2samp, x, y)
rng = np.random.default_rng(83249872384543)
x = rng.random(size=(10, 11))
y = rng.random(size=(10, 12))
i = np.asarray([1, 4, 9]) # arbitrary rows

w_ref, p_ref = epps_singleton_2samp(x, y, axis=-1)
w_ref[i] = np.nan
p_ref[i] = np.nan

x[i[0], 0] = np.nan
x[i[1], 1] = np.inf
y[i[2], 2] = -np.inf
w_res, p_res = epps_singleton_2samp(x, y, axis=-1)

assert_allclose(w_res, w_ref)
assert_allclose(p_res, p_ref)

def test_names(self):
x, y = np.arange(20), np.arange(30)
Expand Down
Loading