-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
ENH: stats.epps_singleton_2samp: vectorize implementation #23881
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,9 @@ | |
| from . import distributions | ||
| from ._common import ConfidenceInterval | ||
| from ._continuous_distns import norm | ||
| from scipy._lib._array_api import xp_capabilities | ||
| from scipy._lib._array_api import xp_capabilities, array_namespace, xp_size | ||
| from scipy._lib._util import _apply_over_batch | ||
| import scipy._lib.array_api_extra as xpx | ||
| from scipy.special import gamma, kv, gammaln | ||
| from scipy.fft import ifft | ||
| from ._stats_pythran import _a_ij_Aij_Dij2 | ||
|
|
@@ -27,9 +29,15 @@ | |
| ('statistic', 'pvalue')) | ||
|
|
||
|
|
||
| # remove when array-api-extra#502 is resolved | ||
| @_apply_over_batch(('x', 2)) | ||
| def cov(x): | ||
| return xpx.cov(x) | ||
|
|
||
|
|
||
| @xp_capabilities(np_only=True) | ||
| @_axis_nan_policy_factory(Epps_Singleton_2sampResult, n_samples=2, too_small=4) | ||
| def epps_singleton_2samp(x, y, t=(0.4, 0.8)): | ||
| def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0): | ||
lucascolley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| """Compute the Epps-Singleton (ES) test statistic. | ||
|
|
||
| Test the null hypothesis that two samples have the same underlying | ||
|
|
@@ -46,6 +54,11 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8)): | |
| to be evaluated. It should be positive distinct numbers. The default | ||
| value (0.4, 0.8) is proposed in [1]_. Input must not have more than | ||
| one dimension. | ||
| axis : int or tuple of ints, default: 0 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oops, I missed this: the default axis is 0.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| If an int or tuple of ints, the axis or axes of the input along which | ||
| to compute the statistic. The statistic of each axis-slice (e.g. row) | ||
| of the input will appear in a corresponding element of the output. | ||
| If ``None``, the input will be raveled before computing the statistic. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -96,13 +109,16 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8)): | |
| function", The Stata Journal 9(3), p. 454--465, 2009. | ||
|
|
||
| """ | ||
| np = array_namespace(x, y) | ||
| # x and y are converted to arrays by the decorator | ||
mdhaber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # and `axis` is guaranteed to be -1. | ||
| t = np.asarray(t) | ||
| # check if x and y are valid inputs | ||
| nx, ny = len(x), len(y) | ||
| if (nx < 5) or (ny < 5): | ||
| nx, ny = x.shape[-1], y.shape[-1] | ||
| if (nx < 5) or (ny < 5): # only used by test_axis_nan_policy | ||
| raise ValueError('x and y should have at least 5 elements, but len(x) ' | ||
| f'= {nx} and len(y) = {ny}.') | ||
| # should replace this behavior by returning NaN | ||
| if not np.isfinite(x).all(): | ||
| raise ValueError('x must not contain nonfinite values.') | ||
| if not np.isfinite(y).all(): | ||
|
|
@@ -112,42 +128,44 @@ def epps_singleton_2samp(x, y, t=(0.4, 0.8)): | |
| # check if t is valid | ||
| if t.ndim > 1: | ||
| raise ValueError(f't must be 1d, but t.ndim equals {t.ndim}.') | ||
| if np.less_equal(t, 0).any(): | ||
| if np.any(t <= 0): | ||
| raise ValueError('t must contain positive elements only.') | ||
|
|
||
| # rescale t with semi-iqr as proposed in [1]; import iqr here to avoid | ||
| # circular import | ||
| from scipy.stats import iqr | ||
| sigma = iqr(np.hstack((x, y))) / 2 | ||
| ts = np.reshape(t, (-1, 1)) / sigma | ||
| sigma = iqr(np.concat((x, y), axis=-1), axis=-1, keepdims=True) / 2 | ||
| ts = np.reshape(t, (-1,) + (1,)*x.ndim) / sigma | ||
|
|
||
| # covariance estimation of ES test | ||
| gx = np.vstack((np.cos(ts*x), np.sin(ts*x))).T # shape = (nx, 2*len(t)) | ||
| gy = np.vstack((np.cos(ts*y), np.sin(ts*y))).T | ||
| cov_x = np.cov(gx.T, bias=True) # the test uses biased cov-estimate | ||
| cov_y = np.cov(gy.T, bias=True) | ||
| gx = np.concat((np.cos(ts*x), np.sin(ts*x)), axis=0) | ||
| gy = np.concat((np.cos(ts*y), np.sin(ts*y)), axis=0) | ||
| gx, gy = np.moveaxis(gx, 0, -2), np.moveaxis(gy, 0, -2) | ||
| cov_x = cov(gx) * (nx-1)/nx # the test uses biased cov-estimate | ||
| cov_y = cov(gy) * (ny-1)/ny | ||
| est_cov = (n/nx)*cov_x + (n/ny)*cov_y | ||
| est_cov_inv = np.linalg.pinv(est_cov) | ||
| r = np.linalg.matrix_rank(est_cov_inv) | ||
| if r < 2*len(t): | ||
| if np.any(r < 2*xp_size(t)): | ||
| warnings.warn('Estimated covariance matrix does not have full rank. ' | ||
| 'This indicates a bad choice of the input t and the ' | ||
| 'test might not be consistent.', # see p. 183 in [1]_ | ||
| stacklevel=2) | ||
|
|
||
| # compute test statistic w distributed asympt. as chisquare with df=r | ||
| g_diff = np.mean(gx, axis=0) - np.mean(gy, axis=0) | ||
| w = n*np.dot(g_diff.T, np.dot(est_cov_inv, g_diff)) | ||
| g_diff = np.mean(gx, axis=-1, keepdims=True) - np.mean(gy, axis=-1, keepdims=True) | ||
| w = n*np.matmul(np.matrix_transpose(g_diff), np.matmul(est_cov_inv, g_diff)) | ||
| w = w[..., 0, 0] | ||
|
|
||
| # apply small-sample correction | ||
| if (max(nx, ny) < 25): | ||
| corr = 1.0/(1.0 + n**(-0.45) + 10.1*(nx**(-1.7) + ny**(-1.7))) | ||
| w = corr * w | ||
| w *= corr | ||
|
|
||
| chi2 = _stats_py._SimpleChi2(r) | ||
| p = _stats_py._get_pvalue(w, chi2, alternative='greater', symmetric=False, xp=np) | ||
|
|
||
| return Epps_Singleton_2sampResult(w, p) | ||
| return Epps_Singleton_2sampResult(w[()], p[()]) | ||
|
|
||
|
|
||
| @xp_capabilities(np_only=True) | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.

Uh oh!
There was an error while loading. Please reload this page.