From ad46670c2aaa453ff540ac87758292ce24305e8b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 15 Jun 2022 15:39:15 +0200 Subject: [PATCH 01/16] MAINT fix the way to call stats.mode --- sklearn/impute/_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index bb4bfed8098bf..606d8ab2c417c 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -51,9 +51,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - mode = stats.mode(array) - most_frequent_value = mode[0][0] - most_frequent_count = mode[1][0] + most_frequent_value, most_frequent_count = stats.mode(array) else: most_frequent_value = 0 most_frequent_count = 0 From 8dfe99ae015c821dd323f8188c2d912fcf5d0c8c Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 16 Jun 2022 10:48:40 +0200 Subject: [PATCH 02/16] Trigger [scipy-dev] From b7acf877543ae873c8eb571350246a64836c680b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 16 Jun 2022 20:07:12 +0200 Subject: [PATCH 03/16] introduce sklearn.fixes.mode --- sklearn/impute/_base.py | 4 +-- sklearn/neighbors/_classification.py | 5 +-- sklearn/utils/fixes.py | 51 ++++++++++++++++++++++++++++ sklearn/utils/tests/test_extmath.py | 9 +++-- 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 606d8ab2c417c..ead8582eeaa34 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -9,9 +9,9 @@ import numpy as np import numpy.ma as ma from scipy import sparse as sp -from scipy import stats from ..base import BaseEstimator, TransformerMixin +from ..utils.fixes import mode from ..utils.sparsefuncs import _get_median from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -51,7 +51,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - most_frequent_value, most_frequent_count = stats.mode(array) + most_frequent_value, most_frequent_count = mode(array) else: most_frequent_value = 0 most_frequent_count = 0 diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index bcad8c71aee07..b35d0340b800c 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -9,8 +9,9 @@ # License: BSD 3 clause (C) INRIA, University of Amsterdam import numpy as np -from scipy import stats + from ..utils.extmath import weighted_mode +from ..utils import fixes from ..utils.validation import _is_arraylike, _num_samples import warnings @@ -234,7 +235,7 @@ def predict(self, X): y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype) for k, classes_k in enumerate(classes_): if weights is None: - mode, _ = stats.mode(_y[neigh_ind, k], axis=1) + mode, _ = fixes.mode(_y[neigh_ind, k], axis=1) else: mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index b0074ae7e3a18..4021eb7f66432 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -39,6 +39,57 @@ except ImportError: # SciPy < 1.8 from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa +if sp_version >= parse_version("1.9"): + from scipy.stats import mode +else: + + def mode(a, axis=0, nan_policy="propagate"): + """Return an array of the modal (most common) value in the passed array. + + If there is more than one such value, only the smallest is returned. + The bin-count for the modal bins is also returned. + + Parameters + ---------- + a : array_like + n-dimensional array of which to find mode(s). + axis : int or None, optional + Axis along which to operate. Default is 0. If None, compute over + the whole array `a`. + nan_policy : {'propagate', 'raise', 'omit'}, optional + Defines how to handle when input contains nan. + The following options are available (default is 'propagate'): + * 'propagate': returns nan + * 'raise': throws an error + * 'omit': performs the calculations ignoring nan values + + Returns + ------- + mode : ndarray + Array of modal values. + count : ndarray + Array of counts for each mode. + + Examples + -------- + >>> a = np.array([[6, 8, 3, 0], + ... [3, 2, 1, 7], + ... [8, 1, 8, 4], + ... [5, 3, 0, 5], + ... [4, 7, 5, 9]]) + >>> from scipy import stats + >>> stats.mode(a) + ModeResult(mode=array([3, 1, 0, 0]), count=array([1, 1, 1, 1])) + To get mode of whole array, specify ``axis=None``: + >>> stats.mode(a, axis=None) + ModeResult(mode=3, count=3) + """ + from scipy.stats import mode + from scipy.stats._stats_py import ModeResult + + mode_results = mode(a, axis=axis, nan_policy=nan_policy) + return ModeResult(mode_results.mode[()], mode_results.count[()]) + def _object_dtype_isnan(X): return X != X diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 07a553c8cf09d..a6861a6ea5e82 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -6,7 +6,6 @@ import numpy as np from scipy import sparse from scipy import linalg -from scipy import stats from scipy.sparse.linalg import eigsh from scipy.special import expit @@ -20,6 +19,7 @@ from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import skip_if_32bit +from sklearn.utils import fixes from sklearn.utils.extmath import density, _safe_accumulator_op from sklearn.utils.extmath import randomized_svd, _randomized_eigsh from sklearn.utils.extmath import row_norms @@ -56,9 +56,14 @@ def test_uniform_weights(): weights = np.ones(x.shape) for axis in (None, 0, 1): - mode, score = stats.mode(x, axis) + mode, score = fixes.mode(x, axis) mode2, score2 = weighted_mode(x, weights, axis=axis) + if fixes.sp_version >= fixes.parse_version("1.9.0") and axis is not None: + # See https://github.com/scipy/scipy/issues/16418 + mode = np.expand_dims(mode, axis=axis) + score = np.expand_dims(score, axis=axis) + assert_array_equal(mode, mode2) assert_array_equal(score, score2) From 3f1dab5759543464e636da4ea42d91273ed2d3bc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 16 Jun 2022 20:39:30 +0200 Subject: [PATCH 04/16] iter --- sklearn/utils/fixes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 4021eb7f66432..6ba7de976189d 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -10,6 +10,7 @@ # # License: BSD 3 clause +from collections import namedtuple from functools import update_wrapper import functools @@ -85,7 +86,8 @@ def mode(a, axis=0, nan_policy="propagate"): ModeResult(mode=3, count=3) """ from scipy.stats import mode - from scipy.stats._stats_py import ModeResult + + ModeResult = namedtuple("ModeResult", ("mode", "count")) mode_results = mode(a, axis=axis, nan_policy=nan_policy) return ModeResult(mode_results.mode[()], mode_results.count[()]) From 59c5a26d6c68b11fa99da7c93763bd2388bbf65e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 16 Jun 2022 21:15:38 +0200 Subject: [PATCH 05/16] iter --- sklearn/utils/fixes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 6ba7de976189d..b64ae996b6410 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -73,16 +73,17 @@ def mode(a, axis=0, nan_policy="propagate"): Examples -------- + >>> import numpy as np >>> a = np.array([[6, 8, 3, 0], ... [3, 2, 1, 7], ... [8, 1, 8, 4], ... [5, 3, 0, 5], ... [4, 7, 5, 9]]) - >>> from scipy import stats - >>> stats.mode(a) + >>> from sklearn.utils.fixes import mode + >>> mode(a) ModeResult(mode=array([3, 1, 0, 0]), count=array([1, 1, 1, 1])) To get mode of whole array, specify ``axis=None``: - >>> stats.mode(a, axis=None) + >>> mode(a, axis=None) ModeResult(mode=3, count=3) """ from scipy.stats import mode From f49740cb8f70f39ed3214d0ae404ef6438d02d8f Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 16 Jun 2022 23:06:26 +0100 Subject: [PATCH 06/16] iter --- sklearn/utils/fixes.py | 5 +++-- sklearn/utils/tests/test_fixes.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index b64ae996b6410..db59a34b41ec8 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -90,8 +90,9 @@ def mode(a, axis=0, nan_policy="propagate"): ModeResult = namedtuple("ModeResult", ("mode", "count")) - mode_results = mode(a, axis=axis, nan_policy=nan_policy) - return ModeResult(mode_results.mode[()], mode_results.count[()]) + m, c = mode(a, axis=axis, nan_policy=nan_policy) + m, c = m.squeeze(axis=axis), c.squeeze(axis=axis) + return ModeResult(m[()], c[()]) def _object_dtype_isnan(X): diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 3566897da5efc..314a1d26f53d6 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -13,6 +13,7 @@ from sklearn.utils.fixes import _object_dtype_isnan from sklearn.utils.fixes import loguniform +from sklearn.utils.fixes import mode @pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1])) @@ -46,3 +47,15 @@ def test_loguniform(low, high, base): assert loguniform(base**low, base**high).rvs(random_state=0) == loguniform( base**low, base**high ).rvs(random_state=0) + + +def test_mode(): + """Check that `mode` returns the expected type of data and array dimensions.""" + a = np.array([[6, 8, 3, 0], [3, 2, 1, 7], [8, 1, 8, 4], [5, 3, 0, 5], [4, 7, 5, 9]]) + mode_result = mode(a) + assert_array_equal(mode_result.mode, np.array([3, 1, 0, 0])) + assert_array_equal(mode_result.count, np.array([1, 1, 1, 1])) + + mode_result = mode(a, axis=None) + assert mode_result.mode == 3 + assert mode_result.count == 3 From f8df7db10eade8057ae734add3dc3a69fa6c972a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 16 Jun 2022 23:08:17 +0100 Subject: [PATCH 07/16] docstring numpydoc --- sklearn/utils/fixes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index db59a34b41ec8..385cad79dcf4f 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -53,7 +53,7 @@ def mode(a, axis=0, nan_policy="propagate"): Parameters ---------- a : array_like - n-dimensional array of which to find mode(s). + N-dimensional array of which to find mode(s). axis : int or None, optional Axis along which to operate. Default is 0. If None, compute over the whole array `a`. From 4b21ff6827bb9d4911e28ca0d5ca08d71e195350 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 20 Jun 2022 16:35:30 +0200 Subject: [PATCH 08/16] Update sklearn/utils/fixes.py Co-authored-by: Meekail Zain <34613774+Micky774@users.noreply.github.com> --- sklearn/utils/fixes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 385cad79dcf4f..d8884d3a3c25a 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -82,6 +82,7 @@ def mode(a, axis=0, nan_policy="propagate"): >>> from sklearn.utils.fixes import mode >>> mode(a) ModeResult(mode=array([3, 1, 0, 0]), count=array([1, 1, 1, 1])) + To get mode of whole array, specify ``axis=None``: >>> mode(a, axis=None) ModeResult(mode=3, count=3) From c1b5e8a9701438af23bf7ad82d7a2dd9d9872d03 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 21 Jun 2022 08:57:58 +0200 Subject: [PATCH 09/16] iter --- sklearn/utils/tests/test_extmath.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index a6861a6ea5e82..3e9ce20b4bab4 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -59,7 +59,9 @@ def test_uniform_weights(): mode, score = fixes.mode(x, axis) mode2, score2 = weighted_mode(x, weights, axis=axis) - if fixes.sp_version >= fixes.parse_version("1.9.0") and axis is not None: + if axis is not None: + # TODO: `weighted_mode` should be modified to output the same array + # dimension than `scipy.stats.mode` for scipy >= 1.9 # See https://github.com/scipy/scipy/issues/16418 mode = np.expand_dims(mode, axis=axis) score = np.expand_dims(score, axis=axis) From 7a5c86d03bccf1b3e831b4853a28e7e006472134 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 26 Jul 2022 08:57:31 +0200 Subject: [PATCH 10/16] revert backport --- sklearn/impute/_base.py | 4 +- sklearn/neighbors/_classification.py | 4 +- sklearn/utils/fixes.py | 61 ---------------------------- sklearn/utils/tests/test_extmath.py | 4 +- sklearn/utils/tests/test_fixes.py | 13 ------ 5 files changed, 6 insertions(+), 80 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index ead8582eeaa34..606d8ab2c417c 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -9,9 +9,9 @@ import numpy as np import numpy.ma as ma from scipy import sparse as sp +from scipy import stats from ..base import BaseEstimator, TransformerMixin -from ..utils.fixes import mode from ..utils.sparsefuncs import _get_median from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -51,7 +51,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - most_frequent_value, most_frequent_count = mode(array) + most_frequent_value, most_frequent_count = stats.mode(array) else: most_frequent_value = 0 most_frequent_count = 0 diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index b35d0340b800c..6194a1dd7cb15 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -9,9 +9,9 @@ # License: BSD 3 clause (C) INRIA, University of Amsterdam import numpy as np +from scipy import stats from ..utils.extmath import weighted_mode -from ..utils import fixes from ..utils.validation import _is_arraylike, _num_samples import warnings @@ -235,7 +235,7 @@ def predict(self, X): y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype) for k, classes_k in enumerate(classes_): if weights is None: - mode, _ = fixes.mode(_y[neigh_ind, k], axis=1) + mode, _ = stats.mode(_y[neigh_ind, k], axis=1) else: mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index d8884d3a3c25a..2939ac33b2784 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -10,7 +10,6 @@ # # License: BSD 3 clause -from collections import namedtuple from functools import update_wrapper import functools @@ -35,66 +34,6 @@ # mypy error: Name 'lobpcg' already defined (possibly by an import) from ..externals._lobpcg import lobpcg # type: ignore # noqa -try: - from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1 -except ImportError: # SciPy < 1.8 - from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa - -if sp_version >= parse_version("1.9"): - from scipy.stats import mode -else: - - def mode(a, axis=0, nan_policy="propagate"): - """Return an array of the modal (most common) value in the passed array. - - If there is more than one such value, only the smallest is returned. - The bin-count for the modal bins is also returned. - - Parameters - ---------- - a : array_like - N-dimensional array of which to find mode(s). - axis : int or None, optional - Axis along which to operate. Default is 0. If None, compute over - the whole array `a`. - nan_policy : {'propagate', 'raise', 'omit'}, optional - Defines how to handle when input contains nan. - The following options are available (default is 'propagate'): - * 'propagate': returns nan - * 'raise': throws an error - * 'omit': performs the calculations ignoring nan values - - Returns - ------- - mode : ndarray - Array of modal values. - count : ndarray - Array of counts for each mode. - - Examples - -------- - >>> import numpy as np - >>> a = np.array([[6, 8, 3, 0], - ... [3, 2, 1, 7], - ... [8, 1, 8, 4], - ... [5, 3, 0, 5], - ... [4, 7, 5, 9]]) - >>> from sklearn.utils.fixes import mode - >>> mode(a) - ModeResult(mode=array([3, 1, 0, 0]), count=array([1, 1, 1, 1])) - - To get mode of whole array, specify ``axis=None``: - >>> mode(a, axis=None) - ModeResult(mode=3, count=3) - """ - from scipy.stats import mode - - ModeResult = namedtuple("ModeResult", ("mode", "count")) - - m, c = mode(a, axis=axis, nan_policy=nan_policy) - m, c = m.squeeze(axis=axis), c.squeeze(axis=axis) - return ModeResult(m[()], c[()]) - def _object_dtype_isnan(X): return X != X diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 3e9ce20b4bab4..31e54b236d925 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -6,6 +6,7 @@ import numpy as np from scipy import sparse from scipy import linalg +from scipy import stats from scipy.sparse.linalg import eigsh from scipy.special import expit @@ -19,7 +20,6 @@ from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import skip_if_32bit -from sklearn.utils import fixes from sklearn.utils.extmath import density, _safe_accumulator_op from sklearn.utils.extmath import randomized_svd, _randomized_eigsh from sklearn.utils.extmath import row_norms @@ -56,7 +56,7 @@ def test_uniform_weights(): weights = np.ones(x.shape) for axis in (None, 0, 1): - mode, score = fixes.mode(x, axis) + mode, score = stats.mode(x, axis) mode2, score2 = weighted_mode(x, weights, axis=axis) if axis is not None: diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 314a1d26f53d6..3566897da5efc 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -13,7 +13,6 @@ from sklearn.utils.fixes import _object_dtype_isnan from sklearn.utils.fixes import loguniform -from sklearn.utils.fixes import mode @pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1])) @@ -47,15 +46,3 @@ def test_loguniform(low, high, base): assert loguniform(base**low, base**high).rvs(random_state=0) == loguniform( base**low, base**high ).rvs(random_state=0) - - -def test_mode(): - """Check that `mode` returns the expected type of data and array dimensions.""" - a = np.array([[6, 8, 3, 0], [3, 2, 1, 7], [8, 1, 8, 4], [5, 3, 0, 5], [4, 7, 5, 9]]) - mode_result = mode(a) - assert_array_equal(mode_result.mode, np.array([3, 1, 0, 0])) - assert_array_equal(mode_result.count, np.array([1, 1, 1, 1])) - - mode_result = mode(a, axis=None) - assert mode_result.mode == 3 - assert mode_result.count == 3 From 594d50d9af585a5370dd30aa40edb10a753ed4c4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 26 Jul 2022 09:00:08 +0200 Subject: [PATCH 11/16] revert missing import --- sklearn/utils/fixes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 2939ac33b2784..a7ea026d2cb05 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -34,6 +34,11 @@ # mypy error: Name 'lobpcg' already defined (possibly by an import) from ..externals._lobpcg import lobpcg # type: ignore # noqa +try: + from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1 +except ImportError: # SciPy < 1.8 + from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa + def _object_dtype_isnan(X): return X != X From 56ce1e24a960845ac35df11a39522819996a2423 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 Jul 2022 18:01:27 +0200 Subject: [PATCH 12/16] iter [scipy-dev] --- sklearn/neighbors/_classification.py | 1 - sklearn/utils/fixes.py | 2 +- sklearn/utils/tests/test_extmath.py | 7 ------- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index 06845b70b9b4f..25ee67728e1e7 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -11,7 +11,6 @@ import numpy as np from scipy import stats - from ..utils.extmath import weighted_mode from ..utils.validation import _is_arraylike, _num_samples diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index a7ea026d2cb05..b0074ae7e3a18 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -37,7 +37,7 @@ try: from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1 except ImportError: # SciPy < 1.8 - from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa + from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa def _object_dtype_isnan(X): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 31e54b236d925..07a553c8cf09d 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -59,13 +59,6 @@ def test_uniform_weights(): mode, score = stats.mode(x, axis) mode2, score2 = weighted_mode(x, weights, axis=axis) - if axis is not None: - # TODO: `weighted_mode` should be modified to output the same array - # dimension than `scipy.stats.mode` for scipy >= 1.9 - # See https://github.com/scipy/scipy/issues/16418 - mode = np.expand_dims(mode, axis=axis) - score = np.expand_dims(score, axis=axis) - assert_array_equal(mode, mode2) assert_array_equal(score, score2) From e0c77fd46ef69d93cf19482a25c68ad16002775a Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 4 Aug 2022 04:18:05 -0400 Subject: [PATCH 13/16] FIX Place mode in utils.fixes (#12) --- sklearn/impute/_base.py | 4 ++-- sklearn/neighbors/_classification.py | 4 ++-- sklearn/utils/fixes.py | 12 ++++++++++++ sklearn/utils/tests/test_extmath.py | 4 ++-- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 34c23145dd220..37d9f29ae8703 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -9,10 +9,10 @@ import numpy as np import numpy.ma as ma from scipy import sparse as sp -from scipy import stats from ..base import BaseEstimator, TransformerMixin from ..utils._param_validation import StrOptions +from ..utils.fixes import _mode from ..utils.sparsefuncs import _get_median from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -52,7 +52,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - most_frequent_value, most_frequent_count = stats.mode(array) + most_frequent_value, most_frequent_count = _mode(array, keepdims=False) else: most_frequent_value = 0 most_frequent_count = 0 diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index 25ee67728e1e7..32fe73b6ec9b2 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -10,7 +10,7 @@ from numbers import Integral import numpy as np -from scipy import stats +from ..utils.fixes import _mode from ..utils.extmath import weighted_mode from ..utils.validation import _is_arraylike, _num_samples @@ -249,7 +249,7 @@ def predict(self, X): y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype) for k, classes_k in enumerate(classes_): if weights is None: - mode, _ = stats.mode(_y[neigh_ind, k], axis=1) + mode, _ = _mode(_y[neigh_ind, k], axis=1, keepdims=False) else: mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index b0074ae7e3a18..a9db53c6042ec 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -163,3 +163,15 @@ def threadpool_info(): threadpool_info.__doc__ = threadpoolctl.threadpool_info.__doc__ + + +# TODO: Remove when SciPy 1.9 is the minimum supported version +def _mode(a, axis=0, keepdims=None): + if sp_version >= parse_version("1.9.0"): + return scipy.stats.mode(a, axis=axis, keepdims=keepdims) + + results = scipy.stats.mode(a, axis=axis) + if keepdims is None or keepdims: + return results + else: + return results[0][0], results[1][0] diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 07a553c8cf09d..0a68fbc99be14 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -6,7 +6,6 @@ import numpy as np from scipy import sparse from scipy import linalg -from scipy import stats from scipy.sparse.linalg import eigsh from scipy.special import expit @@ -19,6 +18,7 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import skip_if_32bit +from sklearn.utils.fixes import _mode from sklearn.utils.extmath import density, _safe_accumulator_op from sklearn.utils.extmath import randomized_svd, _randomized_eigsh @@ -56,7 +56,7 @@ def test_uniform_weights(): weights = np.ones(x.shape) for axis in (None, 0, 1): - mode, score = stats.mode(x, axis) + mode, score = _mode(x, axis, keepdims=True) mode2, score2 = weighted_mode(x, weights, axis=axis) assert_array_equal(mode, mode2) From 91f469f935f060414c3f6447ab02e1e0835b5355 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 4 Aug 2022 10:45:35 +0200 Subject: [PATCH 14/16] iter --- sklearn/utils/fixes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index a9db53c6042ec..85dad5df4694a 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -174,4 +174,4 @@ def _mode(a, axis=0, keepdims=None): if keepdims is None or keepdims: return results else: - return results[0][0], results[1][0] + return results[0], results[1] From 8119ff8ed2ddefae46ef5519def04f7a4cb452c9 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 4 Aug 2022 10:36:04 -0400 Subject: [PATCH 15/16] CLN Simplify PR --- sklearn/impute/_base.py | 2 +- sklearn/neighbors/_classification.py | 2 +- sklearn/utils/fixes.py | 11 +++-------- sklearn/utils/tests/test_extmath.py | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index b329bd77c0def..b97d977cb14bb 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -52,7 +52,7 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - most_frequent_value, most_frequent_count = _mode(array, keepdims=False) + most_frequent_value, most_frequent_count = _mode(array) else: most_frequent_value = 0 most_frequent_count = 0 diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index 32fe73b6ec9b2..eebd615b2491c 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -249,7 +249,7 @@ def predict(self, X): y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype) for k, classes_k in enumerate(classes_): if weights is None: - mode, _ = _mode(_y[neigh_ind, k], axis=1, keepdims=False) + mode, _ = _mode(_y[neigh_ind, k], axis=1) else: mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 85dad5df4694a..cdd63e00cd381 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -166,12 +166,7 @@ def threadpool_info(): # TODO: Remove when SciPy 1.9 is the minimum supported version -def _mode(a, axis=0, keepdims=None): +def _mode(a, axis=0): if sp_version >= parse_version("1.9.0"): - return scipy.stats.mode(a, axis=axis, keepdims=keepdims) - - results = scipy.stats.mode(a, axis=axis) - if keepdims is None or keepdims: - return results - else: - return results[0], results[1] + return scipy.stats.mode(a, axis=axis, keepdims=True) + return scipy.stats.mode(a, axis=axis) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 0a68fbc99be14..14e541bbef2dc 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -56,7 +56,7 @@ def test_uniform_weights(): weights = np.ones(x.shape) for axis in (None, 0, 1): - mode, score = _mode(x, axis, keepdims=True) + mode, score = _mode(x, axis) mode2, score2 = weighted_mode(x, weights, axis=axis) assert_array_equal(mode, mode2) From ddcd6aaa7f8375ece2525a378502f7a70be046a5 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 4 Aug 2022 11:40:13 -0400 Subject: [PATCH 16/16] CLN Smaller diff --- sklearn/impute/_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index b97d977cb14bb..225ae249b2107 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -52,7 +52,9 @@ def _most_frequent(array, extra_value, n_repeat): if count == most_frequent_count ) else: - most_frequent_value, most_frequent_count = _mode(array) + mode = _mode(array) + most_frequent_value = mode[0][0] + most_frequent_count = mode[1][0] else: most_frequent_value = 0 most_frequent_count = 0