From 8b735615e46fd7d4915fbe874331f72b326cea35 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 4 Nov 2021 11:41:34 +0100 Subject: [PATCH 01/19] MNT Add validation of scalar parameters in Ridge --- sklearn/linear_model/_ridge.py | 42 +++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 1a501e8404f62..fa4957879e44b 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -10,6 +10,7 @@ from abc import ABCMeta, abstractmethod +import numbers import warnings import numpy as np @@ -26,6 +27,7 @@ from ..utils.extmath import row_norms from ..utils import check_array from ..utils import check_consistent_length +from ..utils import check_scalar from ..utils import compute_sample_weight from ..utils import column_or_1d from ..utils.validation import check_is_fitted @@ -1842,12 +1844,40 @@ def fit(self, X, y, sample_weight=None): if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) - self.alphas = np.asarray(self.alphas) - - if np.any(self.alphas <= 0): - raise ValueError( - "alphas must be strictly positive. 
Got {} containing some " - "negative or null value instead.".format(self.alphas) + if isinstance(self.alphas, (np.ndarray, list, tuple)): + n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) + if n_alphas != 1: + for alpha in self.alphas: + alpha = check_scalar( + alpha, + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + elif np.ndim(self.alphas) == 0: + self.alphas[()] = check_scalar( + self.alphas[()], + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + else: + self.alphas[0] = check_scalar( + self.alphas[0], + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + else: + self.alphas = check_scalar( + self.alphas, + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", ) X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data( From 4a150f613f56117e30d2178accc367d03488199a Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 4 Nov 2021 11:50:24 +0100 Subject: [PATCH 02/19] Add test for validation of `alphas` in `RidgeCV` --- sklearn/linear_model/_ridge.py | 11 ++++++----- sklearn/linear_model/tests/test_ridge.py | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index fa4957879e44b..74d379f09727c 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1847,10 +1847,10 @@ def fit(self, X, y, sample_weight=None): if isinstance(self.alphas, (np.ndarray, list, tuple)): n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) if n_alphas != 1: - for alpha in self.alphas: + for index, alpha in enumerate(self.alphas): alpha = check_scalar( alpha, - "alpha", + f"alphas[{index}]", target_type=numbers.Real, min_val=0.0, include_boundaries="neither", @@ -1858,7 +1858,7 @@ def fit(self, X, y, sample_weight=None): elif np.ndim(self.alphas) == 0: self.alphas[()] = check_scalar( 
self.alphas[()], - "alpha", + "alphas", target_type=numbers.Real, min_val=0.0, include_boundaries="neither", @@ -1866,7 +1866,7 @@ def fit(self, X, y, sample_weight=None): else: self.alphas[0] = check_scalar( self.alphas[0], - "alpha", + "alphas", target_type=numbers.Real, min_val=0.0, include_boundaries="neither", @@ -1874,11 +1874,12 @@ def fit(self, X, y, sample_weight=None): else: self.alphas = check_scalar( self.alphas, - "alpha", + "alphas", target_type=numbers.Real, min_val=0.0, include_boundaries="neither", ) + self.alphas = np.asarray(self.alphas) X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data( X, diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 58d2804d89aca..5eba2669144a7 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1239,13 +1239,24 @@ def test_ridgecv_negative_alphas(): y = [1, 1, 1, -1, -1] # Negative integers - ridge = RidgeCV(alphas=(-1, -10, -100)) - with pytest.raises(ValueError, match="alphas must be strictly positive"): + ridge = RidgeCV(alphas=(1, -1, -100)) + with pytest.raises(ValueError, match=r"alphas\[1\] == -1, must be > 0.0"): ridge.fit(X, y) # Negative floats ridge = RidgeCV(alphas=(-0.1, -1.0, -10.0)) - with pytest.raises(ValueError, match="alphas must be strictly positive"): + with pytest.raises(ValueError, match=r"alphas\[0\] == -0.1, must be > 0.0"): + ridge.fit(X, y) + + # Positive strings + ridge = RidgeCV(alphas=(1, 1.0, "1")) + with pytest.raises( + TypeError, + match=( + r"alphas\[2\] must be an instance of , not " + ), + ): ridge.fit(X, y) From cf907ba9ced6d24342abe1534c4c9711ecc02094 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 18 Nov 2021 19:00:18 +0100 Subject: [PATCH 03/19] Add validation when cv is not None --- sklearn/linear_model/_ridge.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/sklearn/linear_model/_ridge.py 
b/sklearn/linear_model/_ridge.py index 74d379f09727c..c27889d32e17a 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -2070,6 +2070,42 @@ def fit(self, X, y, sample_weight=None): raise ValueError("cv!=None and store_cv_values=True are incompatible") if self.alpha_per_target: raise ValueError("cv!=None and alpha_per_target=True are incompatible") + if isinstance(self.alphas, (np.ndarray, list, tuple)): + n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) + if n_alphas != 1: + for index, alpha in enumerate(self.alphas): + alpha = check_scalar( + alpha, + f"alphas[{index}]", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + elif np.ndim(self.alphas) == 0: + self.alphas[()] = check_scalar( + self.alphas[()], + "alphas", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + else: + self.alphas[0] = check_scalar( + self.alphas[0], + "alphas", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + else: + self.alphas = check_scalar( + self.alphas, + "alphas", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + self.alphas = np.asarray(self.alphas) parameters = {"alpha": self.alphas} solver = "sparse_cg" if sparse.issparse(X) else "auto" model = RidgeClassifier if is_classifier(self) else Ridge From c76ac26e8eaf0e1b60b79f8bdf9e6be0cd85380c Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 18 Nov 2021 19:01:23 +0100 Subject: [PATCH 04/19] Modify test to include case when cv is not None --- sklearn/linear_model/tests/test_ridge.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 5eba2669144a7..c21b351bb5c46 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1234,21 +1234,23 @@ def test_ridgecv_int_alphas(): ridge.fit(X, y) -def 
test_ridgecv_negative_alphas(): +def test_ridgecv_scalar_alphas(): X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] + # The method for fitting _BaseRidgeCV depends whether cv=None + cv = KFold(3) # Negative integers ridge = RidgeCV(alphas=(1, -1, -100)) with pytest.raises(ValueError, match=r"alphas\[1\] == -1, must be > 0.0"): ridge.fit(X, y) - # Negative floats - ridge = RidgeCV(alphas=(-0.1, -1.0, -10.0)) + # Negative floats and cv is not None + ridge = RidgeCV(alphas=(-0.1, -1.0, -10.0), cv=cv) with pytest.raises(ValueError, match=r"alphas\[0\] == -0.1, must be > 0.0"): ridge.fit(X, y) - # Positive strings + # Strings ridge = RidgeCV(alphas=(1, 1.0, "1")) with pytest.raises( TypeError, From c3644013314b129d283b783632a80076bf98c032 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Fri, 19 Nov 2021 09:58:05 +0100 Subject: [PATCH 05/19] Add tests for `alphas` validation in `RidgeClassifierCV` --- sklearn/linear_model/tests/test_ridge.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index c21b351bb5c46..4c1de31158b40 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1262,6 +1262,34 @@ def test_ridgecv_scalar_alphas(): ridge.fit(X, y) +def test_ridgeclassifiercv_scalar_alphas(): + X, Y = make_multilabel_classification(n_classes=1, random_state=0) + Y = Y.reshape(-1, 1) + y = np.concatenate([Y, Y], axis=1) + # The method for fitting _BaseRidgeCV depends whether cv=None + cv = KFold(3) + + clf = RidgeClassifierCV(alphas=(1, -1, -100)) + with pytest.raises(ValueError, match=r"alphas\[1\] == -1, must be > 0.0"): + clf.fit(X, y) + + # Negative floats and cv is not None + clf = RidgeClassifierCV(alphas=(-0.1, -1.0, -10.0), cv=cv) + with pytest.raises(ValueError, match=r"alphas\[0\] == -0.1, must be > 0.0"): + clf.fit(X, y) + + # Strings + clf = 
RidgeClassifierCV(alphas=(1, 1.0, "1")) + with pytest.raises( + TypeError, + match=( + r"alphas\[2\] must be an instance of , not " + ), + ): + clf.fit(X, y) + + def test_raises_value_error_if_solver_not_supported(): # Tests whether a ValueError is raised if a non-identified solver # is passed to ridge_regression From 63623b535af3f1e2fb753396abaac151ed2e8216 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 23 Nov 2021 17:35:21 +0100 Subject: [PATCH 06/19] Use from instead of specific types --- sklearn/linear_model/_ridge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index c27889d32e17a..1a11d81c268ce 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -10,6 +10,7 @@ from abc import ABCMeta, abstractmethod +import collections import numbers import warnings @@ -1844,7 +1845,7 @@ def fit(self, X, y, sample_weight=None): if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) - if isinstance(self.alphas, (np.ndarray, list, tuple)): + if isinstance(self.alphas, collections.abc.Collection): n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) if n_alphas != 1: for index, alpha in enumerate(self.alphas): @@ -2070,7 +2071,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("cv!=None and store_cv_values=True are incompatible") if self.alpha_per_target: raise ValueError("cv!=None and alpha_per_target=True are incompatible") - if isinstance(self.alphas, (np.ndarray, list, tuple)): + if isinstance(self.alphas, collections.abc.Collection): n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) if n_alphas != 1: for index, alpha in enumerate(self.alphas): From b0663a9d1b4862074c62490bb3c8516e52b2d9e5 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Fri, 26 Nov 2021 11:23:04 +0100 Subject: [PATCH 07/19] DOC Add whatsnew entry --- doc/whats_new/v1.1.rst | 6 ++++++ 1 file changed, 
6 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index cc607aedb3946..28fa50db65150 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -141,6 +141,12 @@ Changelog multilabel classification. :pr:`19689` by :user:`Guillaume Lemaitre `. +- |Enhancement| :class:`linear_model.RidgeCV` and + :class:`linear_model.RidgeClassifierCV` now raise consistent error message + when passed invalid values for `alpha`. + :pr:`21606` by :user:`Arturo Amor ` + and :user:`Julien Jerphanion `. + :mod:`sklearn.metrics` ...................... From 881af91db7f3def0a384ee52d0ef436bfc74ed2f Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Fri, 26 Nov 2021 14:08:37 +0100 Subject: [PATCH 08/19] Factorize `check_scalar` out of `_RidgeGCV` and simplify code Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_ridge.py | 100 ++++++++++----------------------- 1 file changed, 29 insertions(+), 71 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 1a11d81c268ce..237668bc982c0 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -10,6 +10,7 @@ from abc import ABCMeta, abstractmethod +from functools import partial import collections import numbers import warnings @@ -1845,41 +1846,6 @@ def fit(self, X, y, sample_weight=None): if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) - if isinstance(self.alphas, collections.abc.Collection): - n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) - if n_alphas != 1: - for index, alpha in enumerate(self.alphas): - alpha = check_scalar( - alpha, - f"alphas[{index}]", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - elif np.ndim(self.alphas) == 0: - self.alphas[()] = check_scalar( - self.alphas[()], - "alphas", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - else: - self.alphas[0] = check_scalar( - self.alphas[0], - 
"alphas", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - else: - self.alphas = check_scalar( - self.alphas, - "alphas", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) self.alphas = np.asarray(self.alphas) X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data( @@ -2050,6 +2016,33 @@ def fit(self, X, y, sample_weight=None): the validation score. """ cv = self.cv + + check_scalar_alpha = partial( + check_scalar, + target_type=numbers.Real, + min_val=0.0, + include_boundaries="neither", + ) + + if isinstance(self.alphas, collections.abc.Collection): + n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) + if n_alphas != 1: + for index, alpha in enumerate(self.alphas): + alpha = check_scalar_alpha(alpha, f"alphas[{index}]") + elif np.ndim(self.alphas) == 0: + # converting a single scalar into a np.ndarray + # (e.g. `x=np.array(1)`) + # creates a 0d-array whose value can be accessed with + # `x[()]` + self.alphas[()] = check_scalar_alpha(self.alphas[()], "alphas") + else: + self.alphas[0] = check_scalar_alpha(self.alphas[0], "alphas") + else: + # check for single non-iterable item + self.alphas = check_scalar_alpha(self.alphas, "alphas") + + self.alphas = np.asarray(self.alphas) + if cv is None: estimator = _RidgeGCV( self.alphas, @@ -2071,42 +2064,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("cv!=None and store_cv_values=True are incompatible") if self.alpha_per_target: raise ValueError("cv!=None and alpha_per_target=True are incompatible") - if isinstance(self.alphas, collections.abc.Collection): - n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) - if n_alphas != 1: - for index, alpha in enumerate(self.alphas): - alpha = check_scalar( - alpha, - f"alphas[{index}]", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - elif np.ndim(self.alphas) == 0: - self.alphas[()] = check_scalar( - self.alphas[()], - "alphas", - 
target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - else: - self.alphas[0] = check_scalar( - self.alphas[0], - "alphas", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - else: - self.alphas = check_scalar( - self.alphas, - "alphas", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="neither", - ) - self.alphas = np.asarray(self.alphas) + parameters = {"alpha": self.alphas} solver = "sparse_cg" if sparse.issparse(X) else "auto" model = RidgeClassifier if is_classifier(self) else Ridge From 26a478b3edb454bded24b79800d665fa8b3c011c Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 10:19:12 +0100 Subject: [PATCH 09/19] tweak --- doc/whats_new/v1.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 28fa50db65150..21e76d9a06eb9 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -143,7 +143,7 @@ Changelog - |Enhancement| :class:`linear_model.RidgeCV` and :class:`linear_model.RidgeClassifierCV` now raise consistent error message - when passed invalid values for `alpha`. + when passed invalid values for `alphas`. :pr:`21606` by :user:`Arturo Amor ` and :user:`Julien Jerphanion `. 
From f31c4bc3594f1234349a70e6110f4c9c69262050 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 10:20:20 +0100 Subject: [PATCH 10/19] Improve readability of tests Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_ridge.py | 86 ++++++++++++------------ 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 4c1de31158b40..373ce16b10fa0 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1234,60 +1234,58 @@ def test_ridgecv_int_alphas(): ridge.fit(X, y) -def test_ridgecv_scalar_alphas(): +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + ({"alphas": (1, -1, -100)}, ValueError, r"alphas\[1\] == -1, must be > 0.0"), + ( + {"alphas": (-0.1, -1.0, -10.0)}, + ValueError, + r"alphas\[0\] == -0.1, must be > 0.0", + ), + ( + {"alphas": (1, 1.0, "1")}, + TypeError, + r"alphas\[2\] must be an instance of , not ", + ), + ], +) +def test_ridgecv_scalar_alphas(params, err_type, err_msg): + """Check the parameters validation in RidgeCV.""" + X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] - # The method for fitting _BaseRidgeCV depends whether cv=None - cv = KFold(3) - # Negative integers - ridge = RidgeCV(alphas=(1, -1, -100)) - with pytest.raises(ValueError, match=r"alphas\[1\] == -1, must be > 0.0"): - ridge.fit(X, y) + with pytest.raises(err_type, match=err_msg): + RidgeCV(**params).fit(X, y) - # Negative floats and cv is not None - ridge = RidgeCV(alphas=(-0.1, -1.0, -10.0), cv=cv) - with pytest.raises(ValueError, match=r"alphas\[0\] == -0.1, must be > 0.0"): - ridge.fit(X, y) - # Strings - ridge = RidgeCV(alphas=(1, 1.0, "1")) - with pytest.raises( - TypeError, - match=( +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + ({"alphas": (1, -1, -100)}, ValueError, r"alphas\[1\] == -1, must be > 0.0"), + ( + {"alphas": 
(-0.1, -1.0, -10.0)}, + ValueError, + r"alphas\[0\] == -0.1, must be > 0.0", + ), + ( + {"alphas": (1, 1.0, "1")}, + TypeError, r"alphas\[2\] must be an instance of , not " + r" 'str'>", ), - ): - ridge.fit(X, y) - - -def test_ridgeclassifiercv_scalar_alphas(): + ], +) +def test_ridgeclassifiercv_scalar_alphas(params, err_type, err_msg): + """Check the parameters validation in RidgeClassifierCV.""" X, Y = make_multilabel_classification(n_classes=1, random_state=0) Y = Y.reshape(-1, 1) y = np.concatenate([Y, Y], axis=1) - # The method for fitting _BaseRidgeCV depends whether cv=None - cv = KFold(3) - - clf = RidgeClassifierCV(alphas=(1, -1, -100)) - with pytest.raises(ValueError, match=r"alphas\[1\] == -1, must be > 0.0"): - clf.fit(X, y) - - # Negative floats and cv is not None - clf = RidgeClassifierCV(alphas=(-0.1, -1.0, -10.0), cv=cv) - with pytest.raises(ValueError, match=r"alphas\[0\] == -0.1, must be > 0.0"): - clf.fit(X, y) - - # Strings - clf = RidgeClassifierCV(alphas=(1, 1.0, "1")) - with pytest.raises( - TypeError, - match=( - r"alphas\[2\] must be an instance of , not " - ), - ): - clf.fit(X, y) + + with pytest.raises(err_type, match=err_msg): + RidgeClassifierCV(**params).fit(X, y) def test_raises_value_error_if_solver_not_supported(): From 9cf3647d61a2acbf3399c33a2a932e5e808eb52a Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 10:54:00 +0100 Subject: [PATCH 11/19] Fix doc format --- doc/whats_new/v1.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 04da373a04638..6375db68e4ba2 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -192,6 +192,7 @@ Changelog when passed invalid values for `alphas`. :pr:`21606` by :user:`Arturo Amor ` and :user:`Julien Jerphanion `. + :mod:`sklearn.linear_model` ........................... 
From f573c5cc667ca2dde639f14f762a15f46e8e4d00 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 14:11:30 +0100 Subject: [PATCH 12/19] Merge tests for `RidgeCV` and `RidgeClassifierCV` --- sklearn/linear_model/tests/test_ridge.py | 42 ++++++------------------ 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 373ce16b10fa0..2e7b819482134 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1234,6 +1234,7 @@ def test_ridgecv_int_alphas(): ridge.fit(X, y) +@pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV]) @pytest.mark.parametrize( "params, err_type, err_msg", [ @@ -1251,41 +1252,18 @@ def test_ridgecv_int_alphas(): ), ], ) -def test_ridgecv_scalar_alphas(params, err_type, err_msg): - """Check the parameters validation in RidgeCV.""" - - X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) - y = [1, 1, 1, -1, -1] +def test_ridgecv_scalar_alphas(Estimator, params, err_type, err_msg): + """Check the `alphas` validation in RidgeCV and RidgeClassifierCV.""" - with pytest.raises(err_type, match=err_msg): - RidgeCV(**params).fit(X, y) - - -@pytest.mark.parametrize( - "params, err_type, err_msg", - [ - ({"alphas": (1, -1, -100)}, ValueError, r"alphas\[1\] == -1, must be > 0.0"), - ( - {"alphas": (-0.1, -1.0, -10.0)}, - ValueError, - r"alphas\[0\] == -0.1, must be > 0.0", - ), - ( - {"alphas": (1, 1.0, "1")}, - TypeError, - r"alphas\[2\] must be an instance of , not ", - ), - ], -) -def test_ridgeclassifiercv_scalar_alphas(params, err_type, err_msg): - """Check the parameters validation in RidgeClassifierCV.""" - X, Y = make_multilabel_classification(n_classes=1, random_state=0) - Y = Y.reshape(-1, 1) - y = np.concatenate([Y, Y], axis=1) + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + if Estimator is RidgeCV: + y = rng.randn(n_samples) + else: + 
y = rng.randint(0, 2, n_samples) with pytest.raises(err_type, match=err_msg): - RidgeClassifierCV(**params).fit(X, y) + Estimator(**params).fit(X, y) def test_raises_value_error_if_solver_not_supported(): From 06ee9fbfece4d5204711d1f6363375b7defd69ef Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 1 Dec 2021 14:16:33 +0100 Subject: [PATCH 13/19] Update doc/whats_new/v1.1.rst Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.1.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 0f5f5d6e89b1f..329f87813a389 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -190,8 +190,7 @@ Changelog - |Enhancement| :class:`linear_model.RidgeCV` and :class:`linear_model.RidgeClassifierCV` now raise consistent error message when passed invalid values for `alphas`. - :pr:`21606` by :user:`Arturo Amor ` - and :user:`Julien Jerphanion `. + :pr:`21606` by :user:`Arturo Amor `. 
- |Enhancement| :class:`linear_model.Ridge` and :class:`linear_model.RidgeClassifier` now raise consistent error message when passed invalid values for `alpha`, From 01c001762e82f6ba32cd5dd767c43552fdadd58c Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 1 Dec 2021 14:17:09 +0100 Subject: [PATCH 14/19] Create local variable to avoid mutation inside of fit Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 93edd6834adf7..da47b925f725d 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -2060,7 +2060,7 @@ def fit(self, X, y, sample_weight=None): # check for single non-iterable item self.alphas = check_scalar_alpha(self.alphas, "alphas") - self.alphas = np.asarray(self.alphas) + alphas = np.asarray(self.alphas) if cv is None: estimator = _RidgeGCV( From ca7f604b8f5e90482bd385c91aaed206df83db2f Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 14:50:27 +0100 Subject: [PATCH 15/19] Revert use collections.abc instead of specific types --- sklearn/linear_model/_ridge.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index da47b925f725d..86c6659cf032e 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -11,8 +11,6 @@ from abc import ABCMeta, abstractmethod from functools import partial -import collections -import numbers import warnings import numpy as np @@ -2043,7 +2041,7 @@ def fit(self, X, y, sample_weight=None): include_boundaries="neither", ) - if isinstance(self.alphas, collections.abc.Collection): + if isinstance(self.alphas, (np.ndarray, list, tuple)): n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas) if n_alphas != 1: for index, alpha in enumerate(self.alphas): @@ -2064,7 
+2062,7 @@ def fit(self, X, y, sample_weight=None): if cv is None: estimator = _RidgeGCV( - self.alphas, + alphas, fit_intercept=self.fit_intercept, normalize=self.normalize, scoring=self.scoring, @@ -2084,7 +2082,7 @@ def fit(self, X, y, sample_weight=None): if self.alpha_per_target: raise ValueError("cv!=None and alpha_per_target=True are incompatible") - parameters = {"alpha": self.alphas} + parameters = {"alpha": alphas} solver = "sparse_cg" if sparse.issparse(X) else "auto" model = RidgeClassifier if is_classifier(self) else Ridge gs = GridSearchCV( From 4626c45bfebb1be4839a3594e23fc1a3ff72caac Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 1 Dec 2021 15:27:28 +0100 Subject: [PATCH 16/19] Improve test Co-authored-by: Julien Jerphanion --- sklearn/linear_model/tests/test_ridge.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 7f33074356e09..2612a271b3559 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1293,10 +1293,7 @@ def test_ridgecv_scalar_alphas(Estimator, params, err_type, err_msg): n_samples, n_features = 5, 5 X = rng.randn(n_samples, n_features) - if Estimator is RidgeCV: - y = rng.randn(n_samples) - else: - y = rng.randint(0, 2, n_samples) + y = rng.randint(0, 2, n_samples) with pytest.raises(err_type, match=err_msg): Estimator(**params).fit(X, y) From 34898150e2aa573c2db4ca7f9bc73d5b231ef946 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 17:21:03 +0100 Subject: [PATCH 17/19] Improve test name --- sklearn/linear_model/tests/test_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 7f33074356e09..47cc54abcd2a5 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ 
b/sklearn/linear_model/tests/test_ridge.py @@ -1288,7 +1288,7 @@ def test_ridgecv_int_alphas(): ), ], ) -def test_ridgecv_scalar_alphas(Estimator, params, err_type, err_msg): +def test_ridgecv_alphas_validation(Estimator, params, err_type, err_msg): """Check the `alphas` validation in RidgeCV and RidgeClassifierCV.""" n_samples, n_features = 5, 5 From 1950facdbcf877237ebd30e6f85f1b7e992ec3fb Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 17:22:22 +0100 Subject: [PATCH 18/19] Add test to ensure backward compatibility --- sklearn/linear_model/tests/test_ridge.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 47cc54abcd2a5..26cdcc8ce8da5 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1302,6 +1302,24 @@ def test_ridgecv_alphas_validation(Estimator, params, err_type, err_msg): Estimator(**params).fit(X, y) +@pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV]) +def test_ridgecv_alphas_scalar(Estimator): + """Check the case when `alphas` is a scalar. + This case was supported in the past when `alphas` were converted
+ """ + + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + if Estimator is RidgeCV: + y = rng.randn(n_samples) + else: + y = rng.randint(0, 2, n_samples) + + Estimator(alphas=1).fit(X, y) + + def test_raises_value_error_if_solver_not_supported(): # Tests whether a ValueError is raised if a non-identified solver # is passed to ridge_regression From dcbd73912db95da9311eb881f8b6dfe2b0a484a6 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 17:25:25 +0100 Subject: [PATCH 19/19] Remove case of alphas being a 0d-array --- sklearn/linear_model/_ridge.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 86c6659cf032e..1426201a893aa 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -2046,12 +2046,6 @@ def fit(self, X, y, sample_weight=None): if n_alphas != 1: for index, alpha in enumerate(self.alphas): alpha = check_scalar_alpha(alpha, f"alphas[{index}]") - elif np.ndim(self.alphas) == 0: - # converting a single scalar into a np.ndarray - # (e.g. `x=np.array(1)`) - # creates a 0d-array whose value can be accessed with - # `x[()]` - self.alphas[()] = check_scalar_alpha(self.alphas[()], "alphas") else: self.alphas[0] = check_scalar_alpha(self.alphas[0], "alphas") else: