From d0d485c4aeea315bb6effc25334a0ab298229ca3 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Fri, 15 Oct 2021 12:07:58 +0200 Subject: [PATCH 01/15] Add validation on alpha --- sklearn/linear_model/_ridge.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 1dcc81e3b988f..7bf2548de9ca3 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -13,6 +13,7 @@ import warnings import numpy as np +import numbers from scipy import linalg from scipy import sparse from scipy import optimize @@ -26,6 +27,7 @@ from ..utils.extmath import row_norms from ..utils import check_array from ..utils import check_consistent_length +from ..utils import check_scalar from ..utils import compute_sample_weight from ..utils import column_or_1d from ..utils.validation import _check_sample_weight @@ -564,8 +566,11 @@ def _ridge_regression( % (alpha.size, n_targets) ) - if alpha.size == 1 and n_targets > 1: - alpha = np.repeat(alpha, n_targets) + if alpha.size == 1: + check_scalar(alpha, "alpha", target_type=numbers.Real, + min_val=0.0, include_boundaries="left") + if n_targets > 1: + alpha = np.repeat(alpha, n_targets) n_iter = None if solver == "sparse_cg": From 553143d6ea75b1700eada58e2fe2dfdb02664788 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 19 Oct 2021 11:36:48 +0200 Subject: [PATCH 02/15] Modify alpha inplace for validation --- sklearn/linear_model/_ridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 7bf2548de9ca3..bf507f5b95d17 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -567,8 +567,8 @@ def _ridge_regression( ) if alpha.size == 1: - check_scalar(alpha, "alpha", target_type=numbers.Real, - min_val=0.0, include_boundaries="left") + alpha[0] = check_scalar(alpha[0], "alpha", target_type=numbers.Real, + min_val=0.0, 
include_boundaries="left") if n_targets > 1: alpha = np.repeat(alpha, n_targets) From eb77f3585202c17c7a2552cf2442274e7cbb6230 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 19 Oct 2021 13:42:27 +0200 Subject: [PATCH 03/15] Format code with pre-commit config --- sklearn/linear_model/_ridge.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index bf507f5b95d17..8410dd37a1e7c 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -567,8 +567,13 @@ def _ridge_regression( ) if alpha.size == 1: - alpha[0] = check_scalar(alpha[0], "alpha", target_type=numbers.Real, - min_val=0.0, include_boundaries="left") + alpha[0] = check_scalar( + alpha[0], + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="left", + ) if n_targets > 1: alpha = np.repeat(alpha, n_targets) From c55bb551ba3441d5288e74fc62a7963ef0ad3710 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 20 Oct 2021 11:13:17 +0200 Subject: [PATCH 04/15] Validate max_iter for Ridge Also corrects validation for alpha --- sklearn/linear_model/_ridge.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 8410dd37a1e7c..d055c91193f89 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -558,6 +558,15 @@ def _ridge_regression( # we implement sample_weight via a simple rescaling. 
X, y = _rescale_data(X, y, sample_weight) + if not isinstance(alpha, (np.ndarray, type(None), tuple)): + alpha = check_scalar( + alpha, + "alpha", + target_type=numbers.Real, + min_val=0.0, + include_boundaries="left", + ) + # There should be either 1 or n_targets penalties alpha = np.asarray(alpha, dtype=X.dtype).ravel() if alpha.size not in [1, n_targets]: @@ -566,16 +575,8 @@ def _ridge_regression( % (alpha.size, n_targets) ) - if alpha.size == 1: - alpha[0] = check_scalar( - alpha[0], - "alpha", - target_type=numbers.Real, - min_val=0.0, - include_boundaries="left", - ) - if n_targets > 1: - alpha = np.repeat(alpha, n_targets) + if alpha.size == 1 and n_targets > 1: + alpha = np.repeat(alpha, n_targets) n_iter = None if solver == "sparse_cg": @@ -761,6 +762,11 @@ def fit(self, X, y, sample_weight=None): if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) + if self.max_iter is not None: + self.max_iter = check_scalar( + self.max_iter, "max_iter", target_type=numbers.Integral, min_val=1 + ) + # when X is sparse we only remove offset from y X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, From 88a22442f87a2c03c4fee9f1c8c3b75a79b90433 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 20 Oct 2021 12:01:55 +0200 Subject: [PATCH 05/15] Add tests for alpha and max_iter validation --- sklearn/linear_model/tests/test_ridge.py | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index bfc6722737bd8..a24d3c222cace 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -335,6 +335,36 @@ def test_ridge_individual_penalties(): ridge.fit(X, y) +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + ({"alpha": -1}, ValueError, "alpha == -1, must be >= 0.0"), + ( + {"alpha": "1"}, + TypeError, + "alpha must be an instance of , not ", + ), + ({"max_iter": 
0}, ValueError, "max_iter == 0, must be >= 1."), + ( + {"max_iter": "1"}, + TypeError, + "max_iter must be an instance of , not ", + ), + ], +) +def test_Ridge_params_validation(params, err_type, err_msg): + """Check the parameters validation in `Ridge`.""" + + rng = np.random.RandomState(42) + n_samples, n_features, n_targets = 20, 10, 5 + X = rng.randn(n_samples, n_features) + y = rng.randn(n_samples, n_targets) + + with pytest.raises(err_type, match=err_msg): + Ridge(**params).fit(X, y) + + @pytest.mark.parametrize("n_col", [(), (1,), (3,)]) def test_X_CenterStackOp(n_col): rng = np.random.RandomState(0) From 1475fd098df4b7c54380808a86e654c916e64225 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 20 Oct 2021 16:20:36 +0200 Subject: [PATCH 06/15] Validate tol for Ridge --- sklearn/linear_model/_ridge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index d055c91193f89..5c4f15525f355 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -767,6 +767,8 @@ def fit(self, X, y, sample_weight=None): self.max_iter, "max_iter", target_type=numbers.Integral, min_val=1 ) + self.tol = check_scalar(self.tol, "tol", target_type=float, min_val=0.0) + # when X is sparse we only remove offset from y X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, From fe56c15a7130db03e4bd5ca9f2db9edc9e66ce39 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 20 Oct 2021 16:23:01 +0200 Subject: [PATCH 07/15] Add tests for tol validation --- sklearn/linear_model/tests/test_ridge.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index a24d3c222cace..7a438af3afab3 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -351,6 +351,17 @@ def test_ridge_individual_penalties(): "max_iter must be an instance of , not ", ), + ({"tol": 
-1.0}, ValueError, "tol == -1.0, must be >= 0."), + ( + {"tol": 1}, + TypeError, + "tol must be an instance of , not ", + ), + ( + {"tol": "1"}, + TypeError, + "tol must be an instance of , not ", + ), ], ) def test_Ridge_params_validation(params, err_type, err_msg): From f427dc257483f52260c49837da3488069e9133fd Mon Sep 17 00:00:00 2001 From: ArturoAmor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 10 Nov 2021 15:35:33 +0100 Subject: [PATCH 08/15] Update sklearn/linear_model/_ridge.py This change is indeed independent of `RidgeCV`. This commit adds an explanation on the need for a conditional statement during validation. Co-authored-by: Julien Jerphanion --- sklearn/linear_model/_ridge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 5c4f15525f355..cf95eb9c31dbc 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -558,6 +558,8 @@ def _ridge_regression( # we implement sample_weight via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) + # Some callers of this method might pass alpha as single + # element array which already has been validated. 
if not isinstance(alpha, (np.ndarray, type(None), tuple)): alpha = check_scalar( alpha, From 4c0bce27a640c9faf80fc3ea2a22afe8883c2b65 Mon Sep 17 00:00:00 2001 From: ArturoAmor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 10 Nov 2021 15:36:41 +0100 Subject: [PATCH 09/15] Update sklearn/linear_model/tests/test_ridge.py Tweak to adopt conventions Co-authored-by: Julien Jerphanion --- sklearn/linear_model/tests/test_ridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 7a438af3afab3..45d24bc87c045 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -364,8 +364,8 @@ def test_ridge_individual_penalties(): ), ], ) -def test_Ridge_params_validation(params, err_type, err_msg): - """Check the parameters validation in `Ridge`.""" +def test_ridge_params_validation(params, err_type, err_msg): + """Check the parameters validation in Ridge.""" rng = np.random.RandomState(42) n_samples, n_features, n_targets = 20, 10, 5 From d397336670547d1363a0b821598285ae6c62b14f Mon Sep 17 00:00:00 2001 From: ArturoAmor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Mon, 22 Nov 2021 15:22:55 +0100 Subject: [PATCH 10/15] Improve readability Co-authored-by: Olivier Grisel --- sklearn/linear_model/_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index cf95eb9c31dbc..015734d7a51c1 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -560,7 +560,7 @@ def _ridge_regression( # Some callers of this method might pass alpha as single # element array which already has been validated. 
- if not isinstance(alpha, (np.ndarray, type(None), tuple)): + if alpha is not None and not isinstance(alpha, (np.ndarray, tuple)): alpha = check_scalar( alpha, "alpha", From 113bdf2982c65609c9c42e1acfa4f835e408bb77 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Mon, 22 Nov 2021 16:02:51 +0100 Subject: [PATCH 11/15] Change `tol` from to --- sklearn/linear_model/_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 015734d7a51c1..7b12f7f972668 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -769,7 +769,7 @@ def fit(self, X, y, sample_weight=None): self.max_iter, "max_iter", target_type=numbers.Integral, min_val=1 ) - self.tol = check_scalar(self.tol, "tol", target_type=float, min_val=0.0) + self.tol = check_scalar(self.tol, "tol", target_type=numbers.Real, min_val=0.0) # when X is sparse we only remove offset from y X, y, X_offset, y_offset, X_scale = self._preprocess_data( From 16a52f1d683d2966ebe852b55d29553a4b288de1 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Mon, 22 Nov 2021 16:05:37 +0100 Subject: [PATCH 12/15] Edit tests to accept `tol` in --- sklearn/linear_model/tests/test_ridge.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 45d24bc87c045..e73c3e82321f9 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -352,15 +352,10 @@ def test_ridge_individual_penalties(): " 'str'>", ), ({"tol": -1.0}, ValueError, "tol == -1.0, must be >= 0."), - ( - {"tol": 1}, - TypeError, - "tol must be an instance of , not ", - ), ( {"tol": "1"}, TypeError, - "tol must be an instance of , not ", + "tol must be an instance of , not ", ), ], ) From 91bec921e80ae5e91d49a9322e495e00cc0566cd Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Mon, 22 Nov 2021 16:08:17 +0100 Subject: [PATCH 
13/15] Add changelog entry in `doc/whats_new/v1.1.rst` --- doc/whats_new/v1.1.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index a473908d8f1e7..621d9331e70da 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -68,6 +68,12 @@ Changelog message when the solver does not support sparse matrices with int64 indices. :pr:`21093` by `Tom Dupre la Tour`_. +- |Enhancement| :class:`linear_model.Ridge` and :class:`linear_model.RidgeClassifier` + now raise consistent error message when passed invalid values for `alpha`, + `max_iter` and `tol`. + :pr:`21341` by :user:`Arturo Amor <ArturoAmorQ>` + and :user:`Julien Jerphanion <jjerphan>`. + :mod:`sklearn.metrics` ...................... From c4659e588dd43cfc58c51d47b313270305f5ebdd Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 1 Dec 2021 09:43:51 +0100 Subject: [PATCH 14/15] Update doc/whats_new/v1.1.rst Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.1.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 6b83b7b95d7a8..2973213bb8e96 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -144,8 +144,7 @@ Changelog - |Enhancement| :class:`linear_model.Ridge` and :class:`linear_model.RidgeClassifier` now raise consistent error message when passed invalid values for `alpha`, `max_iter` and `tol`. - :pr:`21341` by :user:`Arturo Amor <ArturoAmorQ>` - and :user:`Julien Jerphanion <jjerphan>`. + :pr:`21341` by :user:`Arturo Amor <ArturoAmorQ>`. :mod:`sklearn.metrics` ...................... 
From 47fd0bd5268529c028341b6fd7443744f97fcce9 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 1 Dec 2021 09:55:27 +0100 Subject: [PATCH 15/15] tweak --- doc/whats_new/v1.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index c1270d28bf80f..b9651a1e1b6f8 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -191,6 +191,7 @@ Changelog now raise consistent error message when passed invalid values for `alpha`, `max_iter` and `tol`. :pr:`21341` by :user:`Arturo Amor <ArturoAmorQ>`. + :mod:`sklearn.linear_model` ...........................