diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index 84838bed686fb..35f18bb125a43 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -172,6 +172,12 @@ Changelog
 now accepts string format or callable to generate feature names.
 :pr:`28934` by :user:`Marc Bresson <MarcBresson>`.
 
+:mod:`sklearn.cross_decomposition`
+..................................
+
+- |Fix| :class:`cross_decomposition.PLSRegression` properly raises an error when
+  `n_components` is larger than `n_samples`. :pr:`29710` by `Thomas Fan`_.
+
 :mod:`sklearn.datasets`
 .......................
 
diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py
index 16024cf961d27..ac1e8035590d6 100644
--- a/sklearn/cross_decomposition/_pls.py
+++ b/sklearn/cross_decomposition/_pls.py
@@ -291,7 +291,9 @@ def fit(self, X, y=None, Y=None):
         # With PLSRegression n_components is bounded by the rank of (X.T X) see
         # Wegelin page 25. With CCA and PLSCanonical, n_components is bounded
         # by the rank of X and the rank of Y: see Wegelin page 12
-        rank_upper_bound = p if self.deflation_mode == "regression" else min(n, p, q)
+        rank_upper_bound = (
+            min(n, p) if self.deflation_mode == "regression" else min(n, p, q)
+        )
         if n_components > rank_upper_bound:
             raise ValueError(
                 f"`n_components` upper bound is {rank_upper_bound}. "
diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py
index c8de4ad8a78de..381868b9b60b0 100644
--- a/sklearn/cross_decomposition/tests/test_pls.py
+++ b/sklearn/cross_decomposition/tests/test_pls.py
@@ -480,6 +480,17 @@ def test_n_components_upper_bounds(Estimator):
     est.fit(X, Y)
 
 
+def test_n_components_upper_PLSRegression():
+    """Check the validation of `n_components` upper bounds for PLSRegression."""
+    rng = np.random.RandomState(0)
+    X = rng.randn(20, 64)
+    Y = rng.randn(20, 3)
+    est = PLSRegression(n_components=30)
+    err_msg = "`n_components` upper bound is 20. Got 30 instead. Reduce `n_components`."
+    with pytest.raises(ValueError, match=err_msg):
+        est.fit(X, Y)
+
+
 @pytest.mark.parametrize("n_samples, n_features", [(100, 10), (100, 200)])
 def test_singular_value_helpers(n_samples, n_features, global_random_seed):
     # Make sure SVD and power method give approximately the same results