From 069d1431164ec7588b4bb86f2f3c72bc89ee7edd Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 18 Sep 2024 18:39:58 +0200 Subject: [PATCH 1/2] TST check that we support multioutput custom scorer in RidgeCV --- doc/whats_new/v1.6.rst | 5 ++++ sklearn/linear_model/tests/test_ridge.py | 29 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 22a0d7acfd24e..f17eccf2b644b 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -257,6 +257,11 @@ Changelog for the calculation of test scores. :pr:`29419` by :user:`Shruti Nath `. +- |Fix| :class:`linear_model.RidgeCV` supports properly custom multioutput scorers by + letting the scorer managing the multioutput averaging. Previously, the predictions + and true targets where both squeezed to a 1d array before computing the error. + :pr:`xxx` by :user:`Guillaume Lemaitre `. + - |API| Deprecates `copy_X` in :class:`linear_model.TheilSenRegressor` as the parameter has no effect. `copy_X` will be removed in 1.8. :pr:`29105` by :user:`Adam Li `. 
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index cb42e1a473647..a7a9c429a45f0 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -2278,6 +2278,35 @@ def test_ridge_cv_multioutput_sample_weight(global_random_seed): assert_allclose(ridge_cv.best_score_, -mean_squared_error(y, y_pred_loo)) +def test_ridge_cv_custom_multioutput_scorer(): + """Check that `RidgeCV` works properly with a custom multioutput scorer.""" + X, y = make_regression(n_targets=2, random_state=0) + + def custom_error(y_true, y_pred): + errors = (y_true - y_pred) ** 2 + mean_errors = np.mean(errors, axis=0) + if mean_errors.ndim == 1: + # case of multioutput: the first target is weighted twice as much + return -np.average(mean_errors, weights=[2, 1]) + # single output + return -mean_errors + + def custom_multioutput_scorer(estimator, X, y): + """Multioutput score that gives twice as much importance to the first target.""" + return -custom_error(y, estimator.predict(X)) + + ridge_cv = RidgeCV(scoring=custom_multioutput_scorer) + ridge_cv.fit(X, y) + + cv = LeaveOneOut() + ridge = Ridge(alpha=ridge_cv.alpha_) + y_pred_loo = np.squeeze( + [ridge.fit(X[train], y[train]).predict(X[test]) for train, test in cv.split(X)] + ) + + assert_allclose(ridge_cv.best_score_, -custom_error(y, y_pred_loo)) + + # Metadata Routing Tests # ====================== From 11d8f45715479097501c49cc565277d96a633ae0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 18 Sep 2024 18:41:26 +0200 Subject: [PATCH 2/2] update changelog --- doc/whats_new/v1.6.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index f17eccf2b644b..aa16d2b207a4d 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -260,7 +260,7 @@ Changelog - |Fix| :class:`linear_model.RidgeCV` supports properly custom multioutput scorers by letting the scorer managing the multioutput averaging. 
Previously, the predictions and true targets where both squeezed to a 1d array before computing the error. - :pr:`xxx` by :user:`Guillaume Lemaitre `. + :pr:`29884` by :user:`Guillaume Lemaitre `. - |API| Deprecates `copy_X` in :class:`linear_model.TheilSenRegressor` as the parameter has no effect. `copy_X` will be removed in 1.8.