From 29fc71c40acf3c4feb2a93c4aacb9102ac0972a5 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 13 Sep 2024 18:41:18 +0200
Subject: [PATCH 1/6] FIX scoring != None for RidgeCV should use unscaled y
 for evaluation

---
 sklearn/linear_model/_ridge.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 827366fab2a25..62c3d9ca3a713 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -2156,13 +2156,20 @@ def fit(self, X, y, sample_weight=None, score_params=None):
                 self.cv_results_[:, i] = squared_errors.ravel()
             else:
                 predictions = y - (c / G_inverse_diag)
+                # represent the predictions in the original scale
+                if predictions.ndim > 1:
+                    predictions /= sqrt_sw[:, None]
+                    unscaled_y = y / sqrt_sw[:, None]
+                else:
+                    predictions /= sqrt_sw
+                    unscaled_y = y / sqrt_sw
                 if self.store_cv_results:
                     self.cv_results_[:, i] = predictions.ravel()
 
                 score_params = score_params or {}
                 alpha_score = self._score(
                     predictions=predictions,
-                    y=y,
+                    y=unscaled_y,
                     n_y=n_y,
                     scorer=scorer,
                     score_params=score_params,

From fbbfeac059c06d218cba84f31020f12240410d6c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 17 Sep 2024 22:55:31 +0200
Subject: [PATCH 2/6] take into account the intercept

---
 sklearn/linear_model/_ridge.py           | 3 +++
 sklearn/linear_model/tests/test_ridge.py | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 62c3d9ca3a713..5566b50d3c6fe 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -2163,6 +2163,9 @@ def fit(self, X, y, sample_weight=None, score_params=None):
                 else:
                     predictions /= sqrt_sw
                     unscaled_y = y / sqrt_sw
+                predictions += y_offset
+                unscaled_y += y_offset
+
                 if self.store_cv_results:
                     self.cv_results_[:, i] = predictions.ravel()
 
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 008ccf11d6ac3..40fd189635e08 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -859,7 +859,9 @@ def test_ridge_loo_cv_asym_scoring():
     loo_ridge.fit(X, y)
     gcv_ridge.fit(X, y)
 
-    assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
+    assert gcv_ridge.alpha_ == pytest.approx(
+        loo_ridge.alpha_
+    ), f"gcv_ridge.alpha_: {gcv_ridge.alpha_}, loo_ridge.alpha_: {loo_ridge.alpha_}"
     assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
     assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
 

From 843a541afde9fbfbb1a7c62fb0168029033fe1ad Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 17 Sep 2024 23:48:36 +0200
Subject: [PATCH 3/6] TST make sure that we are in the original space

---
 sklearn/linear_model/tests/test_ridge.py | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 40fd189635e08..16bed8ea64479 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -2254,6 +2254,55 @@ def test_ridge_cv_values_deprecated():
         ridge.cv_values_
 
 
+@pytest.mark.parametrize("with_sample_weight", [False, True])
+@pytest.mark.parametrize("fit_intercept", [False, True])
+def test_ridge_cv_results_predictions(with_sample_weight, fit_intercept):
+    """Check that the predictions stored in `cv_results_` are on the original scale.
+
+    The GCV approach works on scaled data: centered by an offset and scaled by the
+    square root of the sample weights. Thus, before computing scores, the
+    predictions need to be scaled back to the original scale. Those predictions are the
+    one stored in `cv_results_`.
+
+    In this test, we check that the internal predictions stored in `cv_results_` are
+    equivalent to a naive LOO-CV grid-search with a `Ridge` estimator.
+
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/13998
+    """
+    X, y = make_regression(n_samples=100, n_features=10, random_state=0)
+    sample_weight = np.ones(shape=(X.shape[0],))
+    if with_sample_weight:
+        sample_weight[::2] = 0.5
+
+    alphas = (0.1, 1.0, 10.0)
+
+    # scoring should be set to store predictions and not the squared error
+    ridge_cv = RidgeCV(
+        alphas=alphas,
+        scoring="neg_mean_squared_error",
+        fit_intercept=fit_intercept,
+        store_cv_results=True,
+    )
+    ridge_cv.fit(X, y, sample_weight=sample_weight)
+
+    # manual grid-search with a `Ridge` estimator
+    cv = LeaveOneOut()
+    results = np.transpose(
+        [
+            [
+                Ridge(alpha=alpha, fit_intercept=fit_intercept)
+                .fit(X[train_idx], y[train_idx], sample_weight[train_idx])
+                .predict(X[test_idx])
+                .squeeze()
+                for train_idx, test_idx in cv.split(X, y)
+            ]
+            for alpha in alphas
+        ]
+    )
+    assert_allclose(ridge_cv.cv_results_, results)
+
+
 # Metadata Routing Tests
 # ======================
 

From c3215bcf9445e5df53d528f692b39814e67485aa Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 17 Sep 2024 23:52:29 +0200
Subject: [PATCH 4/6] DOC update the changelog

---
 doc/whats_new/v1.6.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index fdaa79f496d63..0f3ad97ac1dd2 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -251,6 +251,12 @@ Changelog
   for the calculation of test scores.
   :pr:`29419` by :user:`Shruti Nath <snath-xoc>`.
 
+- |Fix| :class:`linear_model.RidgeCV` now properly use predictions in the original
+  scale and store them in `cv_results_` when `scoring != None`.
+  :pr:`29842` by :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Jérôme Dockes <jeromedockes>` and
+  :user:`Hanmin Qin <qinhanmin2014>`.
+
 - |API| Deprecates `copy_X` in :class:`linear_model.TheilSenRegressor` as the parameter
   has no effect. `copy_X` will be removed in 1.8.
   :pr:`29105` by :user:`Adam Li <adam2392>`.
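A minimal standalone sketch (illustrative only, not part of the patches) of the back-scaling that PATCH 1/6 and PATCH 2/6 introduce, assuming the `_RidgeGCV` convention that the solver works on `y_scaled = (y - y_offset) * sqrt_sw`, where `y_offset` is the (weighted) target mean and `sqrt_sw` the square root of the sample weights; every variable name below is a stand-in, not the estimator's internals:

    import numpy as np

    # Hypothetical toy data; `sqrt_sw` and `y_offset` mirror the quantities
    # the GCV code manipulates, under the convention assumed above.
    rng = np.random.default_rng(0)
    y = rng.normal(size=5)
    sample_weight = rng.uniform(0.5, 2.0, size=5)
    sqrt_sw = np.sqrt(sample_weight)
    y_offset = np.average(y, weights=sample_weight)

    # The GCV solver sees the centered, weight-scaled target.
    y_scaled = (y - y_offset) * sqrt_sw

    # A LOO prediction produced in that scaled space (here a stand-in for
    # `y - c / G_inverse_diag`) must be divided by sqrt_sw and shifted back
    # by y_offset before scoring -- the two steps the patches add.
    pred_scaled = y_scaled.copy()
    pred_original = pred_scaled / sqrt_sw + y_offset
    np.testing.assert_allclose(pred_original, y)

Dividing by `sqrt_sw` and adding back `y_offset` recovers the original target scale, which is why the scorer must receive the unscaled predictions and target rather than the solver-internal quantities.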
From 089ade8b117980e2a16e7882003da45fbabeb9a2 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 18 Sep 2024 18:56:02 +0200
Subject: [PATCH 5/6] Apply suggestions from code review

Co-authored-by: Christian Lorentzen
---
 sklearn/linear_model/_ridge.py           | 2 +-
 sklearn/linear_model/tests/test_ridge.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index a634e84aa12fd..c9c7396d5c5c2 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -2179,7 +2179,7 @@ def fit(self, X, y, sample_weight=None, score_params=None):
                 self.cv_results_[:, i] = squared_errors.ravel()
             else:
                 predictions = y - (c / G_inverse_diag)
-                # represent the predictions in the original scale
+                # Rescale predictions back to original scale
                 if predictions.ndim > 1:
                     predictions /= sqrt_sw[:, None]
                 else:
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index b456b900da13e..de3f4c6ce0c5b 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -861,7 +861,7 @@ def test_ridge_loo_cv_asym_scoring():
 
     assert gcv_ridge.alpha_ == pytest.approx(
         loo_ridge.alpha_
-    ), f"gcv_ridge.alpha_: {gcv_ridge.alpha_}, loo_ridge.alpha_: {loo_ridge.alpha_}"
+    ), f"{gcv_ridge.alpha_=}, {loo_ridge.alpha_=}"
     assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
     assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
 
@@ -2263,7 +2263,7 @@ def test_ridge_cv_results_predictions(with_sample_weight, fit_intercept, n_targe
     The GCV approach works on scaled data: centered by an offset and scaled by the
     square root of the sample weights. Thus, before computing scores, the
     predictions need to be scaled back to the original scale. Those predictions are the
-    one stored in `cv_results_`.
+    ones stored in `cv_results_`.
 
     In this test, we check that the internal predictions stored in `cv_results_` are
     equivalent to a naive LOO-CV grid-search with a `Ridge` estimator.

From e9bd7784b041a59c8f862b3e8883489316b3ce65 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 18 Sep 2024 19:02:36 +0200
Subject: [PATCH 6/6] address christian comment

---
 doc/whats_new/v1.6.rst                   | 7 +++++--
 sklearn/linear_model/_ridge.py           | 9 +++++----
 sklearn/linear_model/tests/test_ridge.py | 2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index bf141b582b365..2a7211ca368b5 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -257,8 +257,11 @@ Changelog
   for the calculation of test scores.
   :pr:`29419` by :user:`Shruti Nath <snath-xoc>`.
 
-- |Fix| :class:`linear_model.RidgeCV` now properly use predictions in the original
-  scale and store them in `cv_results_` when `scoring != None`.
+- |Fix| :class:`linear_model.RidgeCV` now properly uses predictions on the same
+  scale as the target seen during `fit`. Those predictions are stored in
+  `cv_results_` when `scoring != None`. Previously, the predictions were rescaled
+  by the square root of the sample weights and offset by the mean of the target,
+  leading to an incorrect estimate of the score.
   :pr:`29842` by :user:`Guillaume Lemaitre <glemaitre>`,
   :user:`Jérôme Dockes <jeromedockes>` and
   :user:`Hanmin Qin <qinhanmin2014>`.
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index c9c7396d5c5c2..a6266db175a69 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -2180,10 +2180,11 @@ def fit(self, X, y, sample_weight=None, score_params=None):
             else:
                 predictions = y - (c / G_inverse_diag)
                 # Rescale predictions back to original scale
-                if predictions.ndim > 1:
-                    predictions /= sqrt_sw[:, None]
-                else:
-                    predictions /= sqrt_sw
+                if sample_weight is not None:  # avoid the unnecessary division by ones
+                    if predictions.ndim > 1:
+                        predictions /= sqrt_sw[:, None]
+                    else:
+                        predictions /= sqrt_sw
                 predictions += y_offset
 
                 if self.store_cv_results:
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index de3f4c6ce0c5b..a82e920cb261f 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -2263,7 +2263,7 @@ def test_ridge_cv_results_predictions(with_sample_weight, fit_intercept, n_targe
     The GCV approach works on scaled data: centered by an offset and scaled by the
     square root of the sample weights. Thus, before computing scores, the
     predictions need to be scaled back to the original scale. Those predictions are the
-    ones stored in `cv_results_`.
+    ones stored in `cv_results_` in `RidgeCV`.
 
     In this test, we check that the internal predictions stored in `cv_results_` are
     equivalent to a naive LOO-CV grid-search with a `Ridge` estimator.
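For completeness, a hedged usage sketch of the behavior the series establishes, assuming a scikit-learn build that includes these patches (1.6+): with a non-default `scoring`, `RidgeCV` stores the per-alpha LOO predictions in `cv_results_` on the same scale as the `y` passed to `fit`, so they can be compared to the target directly:

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import RidgeCV

    X, y = make_regression(n_samples=50, n_features=5, random_state=0)

    # With `scoring` set, LOO predictions (not squared errors) are stored
    # when `store_cv_results=True`.
    ridge_cv = RidgeCV(
        alphas=(0.1, 1.0, 10.0),
        scoring="neg_mean_squared_error",
        store_cv_results=True,
    ).fit(X, y)

    # One column of LOO predictions per alpha, on the same scale as `y`.
    assert ridge_cv.cv_results_.shape == (50, 3)
    loo_mse = ((ridge_cv.cv_results_ - y[:, None]) ** 2).mean(axis=0)
    print(dict(zip(ridge_cv.alphas, loo_mse)))

Before the fix, these stored predictions stayed in the internal centered, weight-scaled space, so scores computed from them (and hence the selected `alpha_`) could disagree with an explicit LeaveOneOut grid search, as exercised by `test_ridge_cv_results_predictions` above.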