diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 75ce01b54f3a7..80010afc30516 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -243,6 +243,11 @@ Changelog `nu=0.5` for PyPy (and possibly other non CPython interpreters). :pr:`24245` by :user:`Loïc Estève `. +- |Fix| The `predict` method of :class:`gaussian_process.GaussianProcessRegressor` + will not modify the input X in case a custom kernel is used, with a `diag` + method that returns part of the input X. :pr:`24405` + by :user:`Omar Salman `. + :mod:`sklearn.kernel_approximation` ................................... @@ -305,7 +310,7 @@ Changelog - |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the :func:`metrics.label_ranking_average_precision_score` metric. :pr:`23442` by :user:`Sean Atukorala ` - + - |Fix| :func:`metrics.ndcg_score` will now trigger a warning when the `y_true` value contains a negative value. Users may still use negative values, but the result may not be between 0 and 1. Starting in v1.4, passing in negative diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 4777d0d80627c..c0a8dc71b7352 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -435,7 +435,7 @@ def predict(self, X, return_std=False, return_cov=False): # Compute variance of predictive distribution # Use einsum to avoid explicitly forming the large matrix # V^T @ V just to extract its diagonal afterward. 
- y_var = self.kernel_.diag(X) + y_var = self.kernel_.diag(X).copy() y_var -= np.einsum("ij,ji->i", V.T, V) # Check if any of the variances is negative because of diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 784aa88d6487c..c03778958a3ad 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -14,7 +14,11 @@ import pytest from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel +from sklearn.gaussian_process.kernels import ( + RBF, + ConstantKernel as C, + WhiteKernel, +) from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel from sklearn.exceptions import ConvergenceWarning @@ -767,3 +771,30 @@ def test_sample_y_shapes(normalize_y, n_targets): y_samples = model.sample_y(X_test, n_samples=n_samples_y_test) assert y_samples.shape == y_test_shape + + +class CustomKernel(C): + """ + A custom kernel that has a diag method that returns the first column of the + input matrix X. This is a helper for the test to check that the input + matrix X is not mutated. + """ + + def diag(self, X): + return X[:, 0] + + +def test_gpr_predict_input_not_modified(): + """ + Check that the input X is not modified by the predict method of the + GaussianProcessRegressor when setting return_std=True. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/24340 + """ + gpr = GaussianProcessRegressor(kernel=CustomKernel()).fit(X, y) + + X2_copy = np.copy(X2) + _, _ = gpr.predict(X2, return_std=True) + + assert_allclose(X2, X2_copy)