diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index f255e3abbcab8..cbea5d932e243 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -105,6 +105,15 @@ Changelog
   and `axis=1`, as documented in the docstring.
   :pr:`28222` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+
+:mod:`sklearn.preprocessing`
+............................
+
+- |Fix| :class:`preprocessing.TargetEncoder` no longer fails when
+  `target_type="continuous"` and the input is read-only. In particular, it now
+  works with pandas copy-on-write mode enabled.
+  :pr:`28233` by :user:`John Hopfensperger <s-banach>`.
+
 .. _changes_1_4:
 
 Version 1.4.0
diff --git a/sklearn/preprocessing/_target_encoder_fast.pyx b/sklearn/preprocessing/_target_encoder_fast.pyx
index 39f3ebcf49995..12e15397ffeca 100644
--- a/sklearn/preprocessing/_target_encoder_fast.pyx
+++ b/sklearn/preprocessing/_target_encoder_fast.pyx
@@ -19,7 +19,7 @@ ctypedef fused Y_DTYPE:
 
 def _fit_encoding_fast(
     INT_DTYPE[:, ::1] X_int,
-    Y_DTYPE[:] y,
+    const Y_DTYPE[:] y,
     cnp.int64_t[::1] n_categories,
     double smooth,
     double y_mean,
@@ -79,7 +79,7 @@ def _fit_encoding_fast(
 
 def _fit_encoding_fast_auto_smooth(
     INT_DTYPE[:, ::1] X_int,
-    Y_DTYPE[:] y,
+    const Y_DTYPE[:] y,
     cnp.int64_t[::1] n_categories,
     double y_mean,
     double y_variance,
diff --git a/sklearn/preprocessing/tests/test_target_encoder.py b/sklearn/preprocessing/tests/test_target_encoder.py
index 248a13f88512d..81b0f32d04d68 100644
--- a/sklearn/preprocessing/tests/test_target_encoder.py
+++ b/sklearn/preprocessing/tests/test_target_encoder.py
@@ -701,3 +701,16 @@ def test_target_encoding_for_linear_regression(smooth, global_random_seed):
     # cardinality yet non-informative feature instead of the lower
     # cardinality yet informative feature:
     assert abs(coef[0]) < abs(coef[2])
+
+
+def test_pandas_copy_on_write():
+    """
+    Test target-encoder cython code when y is read-only.
+
+    The numpy array underlying df["y"] is read-only when copy-on-write is enabled.
+    Non-regression test for gh-27879.
+    """
+    pd = pytest.importorskip("pandas", minversion="2.0")
+    with pd.option_context("mode.copy_on_write", True):
+        df = pd.DataFrame({"x": ["a", "b", "b"], "y": [4.0, 5.0, 6.0]})
+        TargetEncoder(target_type="continuous").fit(df[["x"]], df["y"])