scikit-learn · ogrisel · Sep 26, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
@@ -185,6 +185,7 @@ Metrics
 Tools
 -----
 
+- :func:`model_selection.cross_val_predict`
 - :func:`model_selection.train_test_split`
 - :func:`utils.check_consistent_length`
 

diff --git a/doc/whats_new/upcoming_changes/array-api/32270.feature.rst b/doc/whats_new/upcoming_changes/array-api/32270.feature.rst
@@ -0,0 +1,2 @@
+- :func:`sklearn.model_selection.cross_val_predict` now supports array API compatible inputs.
+  By :user:`Omar Salman <OmarManzoor>`
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
@@ -1258,7 +1258,9 @@ def cross_val_predict(
             concat_pred.append(label_preds)
         predictions = concat_pred
     else:
-        predictions = np.concatenate(predictions)
+        xp, _ = get_namespace(X)
+        inv_test_indices = xp.asarray(inv_test_indices, device=device(X))
+        predictions = xp.concat(predictions)
 
     if isinstance(predictions, list):
         return [p[inv_test_indices] for p in predictions]

diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
@@ -12,7 +12,7 @@
 from scipy.sparse import issparse
 
 from sklearn import config_context
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
+from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_classifier
 from sklearn.cluster import KMeans
 from sklearn.datasets import (
     load_diabetes,
@@ -22,6 +22,7 @@
     make_multilabel_classification,
     make_regression,
 )
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.exceptions import FitFailedWarning, UnsetMetadataPassedError
 from sklearn.impute import SimpleImputer
@@ -81,8 +82,15 @@
     check_recorded_metadata,
 )
 from sklearn.utils import shuffle
+from sklearn.utils._array_api import (
+    _atol_for_type,
+    _convert_to_numpy,
+    _get_namespace_device_dtype_ids,
+    yield_namespace_device_dtype_combinations,
+)
 from sklearn.utils._mocking import CheckingClassifier, MockDataFrame
 from sklearn.utils._testing import (
+    _array_api_for_tests,
     assert_allclose,
     assert_almost_equal,
     assert_array_almost_equal,
@@ -2725,3 +2733,44 @@ def test_learning_curve_exploit_incremental_learning_routing():
 
 # End of metadata routing tests
 # =============================
+
+
+@pytest.mark.parametrize(
+    "estimator",
+    [Ridge(), LinearDiscriminantAnalysis()],
+    ids=["Ridge", "LinearDiscriminantAnalysis"],
+)
+@pytest.mark.parametrize("cv", [None, 3, 5])
+@pytest.mark.parametrize(
+    "namespace, device_, dtype_name",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+def test_cross_val_predict_array_api_compliance(
+    estimator, cv, namespace, device_, dtype_name
+):
+    """Check that `cross_val_predict` gives the same predictions on array API
+    inputs as on NumPy inputs, for both a regressor and a classifier."""
+
+    xp = _array_api_for_tests(namespace, device_)
+    if is_classifier(estimator):
+        X, y = make_classification(
+            n_samples=1000, n_features=5, n_classes=3, n_informative=3, random_state=42
+        )
+    else:
+        X, y = make_regression(
+            n_samples=1000, n_features=5, n_informative=3, random_state=42
+        )
+
+    X_np = X.astype(dtype_name)
+    y_np = y.astype(dtype_name)
+    X_xp = xp.asarray(X_np, device=device_)
+    y_xp = xp.asarray(y_np, device=device_)
+
+    with config_context(array_api_dispatch=True):
+        pred_xp = cross_val_predict(estimator, X_xp, y_xp, cv=cv)
+
+    pred_np = cross_val_predict(estimator, X_np, y_np, cv=cv)
+    assert_allclose(
+        _convert_to_numpy(pred_xp, xp), pred_np, atol=_atol_for_type(dtype_name)
+    )
diff --git a/sklearn/utils/_indexing.py b/sklearn/utils/_indexing.py
@@ -10,7 +10,11 @@
 import numpy as np
 from scipy.sparse import issparse
 
-from sklearn.utils._array_api import _is_numpy_namespace, get_namespace
+from sklearn.utils._array_api import (
+    _is_numpy_namespace,
+    ensure_common_namespace_device,
+    get_namespace,
+)
 from sklearn.utils._param_validation import Interval, validate_params
 from sklearn.utils.extmath import _approximate_mode
 from sklearn.utils.fixes import PYARROW_VERSION_BELOW_17
@@ -31,6 +35,7 @@ def _array_indexing(array, key, key_dtype, axis):
     """Index an array or scipy.sparse consistently across NumPy version."""
     xp, is_array_api = get_namespace(array)
     if is_array_api:
+        key = ensure_common_namespace_device(array, key)[0]
         return xp.take(array, key, axis=axis)
     if issparse(array) and key_dtype == "bool":
         key = np.asarray(key)

diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
@@ -406,7 +406,11 @@ def _raise_or_return():
     if xp.isdtype(y.dtype, "real floating"):
         # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]
         data = y.data if issparse(y) else y
-        if xp.any(data != xp.astype(data, int)):
+        integral_data = xp.astype(data, xp.int64)
+        # Cast the integer copy back to the floating dtype of y before
+        # comparing: array-api-strict does not allow comparisons that mix
+        # integer and floating-point arrays (no mixed-kind type promotion).
+        if xp.any(data != xp.astype(integral_data, y.dtype)):
             _assert_all_finite(data, input_name=input_name)
             return "continuous" + suffix
-Original file line number
+Diff line change
@@ Expand Up / @@ -185,6 +185,7 @@ Metrics @@
     Tools
     -----
+    - :func:`model_selection.cross_val_predict`
     - :func:`model_selection.train_test_split`
     - :func:`utils.check_consistent_length`
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		- :func:`sklearn.model_selection.cross_val_predict` now supports array API compatible inputs.
		By :user:`Omar Salman <OmarManzoor>`