From 4869c0d322dbbddcbed0c57ab3eb34859a25975d Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 15 Sep 2023 13:00:21 +0500
Subject: [PATCH 01/21] ENH Array API support for LabelEncoder

---
 sklearn/preprocessing/_label.py           | 15 ++--
 sklearn/preprocessing/tests/test_label.py | 25 ++++++
 sklearn/utils/_array_api.py               | 92 +++++++++++++++++++++++
 sklearn/utils/_encode.py                  | 65 +++++++++-------
 sklearn/utils/estimator_checks.py         | 19 ++++-
 5 files changed, 179 insertions(+), 37 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 41494f2649a01..05828c0fa6613 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -17,6 +17,7 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import column_or_1d
+from ..utils._array_api import _setdiff1d, get_namespace
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, validate_params
 from ..utils.multiclass import type_of_target, unique_labels
@@ -129,10 +130,11 @@ def transform(self, y):
             Labels as normalized encodings.
         """
         check_is_fitted(self)
+        xp, _ = get_namespace(y)
         y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
         # transform of empty array is empty array
         if _num_samples(y) == 0:
-            return np.array([])
+            return xp.asarray([])
 
         return _encode(y, uniques=self.classes_)
 
@@ -150,16 +152,17 @@ def inverse_transform(self, y):
             Original encoding.
         """
         check_is_fitted(self)
+        xp, _ = get_namespace(y)
         y = column_or_1d(y, warn=True)
         # inverse transform of empty array is empty array
         if _num_samples(y) == 0:
-            return np.array([])
+            return xp.asarray([])
 
-        diff = np.setdiff1d(y, np.arange(len(self.classes_)))
-        if len(diff):
+        diff = _setdiff1d(ar1=y, ar2=xp.arange(self.classes_.shape[0]), xp=xp)
+        if diff.shape[0]:
             raise ValueError("y contains previously unseen labels: %s" % str(diff))
-        y = np.asarray(y)
-        return self.classes_[y]
+        y = xp.asarray(y)
+        return xp.take(self.classes_, y, axis=0)
 
     def _more_tags(self):
         return {"X_types": ["1dlabels"]}
diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index cce0ddc5c267e..8f81b9cfec595 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -12,7 +12,12 @@
     label_binarize,
 )
 from sklearn.utils import _to_object_array
+from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
 from sklearn.utils._testing import assert_array_equal, ignore_warnings
+from sklearn.utils.estimator_checks import (
+    _get_check_estimator_ids,
+    check_array_api_input_and_values,
+)
 from sklearn.utils.fixes import (
     COO_CONTAINERS,
     CSC_CONTAINERS,
@@ -697,3 +702,23 @@ def test_label_encoders_do_not_have_set_output(encoder):
     y_encoded_with_kwarg = encoder.fit_transform(y=["a", "b", "c"])
     y_encoded_positional = encoder.fit_transform(["a", "b", "c"])
     assert_array_equal(y_encoded_with_kwarg, y_encoded_positional)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize(
+    "check",
+    [check_array_api_input_and_values],
+    ids=_get_check_estimator_ids,
+)
+@pytest.mark.parametrize(
+    "estimator",
+    [LabelEncoder()],
+    ids=_get_check_estimator_ids,
+)
+def test_label_encoder_array_api_compliance(
+    estimator, check, array_namespace, device, dtype
+):
+    name = estimator.__class__.__name__
+    check(name, estimator, array_namespace, device=device, dtype=dtype)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 24534faa931e8..554a5c71a859d 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -232,6 +232,12 @@ def take(self, X, indices, *, axis=0):
     def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
+    def searchsorted(self, a, v, *, side="left", sorter=None):
+        a = _convert_to_numpy(a, xp=self._namespace)
+        v = _convert_to_numpy(v, xp=self._namespace)
+        indices = numpy.searchsorted(a, v, side=side, sorter=sorter)
+        return self._namespace.asarray(indices)
+
 
 def _check_device_cpu(device):  # noqa
     if device not in {"cpu", None}:
@@ -330,6 +336,11 @@ def unique_counts(self, x):
     def unique_values(self, x):
         return numpy.unique(x)
 
+    def unique_all(self, x):
+        return numpy.unique(
+            x, return_index=True, return_inverse=True, return_counts=True
+        )
+
     def concat(self, arrays, *, axis=None):
         return numpy.concatenate(arrays, axis=axis)
 
@@ -595,3 +606,84 @@ def _estimator_with_converted_arrays(estimator, converter):
 def _atol_for_type(dtype):
     """Return the absolute tolerance for a given dtype."""
     return numpy.finfo(dtype).eps * 100
+
+
+def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
+    """Checks whether each element of an array is also present in a
+    second array.
+
+    Returns a boolean array the same length as `ar1` that is True
+    where an element of `ar1` is in `ar2` and False otherwise
+    """
+    if not assume_unique:
+        ar1, rev_idx = xp.unique_inverse(ar1)
+        ar2 = xp.unique_values(ar2)
+
+    ar = xp.concat((ar1, ar2))
+    # We need this to be a stable sort.
+    order = ar.argsort(stable=True)
+    sar = ar[order]
+    if invert:
+        bool_ar = sar[1:] != sar[:-1]
+    else:
+        bool_ar = sar[1:] == sar[:-1]
+    flag = xp.concat((bool_ar, xp.asarray([invert])))
+    ret = xp.empty(ar.shape, dtype=xp.bool)
+    ret[order] = flag
+
+    if assume_unique:
+        return ret[: len(ar1)]
+    else:
+        return ret[rev_idx]
+
+
+def _setdiff1d(ar1, ar2, xp, assume_unique=False):
+    """Find the set difference of two arrays.
+
+    Return the unique values in `ar1` that are not in `ar2`.
+    """
+    if _is_numpy_namespace(xp):
+        return xp.asarray(
+            numpy.setdiff1d(
+                ar1=ar1,
+                ar2=ar2,
+                assume_unique=assume_unique,
+            )
+        )
+
+    if assume_unique:
+        ar1 = xp.reshape(xp.asarray(ar1), (-1,))
+    else:
+        ar1 = xp.unique_values(ar1)
+        ar2 = xp.unique_values(ar2)
+    return ar1[_in1d(ar1=ar1, ar2=ar2, xp=xp, assume_unique=True, invert=True)]
+
+
+def _isin(element, test_elements, xp, assume_unique=False, invert=False):
+    """Calculates ``element in test_elements``, broadcasting over `element`
+    only.
+
+    Returns a boolean array of the same shape as `element` that is True
+    where an element of `element` is in `test_elements` and False otherwise.
+    """
+    if _is_numpy_namespace(xp):
+        return xp.asarray(
+            numpy.isin(
+                element=element,
+                test_elements=test_elements,
+                assume_unique=assume_unique,
+                invert=invert,
+            )
+        )
+
+    element = xp.asarray(element)
+    return xp.reshape(
+        _in1d(
+            ar1=element,
+            ar2=test_elements,
+            xp=xp,
+            assume_unique=assume_unique,
+            invert=invert,
+        ),
+        element.shape,
+    )
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index b3bf1c2a317ec..55f422f487be7 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from . import is_scalar_nan
+from ._array_api import _convert_to_numpy, _isin, _setdiff1d, get_namespace
 
 
 def _unique(values, *, return_inverse=False, return_counts=False):
@@ -51,31 +52,29 @@ def _unique(values, *, return_inverse=False, return_counts=False):
 def _unique_np(values, return_inverse=False, return_counts=False):
     """Helper function to find unique values for numpy arrays that correctly
     accounts for nans. See `_unique` documentation for details."""
-    uniques = np.unique(
-        values, return_inverse=return_inverse, return_counts=return_counts
-    )
+    xp, _ = get_namespace(values)
 
     inverse, counts = None, None
 
-    if return_counts:
-        *uniques, counts = uniques
-
-    if return_inverse:
-        *uniques, inverse = uniques
-
-    if return_counts or return_inverse:
-        uniques = uniques[0]
+    if return_inverse and return_counts:
+        uniques, _, inverse, counts = xp.unique_all(values)
+    elif return_inverse:
+        uniques, inverse = xp.unique_inverse(values)
+    elif return_counts:
+        uniques, counts = xp.unique_counts(values)
+    else:
+        uniques = xp.unique_values(values)
 
     # np.unique will have duplicate missing values at the end of `uniques`
     # here we clip the nans and remove it from uniques
     if uniques.size and is_scalar_nan(uniques[-1]):
-        nan_idx = np.searchsorted(uniques, np.nan)
+        nan_idx = xp.searchsorted(uniques, xp.nan)
         uniques = uniques[: nan_idx + 1]
         if return_inverse:
             inverse[inverse > nan_idx] = nan_idx
 
         if return_counts:
-            counts[nan_idx] = np.sum(counts[nan_idx:])
+            counts[nan_idx] = xp.sum(counts[nan_idx:])
             counts = counts[: nan_idx + 1]
 
     ret = (uniques,)
@@ -161,8 +160,9 @@ def __missing__(self, key):
 
 def _map_to_integer(values, uniques):
     """Map values based on its position in uniques."""
+    xp, _ = get_namespace(values, uniques)
     table = _nandict({val: i for i, val in enumerate(uniques)})
-    return np.array([table[v] for v in values])
+    return xp.asarray([table[v] for v in values])
 
 
 def _unique_python(values, *, return_inverse, return_counts):
@@ -220,7 +220,13 @@ def _encode(values, *, uniques, check_unknown=True):
     encoded : ndarray
         Encoded values
     """
-    if values.dtype.kind in "OUS":
+    xp, is_array_api_compliant = get_namespace(values, uniques)
+    if is_array_api_compliant:
+        dtype_kind = _convert_to_numpy(values, xp).dtype.kind
+    else:
+        dtype_kind = values.dtype.kind
+
+    if dtype_kind in "OUS":
         try:
             return _map_to_integer(values, uniques)
         except KeyError as e:
@@ -230,7 +236,7 @@ def _encode(values, *, uniques, check_unknown=True):
             diff = _check_unknown(values, uniques)
             if diff:
                 raise ValueError(f"y contains previously unseen labels: {str(diff)}")
-        return np.searchsorted(uniques, values)
+        return xp.searchsorted(uniques, values)
 
 
 def _check_unknown(values, known_values, return_mask=False):
@@ -258,9 +264,14 @@ def _check_unknown(values, known_values, return_mask=False):
         Additionally returned if ``return_mask=True``.
 
     """
+    xp, is_array_api_compliant = get_namespace(values, known_values)
     valid_mask = None
+    if is_array_api_compliant:
+        dtype_kind = _convert_to_numpy(values, xp).dtype.kind
+    else:
+        dtype_kind = values.dtype.kind
 
-    if values.dtype.kind in "OUS":
+    if dtype_kind in "OUS":
         values_set = set(values)
         values_set, missing_in_values = _extract_missing(values_set)
 
@@ -282,9 +293,9 @@ def is_valid(value):
 
         if return_mask:
             if diff or nan_in_diff or none_in_diff:
-                valid_mask = np.array([is_valid(value) for value in values])
+                valid_mask = xp.array([is_valid(value) for value in values])
             else:
-                valid_mask = np.ones(len(values), dtype=bool)
+                valid_mask = xp.ones(len(values), dtype=xp.bool)
 
         diff = list(diff)
         if none_in_diff:
@@ -292,21 +303,21 @@ def is_valid(value):
         if nan_in_diff:
             diff.append(np.nan)
     else:
-        unique_values = np.unique(values)
-        diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
+        unique_values = xp.unique_values(values)
+        diff = _setdiff1d(unique_values, known_values, xp, assume_unique=True)
         if return_mask:
             if diff.size:
-                valid_mask = np.isin(values, known_values)
+                valid_mask = _isin(values, known_values, xp)
             else:
-                valid_mask = np.ones(len(values), dtype=bool)
+                valid_mask = xp.ones(len(values), dtype=xp.bool)
 
         # check for nans in the known_values
-        if np.isnan(known_values).any():
-            diff_is_nan = np.isnan(diff)
-            if diff_is_nan.any():
+        if xp.any(xp.isnan(known_values)):
+            diff_is_nan = xp.isnan(diff)
+            if xp.any(diff_is_nan):
                 # removes nan from valid_mask
                 if diff.size and return_mask:
-                    is_nan = np.isnan(values)
+                    is_nan = xp.isnan(values)
                     valid_mask[is_nan] = 1
 
                 # remove nan from diff
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 895ea98feffde..0bae0b1db34c1 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -878,7 +878,18 @@ def check_array_api_input(
     X_xp = xp.asarray(X, device=device)
     y_xp = xp.asarray(y, device=device)
 
-    est.fit(X, y)
+    if "Label" in est.__class__.__name__:
+        fit_args = (y,)
+        xp_fit_args = (y_xp,)
+        method_arg = (y,)
+        xp_method_arg = (y_xp,)
+    else:
+        fit_args = (X, y)
+        xp_fit_args = (X_xp, y_xp)
+        method_arg = (X,)
+        xp_method_arg = (X_xp,)
+
+    est.fit(*fit_args)
 
     array_attributes = {
         key: value for key, value in vars(est).items() if isinstance(value, np.ndarray)
@@ -886,7 +897,7 @@ def check_array_api_input(
 
     est_xp = clone(est)
     with config_context(array_api_dispatch=True):
-        est_xp.fit(X_xp, y_xp)
+        est_xp.fit(*xp_fit_args)
         input_ns = get_namespace(X_xp)[0].__name__
 
     # Fitted attributes which are arrays must have the same
@@ -941,9 +952,9 @@ def check_array_api_input(
                 assert abs(result - result_xp) < np.finfo(X.dtype).eps * 100
             continue
         else:
-            result = method(X)
+            result = method(*method_arg)
             with config_context(array_api_dispatch=True):
-                result_xp = getattr(est_xp, method_name)(X_xp)
+                result_xp = getattr(est_xp, method_name)(*xp_method_arg)
 
         with config_context(array_api_dispatch=True):
             result_ns = get_namespace(result_xp)[0].__name__

From 7fbd4589d693145ebcb55f0cb4762dfa5fdce0ed Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 15 Sep 2023 15:05:48 +0500
Subject: [PATCH 02/21] Add changelog

---
 doc/whats_new/v1.4.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 3e74cabb396b8..98e8ff0574e2f 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -273,6 +273,10 @@ Changelog
   our usual rolling deprecation cycle policy. See
   :ref:`array_api` for more details. :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati <EdAbati>`.
 
+- |Enhancement| :class:`preprocessing.LabelEncoder` now supports the
+  `Array API <https://data-apis.org/array-api/latest/>`_. See :ref:`array_api`
+  for more details. :pr:`27381` by :user:`Omar Salman <OmarManzoor>`.
+
 :mod:`sklearn.tree`
 ...................
 

From ec6ccc6c63ca718b8615f54f522683a5789c909d Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 15 Sep 2023 17:15:19 +0500
Subject: [PATCH 03/21] Add tests for array api functions

---
 sklearn/utils/_array_api.py           | 83 ++++++++++++++++-----------
 sklearn/utils/tests/test_array_api.py | 56 ++++++++++++++++++
 2 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 554a5c71a859d..1314dc21a799f 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -608,35 +608,6 @@ def _atol_for_type(dtype):
     return numpy.finfo(dtype).eps * 100
 
 
-def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
-    """Checks whether each element of an array is also present in a
-    second array.
-
-    Returns a boolean array the same length as `ar1` that is True
-    where an element of `ar1` is in `ar2` and False otherwise
-    """
-    if not assume_unique:
-        ar1, rev_idx = xp.unique_inverse(ar1)
-        ar2 = xp.unique_values(ar2)
-
-    ar = xp.concat((ar1, ar2))
-    # We need this to be a stable sort.
-    order = ar.argsort(stable=True)
-    sar = ar[order]
-    if invert:
-        bool_ar = sar[1:] != sar[:-1]
-    else:
-        bool_ar = sar[1:] == sar[:-1]
-    flag = xp.concat((bool_ar, xp.asarray([invert])))
-    ret = xp.empty(ar.shape, dtype=xp.bool)
-    ret[order] = flag
-
-    if assume_unique:
-        return ret[: len(ar1)]
-    else:
-        return ret[rev_idx]
-
-
 def _setdiff1d(ar1, ar2, xp, assume_unique=False):
     """Find the set difference of two arrays.
 
@@ -656,7 +627,7 @@ def _setdiff1d(ar1, ar2, xp, assume_unique=False):
     else:
         ar1 = xp.unique_values(ar1)
         ar2 = xp.unique_values(ar2)
-    return ar1[_in1d(ar1=ar1, ar2=ar2, xp=xp, assume_unique=True, invert=True)]
+    return ar1[__in1d(ar1=ar1, ar2=ar2, xp=xp, assume_unique=True, invert=True)]
 
 
 def _isin(element, test_elements, xp, assume_unique=False, invert=False):
@@ -676,14 +647,60 @@ def _isin(element, test_elements, xp, assume_unique=False, invert=False):
             )
         )
 
-    element = xp.asarray(element)
+    original_element_shape = element.shape
+    element = xp.reshape(xp.asarray(element), (-1,))
+    test_elements = xp.reshape(xp.asarray(test_elements), (-1,))
     return xp.reshape(
-        _in1d(
+        __in1d(
             ar1=element,
             ar2=test_elements,
             xp=xp,
             assume_unique=assume_unique,
             invert=invert,
         ),
-        element.shape,
+        original_element_shape,
     )
+
+
+# Note: This is a helper for the functions `_isin` and
+# `_setdiff1d`. It is not meant to be called directly.
+def __in1d(ar1, ar2, xp, assume_unique=False, invert=False):
+    """Checks whether each element of an array is also present in a
+    second array.
+
+    Returns a boolean array the same length as `ar1` that is True
+    where an element of `ar1` is in `ar2` and False otherwise
+    """
+
+    # Fast path: for a small `ar2`, direct comparisons are faster than sorting.
+    if ar2.shape[0] < 10 * ar1.shape[0] ** 0.145:
+        if invert:
+            mask = xp.ones(ar1.shape[0], dtype=xp.bool)
+            for a in ar2:
+                mask &= ar1 != a
+        else:
+            mask = xp.zeros(ar1.shape[0], dtype=xp.bool)
+            for a in ar2:
+                mask |= ar1 == a
+        return mask
+
+    if not assume_unique:
+        ar1, rev_idx = xp.unique_inverse(ar1)
+        ar2 = xp.unique_values(ar2)
+
+    ar = xp.concat((ar1, ar2))
+    # We need this to be a stable sort.
+    order = ar.argsort(stable=True)
+    sar = ar[order]
+    if invert:
+        bool_ar = sar[1:] != sar[:-1]
+    else:
+        bool_ar = sar[1:] == sar[:-1]
+    flag = xp.concat((bool_ar, xp.asarray([invert])))
+    ret = xp.empty(ar.shape, dtype=xp.bool)
+    ret[order] = flag
+
+    if assume_unique:
+        return ret[: len(ar1)]
+    else:
+        return ret[rev_idx]
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index bf2c0e1acb0fc..2d5b89e4dd2d2 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -1,6 +1,7 @@
 from functools import partial
 
 import numpy
+import numpy as np
 import pytest
 from numpy.testing import assert_allclose, assert_array_equal
 
@@ -12,6 +13,7 @@
     _atol_for_type,
     _convert_to_numpy,
     _estimator_with_converted_arrays,
+    _isin,
     _nanmax,
     _nanmin,
     _NumPyAPIWrapper,
@@ -143,6 +145,29 @@ def test_array_api_wrapper_take():
         xp.take(xp.asarray([[[0]]]), xp.asarray([0]), axis=0)
 
 
+def test_array_api_wrapper_searchsorted():
+    """Test _ArrayAPIWrapper API for searchsorted."""
+    numpy_array_api = pytest.importorskip("numpy.array_api")
+    xp_ = _AdjustableNameAPITestWrapper(numpy_array_api, "wrapped_numpy.array_api")
+    xp = _ArrayAPIWrapper(xp_)
+
+    # Check searchsorted compared to numpy's
+    a = xp.asarray([1, 2, 3, 4, 5], dtype=xp.float64)
+    v = 3.0
+    result = xp.searchsorted(a, v)
+    assert hasattr(result, "__array_namespace__")
+    assert result == numpy.searchsorted(a, v)
+
+    result = xp.searchsorted(a, v, side="right")
+    assert hasattr(result, "__array_namespace__")
+    assert result == numpy.searchsorted(a, v, side="right")
+
+    v = xp.asarray([-10, 10, 2, 3], dtype=xp.float64)
+    result = xp.searchsorted(a, v)
+    assert hasattr(result, "__array_namespace__")
+    assert_array_equal(result, numpy.searchsorted(a, v))
+
+
 @pytest.mark.parametrize("array_api", ["numpy", "numpy.array_api"])
 def test_asarray_with_order(array_api):
     """Test _asarray_with_order passes along order for NumPy arrays."""
@@ -371,3 +396,34 @@ def test_get_namespace_array_api_isdtype(wrapper):
 
     with pytest.raises(ValueError, match="Unrecognized data type"):
         assert xp.isdtype(xp.int16, "unknown")
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+)
+@pytest.mark.parametrize("invert", [True, False])
+@pytest.mark.parametrize("assume_unique", [True, False])
+@pytest.mark.parametrize("element_size", [6, 10, 14])
+def test_isin(array_namespace, device, dtype, invert, assume_unique, element_size):
+    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
+    r = element_size // 2
+    element = 2 * numpy.arange(element_size).reshape((r, 2)).astype(dtype)
+    test_elements = numpy.array(np.arange(14), dtype=dtype)
+    element_xp = xp.asarray(element, device=device)
+    test_elements_xp = xp.asarray(test_elements, device=device)
+    expected = numpy.isin(
+        element=element,
+        test_elements=test_elements,
+        assume_unique=assume_unique,
+        invert=invert,
+    )
+    with config_context(array_api_dispatch=True):
+        result = _isin(
+            element=element_xp,
+            test_elements=test_elements_xp,
+            xp=xp,
+            assume_unique=assume_unique,
+            invert=invert,
+        )
+
+    assert_array_equal(result, expected)

From 43b039d08ddc4c78ec4b5cd1043751709dcae0da Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Sat, 23 Sep 2023 15:01:40 +0500
Subject: [PATCH 04/21] Updates: PR suggestions

---
 sklearn/utils/_array_api.py           | 83 +++++++++++++++++++++------
 sklearn/utils/_encode.py              |  4 +-
 sklearn/utils/estimator_checks.py     |  3 +-
 sklearn/utils/tests/test_array_api.py | 13 ++++-
 4 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 1314dc21a799f..cc2453bc01e8e 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -9,6 +9,19 @@
 from .._config import get_config
 from .fixes import parse_version
 
+ARRAY_NAMESPACES = [
+    # The following is used to test the array_api_compat wrapper when
+    # array_api_dispatch is enabled: in particular, the arrays used in the
+    # tests are regular numpy arrays without any "device" attribute.
+    "numpy",
+    # Stricter NumPy-based Array API implementation. The
+    # numpy.array_api.Array instances always have a dummy "device" attribute.
+    "numpy.array_api",
+    "cupy",
+    "cupy.array_api",
+    "torch",
+]
+
 
 def yield_namespace_device_dtype_combinations():
     """Yield supported namespace, device, dtype tuples for testing.
@@ -28,18 +41,7 @@ def yield_namespace_device_dtype_combinations():
         The name of the data type to use for arrays. Can be None to indicate
         that the default value should be used.
     """
-    for array_namespace in [
-        # The following is used to test the array_api_compat wrapper when
-        # array_api_dispatch is enabled: in particular, the arrays used in the
-        # tests are regular numpy arrays without any "device" attribute.
-        "numpy",
-        # Stricter NumPy-based Array API implementation. The
-        # numpy.array_api.Array instances always a dummy "device" attribute.
-        "numpy.array_api",
-        "cupy",
-        "cupy.array_api",
-        "torch",
-    ]:
+    for array_namespace in ARRAY_NAMESPACES:
         if array_namespace == "torch":
             for device, dtype in itertools.product(
                 ("cpu", "cuda"), ("float64", "float32")
@@ -50,6 +52,43 @@ def yield_namespace_device_dtype_combinations():
             yield array_namespace, None, None
 
 
+def yield_namespace_device_int_dtype_combinations():
+    """Yield supported namespace, device, int dtype tuples for testing.
+
+    Use this to test that an estimator works with all combinations.
+
+    Returns
+    -------
+    array_namespace : str
+        The name of the Array API namespace.
+
+    device : str
+        The name of the device on which to allocate the arrays. Can be None to
+        indicate that the default value should be used.
+
+    dtype : str
+        The name of the int data type to use for arrays. Can be None to
+        indicate that the default value should be used.
+    """
+    for array_namespace in ARRAY_NAMESPACES:
+        if array_namespace == "torch":
+            for device, dtype in itertools.product(
+                ("cpu", "cuda", "mps"), ("int16", "int32", "int64", "uint8")
+            ):
+                yield array_namespace, device, dtype
+        else:
+            for dtype in (
+                "int16",
+                "int32",
+                "int64",
+                "uint8",
+                "uint16",
+                "uint32",
+                "uint64",
+            ):
+                yield array_namespace, None, dtype
+
+
 def _check_array_api_dispatch(array_api_dispatch):
     """Check that array_api_compat is installed and NumPy version is compatible.
 
@@ -233,10 +272,16 @@ def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
     def searchsorted(self, a, v, *, side="left", sorter=None):
+        # Temporary workaround needed as long as searchsorted is not part
+        # of the Array API spec:
+        # https://github.com/data-apis/array-api/issues/688
+        if hasattr(self._namespace, "searchsorted"):
+            return self._namespace.searchsorted(a, v, side=side, sorter=sorter)
+
         a = _convert_to_numpy(a, xp=self._namespace)
         v = _convert_to_numpy(v, xp=self._namespace)
         indices = numpy.searchsorted(a, v, side=side, sorter=sorter)
-        return self._namespace.asarray(indices)
+        return self._namespace.asarray(indices, device=device(a))
 
 
 def _check_device_cpu(device):  # noqa
@@ -623,11 +668,11 @@ def _setdiff1d(ar1, ar2, xp, assume_unique=False):
         )
 
     if assume_unique:
-        ar1 = xp.reshape(xp.asarray(ar1), (-1,))
+        ar1 = xp.reshape(ar1, (-1,))
     else:
         ar1 = xp.unique_values(ar1)
         ar2 = xp.unique_values(ar2)
-    return ar1[__in1d(ar1=ar1, ar2=ar2, xp=xp, assume_unique=True, invert=True)]
+    return ar1[_in1d(ar1=ar1, ar2=ar2, xp=xp, assume_unique=True, invert=True)]
 
 
 def _isin(element, test_elements, xp, assume_unique=False, invert=False):
@@ -648,10 +693,10 @@ def _isin(element, test_elements, xp, assume_unique=False, invert=False):
         )
 
     original_element_shape = element.shape
-    element = xp.reshape(xp.asarray(element), (-1,))
-    test_elements = xp.reshape(xp.asarray(test_elements), (-1,))
+    element = xp.reshape(element, (-1,))
+    test_elements = xp.reshape(test_elements, (-1,))
     return xp.reshape(
-        __in1d(
+        _in1d(
             ar1=element,
             ar2=test_elements,
             xp=xp,
@@ -664,7 +709,7 @@ def _isin(element, test_elements, xp, assume_unique=False, invert=False):
 
 # Note: This is a helper for the functions `_isin` and
 # `_setdiff1d`. It is not meant to be called directly.
-def __in1d(ar1, ar2, xp, assume_unique=False, invert=False):
+def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
     """Checks whether each element of an array is also present in a
     second array.
 
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 55f422f487be7..b885b646bb887 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from . import is_scalar_nan
-from ._array_api import _convert_to_numpy, _isin, _setdiff1d, get_namespace
+from ._array_api import _convert_to_numpy, _isin, _setdiff1d, device, get_namespace
 
 
 def _unique(values, *, return_inverse=False, return_counts=False):
@@ -162,7 +162,7 @@ def _map_to_integer(values, uniques):
     """Map values based on its position in uniques."""
     xp, _ = get_namespace(values, uniques)
     table = _nandict({val: i for i, val in enumerate(uniques)})
-    return xp.asarray([table[v] for v in values])
+    return xp.asarray([table[v] for v in values], device=device(values))
 
 
 def _unique_python(values, *, return_inverse, return_counts):
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index fdc870705fef6..1dfb4a238b70a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -878,7 +878,8 @@ def check_array_api_input(
     X_xp = xp.asarray(X, device=device)
     y_xp = xp.asarray(y, device=device)
 
-    if "Label" in est.__class__.__name__:
+    X_types = est._get_tags().get("X_types", [""])
+    if "labels" in X_types[0]:
         fit_args = (y,)
         xp_fit_args = (y_xp,)
         method_arg = (y,)
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 2d5b89e4dd2d2..237fd05f3190d 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -21,6 +21,7 @@
     get_namespace,
     supported_float_dtypes,
     yield_namespace_device_dtype_combinations,
+    yield_namespace_device_int_dtype_combinations,
 )
 from sklearn.utils._testing import (
     _array_api_for_tests,
@@ -156,15 +157,18 @@ def test_array_api_wrapper_searchsorted():
     v = 3.0
     result = xp.searchsorted(a, v)
     assert hasattr(result, "__array_namespace__")
+    assert result.__array_namespace__().__name__ == "numpy.array_api"
     assert result == numpy.searchsorted(a, v)
 
     result = xp.searchsorted(a, v, side="right")
     assert hasattr(result, "__array_namespace__")
+    assert result.__array_namespace__().__name__ == "numpy.array_api"
     assert result == numpy.searchsorted(a, v, side="right")
 
     v = xp.asarray([-10, 10, 2, 3], dtype=xp.float64)
     result = xp.searchsorted(a, v)
     assert hasattr(result, "__array_namespace__")
+    assert result.__array_namespace__().__name__ == "numpy.array_api"
     assert_array_equal(result, numpy.searchsorted(a, v))
 
 
@@ -399,13 +403,16 @@ def test_get_namespace_array_api_isdtype(wrapper):
 
 
 @pytest.mark.parametrize(
-    "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
+    "array_namespace, device, _", yield_namespace_device_int_dtype_combinations()
 )
 @pytest.mark.parametrize("invert", [True, False])
 @pytest.mark.parametrize("assume_unique", [True, False])
 @pytest.mark.parametrize("element_size", [6, 10, 14])
-def test_isin(array_namespace, device, dtype, invert, assume_unique, element_size):
-    xp, device, dtype = _array_api_for_tests(array_namespace, device, dtype)
+@pytest.mark.parametrize("int_dtype", ["int32", "int64", "uint8"])
+def test_isin(
+    array_namespace, device, _, invert, assume_unique, element_size, int_dtype
+):
+    xp, device, dtype = _array_api_for_tests(array_namespace, device, int_dtype)
     r = element_size // 2
     element = 2 * numpy.arange(element_size).reshape((r, 2)).astype(dtype)
     test_elements = numpy.array(np.arange(14), dtype=dtype)

From cfdabebd93d02702835ea8dc3477c1a61a00bf4b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 4 Apr 2024 17:57:09 +0500
Subject: [PATCH 05/21] Fix dtype_name parameter

---
 sklearn/preprocessing/tests/test_label.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index e37e84343abec..5bf63b21267ac 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -721,4 +721,4 @@ def test_label_encoder_array_api_compliance(
     estimator, check, array_namespace, device, dtype
 ):
     name = estimator.__class__.__name__
-    check(name, estimator, array_namespace, device=device, dtype=dtype)
+    check(name, estimator, array_namespace, device=device, dtype_name=dtype)

From 23ee51015d12d71ee62c4e40e935c8e6d540fc06 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 12 Apr 2024 17:28:46 +0500
Subject: [PATCH 06/21] Updates as suggested in review

---
 sklearn/utils/_array_api.py           | 56 +++++----------------------
 sklearn/utils/_encode.py              | 34 +++++++++++-----
 sklearn/utils/tests/test_array_api.py | 11 +++---
 3 files changed, 39 insertions(+), 62 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 758df7df61f92..0badef6f22198 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -60,49 +60,6 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
             yield array_namespace, None, None
 
 
-def yield_namespace_device_int_dtype_combinations():
-    """Yield supported namespace, device, int dtype tuples for testing.
-
-    Use this to test that an estimator works with all combinations.
-
-    Returns
-    -------
-    array_namespace : str
-        The name of the Array API namespace.
-
-    device : str
-        The name of the device on which to allocate the arrays. Can be None to
-        indicate that the default value should be used.
-
-    dtype : str
-        The name of the int data type to use for arrays. Can be None to
-        indicate that the default value should be used.
-    """
-    for array_namespace in [
-        "numpy",
-        "array_api_strict",
-        "cupy",
-        "cupy.array_api",
-        "torch",
-    ]:
-        if array_namespace == "torch":
-            for device, dtype in itertools.product(
-                ("cpu", "cuda", "mps"), ("int16", "int32", "int64", "uint8")
-            ):
-                yield array_namespace, device, dtype
-        else:
-            for dtype in (
-                "int16",
-                "int32",
-                "int64",
-                "uint8",
-                "uint16",
-                "uint32",
-                "uint64",
-            ):
-                yield array_namespace, None, dtype
-
-
 def _check_array_api_dispatch(array_api_dispatch):
     """Check that array_api_compat is installed and NumPy version is compatible.
 
@@ -325,9 +282,10 @@ def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
     def searchsorted(self, a, v, *, side="left", sorter=None):
-        # Temporary workaround needed as long as searchsorted is not part
-        # of the Array API spec:
-        # https://github.com/data-apis/array-api/issues/688
+        # Temporary workaround needed as long as searchsorted is not widely
+        # adopted by implementers of the Array API spec. This is a quite
+        # recent addition to the spec:
+        # https://data-apis.org/array-api/latest/API_specification/generated/array_api.searchsorted.html # noqa
         if hasattr(self._namespace, "searchsorted"):
             return self._namespace.searchsorted(a, v, side=side, sorter=sorter)
 
@@ -911,7 +869,11 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
     second array.
 
     Returns a boolean array the same length as `ar1` that is True
-    where an element of `ar1` is in `ar2` and False otherwise
+    where an element of `ar1` is in `ar2` and False otherwise.
+
+    This function has been adapted using the original implementation
+    present in numpy:
+    https://github.com/numpy/numpy/blob/v1.26.0/numpy/lib/arraysetops.py#L524-L758
     """
 
     # This code is run to make the code significantly faster
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 420e736f66b89..2c1a4fe73e517 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -4,7 +4,13 @@
 
 import numpy as np
 
-from ._array_api import _convert_to_numpy, _isin, _setdiff1d, device, get_namespace
+from ._array_api import (
+    _is_numpy_namespace,
+    _isin,
+    _setdiff1d,
+    device,
+    get_namespace,
+)
 from ._missing import is_scalar_nan
 
 
@@ -221,12 +227,17 @@ def _encode(values, *, uniques, check_unknown=True):
         Encoded values
     """
     xp, is_array_api_compliant = get_namespace(values, uniques)
-    if is_array_api_compliant:
-        dtype_kind = _convert_to_numpy(values, xp).dtype.kind
+    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
+        try:
+            dtype = values.dtype
+            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
+            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
+        except ValueError:
+            numeric_dtype = False
     else:
-        dtype_kind = values.dtype.kind
+        numeric_dtype = values.dtype.kind not in "OUS"
 
-    if dtype_kind in "OUS":
+    if not numeric_dtype:
         try:
             return _map_to_integer(values, uniques)
         except KeyError as e:
@@ -266,12 +277,17 @@ def _check_unknown(values, known_values, return_mask=False):
     """
     xp, is_array_api_compliant = get_namespace(values, known_values)
     valid_mask = None
-    if is_array_api_compliant:
-        dtype_kind = _convert_to_numpy(values, xp).dtype.kind
+    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
+        try:
+            dtype = values.dtype
+            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
+            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
+        except ValueError:
+            numeric_dtype = False
     else:
-        dtype_kind = values.dtype.kind
+        numeric_dtype = values.dtype.kind not in "OUS"
 
-    if dtype_kind in "OUS":
+    if not numeric_dtype:
         values_set = set(values)
         values_set, missing_in_values = _extract_missing(values_set)
 
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 5cf8347cadfd7..8814d61a1e656 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -24,7 +24,6 @@
     indexing_dtype,
     supported_float_dtypes,
     yield_namespace_device_dtype_combinations,
-    yield_namespace_device_int_dtype_combinations,
 )
 from sklearn.utils._testing import (
     _array_api_for_tests,
@@ -489,19 +488,19 @@ def test_indexing_dtype(namespace, _device, _dtype):
 
 
 @pytest.mark.parametrize(
-    "array_namespace, device, _", yield_namespace_device_int_dtype_combinations()
+    "array_namespace, device, _", yield_namespace_device_dtype_combinations()
 )
 @pytest.mark.parametrize("invert", [True, False])
 @pytest.mark.parametrize("assume_unique", [True, False])
 @pytest.mark.parametrize("element_size", [6, 10, 14])
-@pytest.mark.parametrize("int_dtype", ["int32", "int64", "uint8"])
+@pytest.mark.parametrize("int_dtype", ["int16", "int32", "int64", "uint8"])
 def test_isin(
     array_namespace, device, _, invert, assume_unique, element_size, int_dtype
 ):
-    xp, device, dtype = _array_api_for_tests(array_namespace, device, int_dtype)
+    xp = _array_api_for_tests(array_namespace, device)
     r = element_size // 2
-    element = 2 * numpy.arange(element_size).reshape((r, 2)).astype(dtype)
-    test_elements = numpy.array(np.arange(14), dtype=dtype)
+    element = 2 * numpy.arange(element_size).reshape((r, 2)).astype(int_dtype)
+    test_elements = numpy.array(np.arange(14), dtype=int_dtype)
     element_xp = xp.asarray(element, device=device)
     test_elements_xp = xp.asarray(test_elements, device=device)
     expected = numpy.isin(

From 61774758338c6bed04c002dc991044c2972980e1 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 3 May 2024 11:44:32 +0500
Subject: [PATCH 07/21] Revert changes in estimator_checks

---
 sklearn/utils/estimator_checks.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index da2e189632d46..59d371bad57cd 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -897,19 +897,7 @@ def check_array_api_input(
     X_xp = xp.asarray(X, device=device)
     y_xp = xp.asarray(y, device=device)
 
-    X_types = est._get_tags().get("X_types", [""])
-    if "labels" in X_types[0]:
-        fit_args = (y,)
-        xp_fit_args = (y_xp,)
-        method_arg = (y,)
-        xp_method_arg = (y_xp,)
-    else:
-        fit_args = (X, y)
-        xp_fit_args = (X_xp, y_xp)
-        method_arg = (X,)
-        xp_method_arg = (X_xp,)
-
-    est.fit(*fit_args)
+    est.fit(X, y)
 
     array_attributes = {
         key: value for key, value in vars(est).items() if isinstance(value, np.ndarray)
@@ -917,7 +905,7 @@ def check_array_api_input(
 
     est_xp = clone(est)
     with config_context(array_api_dispatch=True):
-        est_xp.fit(*xp_fit_args)
+        est_xp.fit(X_xp, y_xp)
         input_ns = get_namespace(X_xp)[0].__name__
 
     # Fitted attributes which are arrays must have the same
@@ -972,9 +960,9 @@ def check_array_api_input(
                 assert abs(result - result_xp) < _atol_for_type(X.dtype)
             continue
         else:
-            result = method(*method_arg)
+            result = method(X)
             with config_context(array_api_dispatch=True):
-                result_xp = getattr(est_xp, method_name)(*xp_method_arg)
+                result_xp = getattr(est_xp, method_name)(X_xp)
 
         with config_context(array_api_dispatch=True):
             result_ns = get_namespace(result_xp)[0].__name__

From a21a490976a20153db89a43b52ca7c3ede349b9a Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 3 May 2024 16:48:26 +0500
Subject: [PATCH 08/21] Improve the tests and handle device in _in1d

---
 sklearn/preprocessing/tests/test_label.py | 55 +++++++++++++++--------
 sklearn/utils/_array_api.py               |  5 ++-
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 5bf63b21267ac..3369ec080418e 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -2,7 +2,7 @@
 import pytest
 from scipy.sparse import issparse
 
-from sklearn import datasets
+from sklearn import config_context, datasets
 from sklearn.preprocessing._label import (
     LabelBinarizer,
     LabelEncoder,
@@ -11,11 +11,15 @@
     _inverse_binarize_thresholding,
     label_binarize,
 )
-from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
-from sklearn.utils._testing import assert_array_equal, ignore_warnings
-from sklearn.utils.estimator_checks import (
-    _get_check_estimator_ids,
-    check_array_api_input_and_values,
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    get_namespace,
+    yield_namespace_device_dtype_combinations,
+)
+from sklearn.utils._testing import (
+    _array_api_for_tests,
+    assert_array_equal,
+    ignore_warnings,
 )
 from sklearn.utils.fixes import (
     COO_CONTAINERS,
@@ -708,17 +712,30 @@ def test_label_encoders_do_not_have_set_output(encoder):
     "array_namespace, device, dtype", yield_namespace_device_dtype_combinations()
 )
 @pytest.mark.parametrize(
-    "check",
-    [check_array_api_input_and_values],
-    ids=_get_check_estimator_ids,
-)
-@pytest.mark.parametrize(
-    "estimator",
-    [LabelEncoder()],
-    ids=_get_check_estimator_ids,
+    "y",
+    [
+        np.array([2, 1, 3, 1, 3]),
+        np.array([1, 1, 4, 5, -1, 0]),
+        np.array([3, 5, 9, 5, 9, 3]),
+    ],
 )
-def test_label_encoder_array_api_compliance(
-    estimator, check, array_namespace, device, dtype
-):
-    name = estimator.__class__.__name__
-    check(name, estimator, array_namespace, device=device, dtype_name=dtype)
+def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype):
+    xp = _array_api_for_tests(array_namespace, device)
+    xp_y = xp.asarray(y, device=device)
+    xp_label = LabelEncoder()
+    with config_context(array_api_dispatch=True):
+        xp_label_fit = xp_label.fit(xp_y)
+        xp_transformed = xp_label_fit.transform(xp_y)
+        xp_inv_transformed = xp_label_fit.inverse_transform(xp_transformed)
+        np_label = LabelEncoder()
+        np_label_fit = np_label.fit(y)
+        np_transformed = np_label_fit.transform(y)
+        assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
+        assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
+        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
+        assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y)
+
+        xp_transformed = xp_label.fit_transform(xp_y)
+        np_transformed = np_label.fit_transform(y)
+        assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
+        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index a4ac7a7fe55a7..458d739d674dc 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -923,15 +923,16 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
     present in numpy:
     https://github.com/numpy/numpy/blob/v1.26.0/numpy/lib/arraysetops.py#L524-L758
     """
+    xp, _ = get_namespace(ar1, ar2, xp=xp)
 
     # This code is run to make the code significantly faster
     if ar2.shape[0] < 10 * ar1.shape[0] ** 0.145:
         if invert:
-            mask = xp.ones(ar1.shape[0], dtype=xp.bool)
+            mask = xp.ones(ar1.shape[0], dtype=xp.bool, device=device(ar1))
             for a in ar2:
                 mask &= ar1 != a
         else:
-            mask = xp.zeros(ar1.shape[0], dtype=xp.bool)
+            mask = xp.zeros(ar1.shape[0], dtype=xp.bool, device=device(ar1))
             for a in ar2:
                 mask |= ar1 == a
         return mask

From b09b57bb9937c11d0de46061a4443c2d7d14d2ac Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 3 May 2024 17:56:22 +0200
Subject: [PATCH 09/21] Fix missing device specification and explicit
 conversion to numpy

---
 sklearn/utils/_array_api.py           | 5 +++--
 sklearn/utils/tests/test_array_api.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 458d739d674dc..1e75bc3dcb26b 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -942,6 +942,7 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
         ar2 = xp.unique_values(ar2)
 
     ar = xp.concat((ar1, ar2))
+    device_ = device(ar)
     # We need this to be a stable sort.
     order = ar.argsort(stable=True)
     sar = ar[order]
@@ -949,8 +950,8 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
         bool_ar = sar[1:] != sar[:-1]
     else:
         bool_ar = sar[1:] == sar[:-1]
-    flag = xp.concat((bool_ar, xp.asarray([invert])))
-    ret = xp.empty(ar.shape, dtype=xp.bool)
+    flag = xp.concat((bool_ar, xp.asarray([invert], device=device_)))
+    ret = xp.empty(ar.shape, dtype=xp.bool, device=device_)
     ret[order] = flag
 
     if assume_unique:
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 0594363ef50c4..5d23c8c376e75 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -540,4 +540,4 @@ def test_isin(
             invert=invert,
         )
 
-    assert_array_equal(result, expected)
+    assert_array_equal(_convert_to_numpy(result, xp=xp), expected)

From 0544c32052750668f67e794ac7fcec8114d82353 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 3 May 2024 18:23:35 +0200
Subject: [PATCH 10/21] Fix _isin to work with Array API inputs

---
 sklearn/utils/_array_api.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 1e75bc3dcb26b..54d278323c767 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -944,17 +944,17 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
     ar = xp.concat((ar1, ar2))
     device_ = device(ar)
     # We need this to be a stable sort.
-    order = ar.argsort(stable=True)
-    sar = ar[order]
+    order = xp.argsort(ar, stable=True)
+    reverse_order = xp.argsort(order, stable=True)
+    sar = xp.take(ar, order)
     if invert:
         bool_ar = sar[1:] != sar[:-1]
     else:
         bool_ar = sar[1:] == sar[:-1]
     flag = xp.concat((bool_ar, xp.asarray([invert], device=device_)))
-    ret = xp.empty(ar.shape, dtype=xp.bool, device=device_)
-    ret[order] = flag
+    ret = xp.take(flag, reverse_order)
 
     if assume_unique:
-        return ret[: len(ar1)]
+        return ret[: ar1.shape[0]]
     else:
-        return ret[rev_idx]
+        return xp.take(ret, rev_idx)

From a34138bbd631de5b1815d40964d069256cd88c53 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Mon, 6 May 2024 15:30:13 +0500
Subject: [PATCH 11/21] Fix the errors, make searchsorted a helper function

---
 sklearn/preprocessing/_label.py |  8 ++++++--
 sklearn/utils/_array_api.py     | 27 ++++++++++++++-------------
 sklearn/utils/_encode.py        |  5 +++--
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 5a1ca9638c15b..3c439733bfb94 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -17,7 +17,7 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import column_or_1d
-from ..utils._array_api import _setdiff1d, get_namespace
+from ..utils._array_api import _setdiff1d, device, get_namespace
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, validate_params
 from ..utils.multiclass import type_of_target, unique_labels
@@ -158,7 +158,11 @@ def inverse_transform(self, y):
         if _num_samples(y) == 0:
             return xp.asarray([])
 
-        diff = _setdiff1d(ar1=y, ar2=xp.arange(self.classes_.shape[0]), xp=xp)
+        diff = _setdiff1d(
+            ar1=y,
+            ar2=xp.arange(self.classes_.shape[0], device=device(y)),
+            xp=xp,
+        )
         if diff.shape[0]:
             raise ValueError("y contains previously unseen labels: %s" % str(diff))
         y = xp.asarray(y)
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 54d278323c767..68dbd450803c1 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -302,19 +302,6 @@ def __eq__(self, other):
     def isdtype(self, dtype, kind):
         return isdtype(dtype, kind, xp=self._namespace)
 
-    def searchsorted(self, a, v, *, side="left", sorter=None):
-        # Temporary workaround needed as long as searchsorted is not widely
-        # adopted by implementers of the Array API spec. This is a quite
-        # recent addition to the spec:
-        # https://data-apis.org/array-api/latest/API_specification/generated/array_api.searchsorted.html # noqa
-        if hasattr(self._namespace, "searchsorted"):
-            return self._namespace.searchsorted(a, v, side=side, sorter=sorter)
-
-        a = _convert_to_numpy(a, xp=self._namespace)
-        v = _convert_to_numpy(v, xp=self._namespace)
-        indices = numpy.searchsorted(a, v, side=side, sorter=sorter)
-        return self._namespace.asarray(indices, device=device(a))
-
 
 def _check_device_cpu(device):  # noqa
     if device not in {"cpu", None}:
@@ -856,6 +843,20 @@ def indexing_dtype(xp):
     return xp.asarray(0).dtype
 
 
+def _searchsorted(xp, a, v, *, side="left", sorter=None):
+    # Temporary workaround needed as long as searchsorted is not widely
+    # adopted by implementers of the Array API spec. This is a quite
+    # recent addition to the spec:
+    # https://data-apis.org/array-api/latest/API_specification/generated/array_api.searchsorted.html # noqa
+    if hasattr(xp, "searchsorted"):
+        return xp.searchsorted(a, v, side=side, sorter=sorter)
+
+    a_np = _convert_to_numpy(a, xp=xp)
+    v_np = _convert_to_numpy(v, xp=xp)
+    indices = numpy.searchsorted(a_np, v_np, side=side, sorter=sorter)
+    return xp.asarray(indices, device=device(a))
+
+
 def _setdiff1d(ar1, ar2, xp, assume_unique=False):
     """Find the set difference of two arrays.
 
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 2c1a4fe73e517..a0f26aca9ad54 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -7,6 +7,7 @@
 from ._array_api import (
     _is_numpy_namespace,
     _isin,
+    _searchsorted,
     _setdiff1d,
     device,
     get_namespace,
@@ -74,7 +75,7 @@ def _unique_np(values, return_inverse=False, return_counts=False):
     # np.unique will have duplicate missing values at the end of `uniques`
     # here we clip the nans and remove it from uniques
     if uniques.size and is_scalar_nan(uniques[-1]):
-        nan_idx = xp.searchsorted(uniques, xp.nan)
+        nan_idx = _searchsorted(xp, uniques, xp.nan)
         uniques = uniques[: nan_idx + 1]
         if return_inverse:
             inverse[inverse > nan_idx] = nan_idx
@@ -247,7 +248,7 @@ def _encode(values, *, uniques, check_unknown=True):
             diff = _check_unknown(values, uniques)
             if diff:
                 raise ValueError(f"y contains previously unseen labels: {str(diff)}")
-        return xp.searchsorted(uniques, values)
+        return _searchsorted(xp, uniques, values)
 
 
 def _check_unknown(values, known_values, return_mask=False):

From beb036a74b82e41789d82039e4ef3a8e34243d87 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Mon, 6 May 2024 17:44:20 +0500
Subject: [PATCH 12/21] Add array_api_support tag

---
 sklearn/preprocessing/_label.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 3c439733bfb94..2db1486c0251d 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -169,7 +169,7 @@ def inverse_transform(self, y):
         return xp.take(self.classes_, y, axis=0)
 
     def _more_tags(self):
-        return {"X_types": ["1dlabels"]}
+        return {"X_types": ["1dlabels"], "array_api_support": True}
 
 
 class LabelBinarizer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None):

From 34c2d9201506501b3e2f24fc3a9590d3cf563bf0 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 7 May 2024 11:18:48 +0500
Subject: [PATCH 13/21] Updates: according to some pr suggestions

---
 sklearn/utils/_array_api.py |  6 +++---
 sklearn/utils/_encode.py    | 38 +++++++++++++++++++------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 68dbd450803c1..78a1d1023c355 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -947,15 +947,15 @@ def _in1d(ar1, ar2, xp, assume_unique=False, invert=False):
     # We need this to be a stable sort.
     order = xp.argsort(ar, stable=True)
     reverse_order = xp.argsort(order, stable=True)
-    sar = xp.take(ar, order)
+    sar = xp.take(ar, order, axis=0)
     if invert:
         bool_ar = sar[1:] != sar[:-1]
     else:
         bool_ar = sar[1:] == sar[:-1]
     flag = xp.concat((bool_ar, xp.asarray([invert], device=device_)))
-    ret = xp.take(flag, reverse_order)
+    ret = xp.take(flag, reverse_order, axis=0)
 
     if assume_unique:
         return ret[: ar1.shape[0]]
     else:
-        return xp.take(ret, rev_idx)
+        return xp.take(ret, rev_idx, axis=0)
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index a0f26aca9ad54..7e768604de86e 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -198,6 +198,20 @@ def _unique_python(values, *, return_inverse, return_counts):
     return ret[0] if len(ret) == 1 else ret
 
 
+def _is_array_of_numeric_dtype(arr, xp, is_array_api_compliant):
+    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
+        try:
+            dtype = arr.dtype
+            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
+            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
+        except ValueError:
+            numeric_dtype = False
+    else:
+        numeric_dtype = arr.dtype.kind not in "OUS"
+
+    return numeric_dtype
+
+
 def _encode(values, *, uniques, check_unknown=True):
     """Helper function to encode values into [0, n_uniques - 1].
 
@@ -228,15 +242,9 @@ def _encode(values, *, uniques, check_unknown=True):
         Encoded values
     """
     xp, is_array_api_compliant = get_namespace(values, uniques)
-    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
-        try:
-            dtype = values.dtype
-            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
-            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
-        except ValueError:
-            numeric_dtype = False
-    else:
-        numeric_dtype = values.dtype.kind not in "OUS"
+    numeric_dtype = _is_array_of_numeric_dtype(
+        arr=values, xp=xp, is_array_api_compliant=is_array_api_compliant
+    )
 
     if not numeric_dtype:
         try:
@@ -278,15 +286,9 @@ def _check_unknown(values, known_values, return_mask=False):
     """
     xp, is_array_api_compliant = get_namespace(values, known_values)
     valid_mask = None
-    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
-        try:
-            dtype = values.dtype
-            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
-            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
-        except ValueError:
-            numeric_dtype = False
-    else:
-        numeric_dtype = values.dtype.kind not in "OUS"
+    numeric_dtype = _is_array_of_numeric_dtype(
+        arr=values, xp=xp, is_array_api_compliant=is_array_api_compliant
+    )
 
     if not numeric_dtype:
         values_set = set(values)

From db32acf9b0a1fd0a3f1f41ab18841129c392e07d Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 7 May 2024 11:40:00 +0200
Subject: [PATCH 14/21] Use xp.isdtype(values.dtype, "numeric") directly

---
 sklearn/utils/_encode.py | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 7e768604de86e..3fd4d45f522e6 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -5,7 +5,6 @@
 import numpy as np
 
 from ._array_api import (
-    _is_numpy_namespace,
     _isin,
     _searchsorted,
     _setdiff1d,
@@ -198,20 +197,6 @@ def _unique_python(values, *, return_inverse, return_counts):
     return ret[0] if len(ret) == 1 else ret
 
 
-def _is_array_of_numeric_dtype(arr, xp, is_array_api_compliant):
-    if is_array_api_compliant and not _is_numpy_namespace(xp=xp):
-        try:
-            dtype = arr.dtype
-            dtype_kind = dtype.kind if hasattr(dtype, "kind") else dtype
-            numeric_dtype = xp.isdtype(dtype=dtype, kind=dtype_kind)
-        except ValueError:
-            numeric_dtype = False
-    else:
-        numeric_dtype = arr.dtype.kind not in "OUS"
-
-    return numeric_dtype
-
-
 def _encode(values, *, uniques, check_unknown=True):
     """Helper function to encode values into [0, n_uniques - 1].
 
@@ -241,12 +226,8 @@ def _encode(values, *, uniques, check_unknown=True):
     encoded : ndarray
         Encoded values
     """
-    xp, is_array_api_compliant = get_namespace(values, uniques)
-    numeric_dtype = _is_array_of_numeric_dtype(
-        arr=values, xp=xp, is_array_api_compliant=is_array_api_compliant
-    )
-
-    if not numeric_dtype:
+    xp, _ = get_namespace(values, uniques)
+    if not xp.isdtype(values.dtype, "numeric"):
         try:
             return _map_to_integer(values, uniques)
         except KeyError as e:
@@ -284,13 +265,10 @@ def _check_unknown(values, known_values, return_mask=False):
         Additionally returned if ``return_mask=True``.
 
     """
-    xp, is_array_api_compliant = get_namespace(values, known_values)
+    xp, _ = get_namespace(values, known_values)
     valid_mask = None
-    numeric_dtype = _is_array_of_numeric_dtype(
-        arr=values, xp=xp, is_array_api_compliant=is_array_api_compliant
-    )
 
-    if not numeric_dtype:
+    if not xp.isdtype(values.dtype, "numeric"):
         values_set = set(values)
         values_set, missing_in_values = _extract_missing(values_set)
 

From a5934783555abe159a6f5d69efb91384d7fb845b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 7 May 2024 14:45:39 +0500
Subject: [PATCH 15/21] Update changelog

---
 doc/whats_new/v1.4.rst |  4 ----
 doc/whats_new/v1.6.rst | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 5f60aefa0d95e..7865ff38adb79 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -926,10 +926,6 @@ Changelog
   `transform` without calling `fit` since `categories` always requires to be checked.
   :pr:`27821` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- |Enhancement| :class:`preprocessing.LabelEncoder` now supports the
-  `Array API <https://data-apis.org/array-api/latest/>`_. See :ref:`array_api`
-  for more details. :pr:`27381` by :user:`Omar Salman <OmarManzoor>`.
-
 :mod:`sklearn.tree`
 ...................
 
diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index b90394c75b6ff..70a8fafb9ba12 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -22,6 +22,22 @@ Version 1.6.0
 
 **In Development**
 
+Support for Array API
+---------------------
+
+Additional estimators and functions have been updated to include support for all
+`Array API <https://data-apis.org/array-api/latest/>`_ compliant inputs.
+
+See :ref:`array_api` for more details.
+
+**Functions:**
+
+**Classes:**
+
+- :class:`preprocessing.LabelEncoder` now supports the Array API
+  `Array API <https://data-apis.org/array-api/latest/>`_. See :ref:`array_api`
+  for more details. :pr:`27381` by :user:`Omar Salman <OmarManzoor>`.
+
 Changelog
 ---------
 

From 22fa6118b967a7dd13057723ab3148c93115f2eb Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 7 May 2024 15:08:03 +0500
Subject: [PATCH 16/21] Update docstring for inverse transform

---
 sklearn/preprocessing/_label.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 2db1486c0251d..d9023b3301e54 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -143,12 +143,12 @@ def inverse_transform(self, y):
 
         Parameters
         ----------
-        y : ndarray of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : ndarray of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Original encoding.
         """
         check_is_fitted(self)

From f8144410ebd0276d17d70c9a51a41c10a2efa4c5 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 7 May 2024 21:16:45 +0500
Subject: [PATCH 17/21] Change array-like to array

---
 sklearn/preprocessing/_label.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index d9023b3301e54..54ff93e4f8f59 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -87,7 +87,7 @@ def fit(self, y):
 
         Parameters
         ----------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Target values.
 
         Returns
@@ -104,12 +104,12 @@ def fit_transform(self, y):
 
         Parameters
         ----------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Encoded labels.
         """
         y = column_or_1d(y, warn=True)
@@ -121,12 +121,12 @@ def transform(self, y):
 
         Parameters
         ----------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Labels as normalized encodings.
         """
         check_is_fitted(self)
@@ -143,12 +143,12 @@ def inverse_transform(self, y):
 
         Parameters
         ----------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array-like of shape (n_samples,)
+        y : array of shape (n_samples,)
             Original encoding.
         """
         check_is_fitted(self)

From 8ce860d5e69ce43ca6d4b975cde456afc37c791b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 8 May 2024 15:57:52 +0500
Subject: [PATCH 18/21] Update the changelog definition to make it consistent

---
 doc/whats_new/v1.6.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index c6f049facff49..63c261497a3cc 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -38,9 +38,8 @@ See :ref:`array_api` for more details.
 
 **Classes:**
 
-- :class:`preprocessing.LabelEncoder` now supports the Array API
-  `Array API <https://data-apis.org/array-api/latest/>`_. See :ref:`array_api`
-  for more details. :pr:`27381` by :user:`Omar Salman <OmarManzoor>`.
+- :class:`preprocessing.LabelEncoder` now supports Array API compatible inputs.
+  :pr:`27381` by :user:`Omar Salman <OmarManzoor>`.
 
 Changelog
 ---------

From fae25aa4c3f79219c5d453bb83931f8e9cb42010 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 10 May 2024 22:55:05 +0500
Subject: [PATCH 19/21] Revert and update parameter and return type names

---
 sklearn/preprocessing/_label.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 54ff93e4f8f59..ecf0c400a2c2f 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -87,7 +87,7 @@ def fit(self, y):
 
         Parameters
         ----------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Target values.
 
         Returns
@@ -104,12 +104,12 @@ def fit_transform(self, y):
 
         Parameters
         ----------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Encoded labels.
         """
         y = column_or_1d(y, warn=True)
@@ -121,12 +121,12 @@ def transform(self, y):
 
         Parameters
         ----------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Labels as normalized encodings.
         """
         check_is_fitted(self)
@@ -143,12 +143,12 @@ def inverse_transform(self, y):
 
         Parameters
         ----------
-        y : array of shape (n_samples,)
+        y : array-like of shape (n_samples,)
             Target values.
 
         Returns
         -------
-        y : array of shape (n_samples,)
+        y : ndarray of shape (n_samples,)
             Original encoding.
         """
         check_is_fitted(self)

From dbf233a2f6aabf983cda035f179848ca1467e917 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 16 May 2024 14:22:18 +0500
Subject: [PATCH 20/21] Updates: Address further PR suggestions

---
 sklearn/preprocessing/tests/test_label.py | 13 +++++++++----
 sklearn/utils/tests/test_array_api.py     |  3 +--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 3369ec080418e..c7206f2f4b8a0 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -722,20 +722,25 @@ def test_label_encoders_do_not_have_set_output(encoder):
 def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype):
     xp = _array_api_for_tests(array_namespace, device)
     xp_y = xp.asarray(y, device=device)
-    xp_label = LabelEncoder()
     with config_context(array_api_dispatch=True):
-        xp_label_fit = xp_label.fit(xp_y)
-        xp_transformed = xp_label_fit.transform(xp_y)
-        xp_inv_transformed = xp_label_fit.inverse_transform(xp_transformed)
+        xp_label = LabelEncoder()
+        xp_label = xp_label.fit(xp_y)
+        xp_transformed = xp_label.transform(xp_y)
+        xp_inv_transformed = xp_label.inverse_transform(xp_transformed)
         np_label = LabelEncoder()
         np_label_fit = np_label.fit(y)
         np_transformed = np_label_fit.transform(y)
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
+        assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__
         assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
         assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y)
+        assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_)
 
+        xp_label = LabelEncoder()
         xp_transformed = xp_label.fit_transform(xp_y)
         np_transformed = np_label.fit_transform(y)
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
+        assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__
         assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
+        assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_)
diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 5d23c8c376e75..30fc88c539fc8 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -2,7 +2,6 @@
 from functools import partial
 
 import numpy
-import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 
@@ -522,7 +521,7 @@ def test_isin(
     xp = _array_api_for_tests(array_namespace, device)
     r = element_size // 2
     element = 2 * numpy.arange(element_size).reshape((r, 2)).astype(int_dtype)
-    test_elements = numpy.array(np.arange(14), dtype=int_dtype)
+    test_elements = numpy.array(numpy.arange(14), dtype=int_dtype)
     element_xp = xp.asarray(element, device=device)
     test_elements_xp = xp.asarray(test_elements, device=device)
     expected = numpy.isin(

From 7500c2f4b82731c75cff3852469f910d35a5e552 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 16 May 2024 14:23:53 +0500
Subject: [PATCH 21/21] Minor adjustment

---
 sklearn/preprocessing/tests/test_label.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index c7206f2f4b8a0..90e3aa210eebb 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -724,12 +724,12 @@ def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype):
     xp_y = xp.asarray(y, device=device)
     with config_context(array_api_dispatch=True):
         xp_label = LabelEncoder()
+        np_label = LabelEncoder()
         xp_label = xp_label.fit(xp_y)
         xp_transformed = xp_label.transform(xp_y)
         xp_inv_transformed = xp_label.inverse_transform(xp_transformed)
-        np_label = LabelEncoder()
-        np_label_fit = np_label.fit(y)
-        np_transformed = np_label_fit.transform(y)
+        np_label = np_label.fit(y)
+        np_transformed = np_label.transform(y)
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
         assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__
@@ -738,6 +738,7 @@ def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype):
         assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_)
 
         xp_label = LabelEncoder()
+        np_label = LabelEncoder()
         xp_transformed = xp_label.fit_transform(xp_y)
         np_transformed = np_label.fit_transform(y)
         assert get_namespace(xp_transformed)[0].__name__ == xp.__name__