scikit-learn · jeremiedbb · Apr 10, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 29, 2024
diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
@@ -220,6 +220,13 @@ Changelog
   by passing a function in place of a strategy name.
   :pr:`28053` by :user:`Mark Elliot <mark-thm>`.
 
+:mod:`sklearn.inspection`
+.........................
+
+- |Fix| :meth:`inspection.DecisionBoundaryDisplay.from_estimator` no longer
+  warns about missing feature names when provided a `polars.DataFrame`.
+  :pr:`28718` by :user:`Patrick Wang <patrickkwang>`.
+
 :mod:`sklearn.linear_model`
 ...........................
 

diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py
@@ -5,8 +5,11 @@
 from ...utils import _safe_indexing
 from ...utils._optional_dependencies import check_matplotlib_support
 from ...utils._response import _get_response_values
+from ...utils._set_output import _get_adapter_from_container
 from ...utils.validation import (
     _is_arraylike_not_scalar,
+    _is_pandas_df,
+    _is_polars_df,
     _num_features,
     check_is_fitted,
 )
@@ -345,13 +348,15 @@ def from_estimator(
             np.linspace(x0_min, x0_max, grid_resolution),
             np.linspace(x1_min, x1_max, grid_resolution),
         )
-        if hasattr(X, "iloc"):
-            # we need to preserve the feature names and therefore get an empty dataframe
-            X_grid = X.iloc[[], :].copy()
-            X_grid.iloc[:, 0] = xx0.ravel()
-            X_grid.iloc[:, 1] = xx1.ravel()
-        else:
-            X_grid = np.c_[xx0.ravel(), xx1.ravel()]
+
+        X_grid = np.c_[xx0.ravel(), xx1.ravel()]
+        if _is_pandas_df(X) or _is_polars_df(X):
+            adapter = _get_adapter_from_container(X)
+            X_grid = adapter.create_container(
+                X_grid,
+                X_grid,
+                columns=X.columns,
+            )
 
         prediction_method = _check_boundary_response_method(
             estimator, response_method, class_of_interest

diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py
@@ -17,6 +17,7 @@
 from sklearn.preprocessing import scale
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.utils._testing import (
+    _convert_container,
     assert_allclose,
     assert_array_equal,
 )
@@ -468,15 +469,18 @@ def test_string_target(pyplot):
     )
 
 
-def test_dataframe_support(pyplot):
+@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
+def test_dataframe_support(pyplot, constructor_name):
     """Check that passing a dataframe at fit and to the Display does not
     raise warnings.
 
     Non-regression test for:
-    https://github.com/scikit-learn/scikit-learn/issues/23311
+    * https://github.com/scikit-learn/scikit-learn/issues/23311
+    * https://github.com/scikit-learn/scikit-learn/issues/28717
     """
-    pd = pytest.importorskip("pandas")
-    df = pd.DataFrame(X, columns=["col_x", "col_y"])
+    df = _convert_container(
+        X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
+    )
     estimator = LogisticRegression().fit(df, y)
 
     with warnings.catch_warnings():

diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
@@ -4,7 +4,10 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils._param_validation import StrOptions
-from ..utils._set_output import ADAPTERS_MANAGER, _get_output_config
+from ..utils._set_output import (
+    _get_adapter_from_container,
+    _get_output_config,
+)
 from ..utils.metaestimators import available_if
 from ..utils.validation import (
     _allclose_dense_sparse,
@@ -16,24 +19,6 @@
 )
 
 
-def _get_adapter_from_container(container):
-    """Get the adapter that nows how to handle such container.
-
-    See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more
-    details.
-    """
-    module_name = container.__class__.__module__.split(".")[0]
-    try:
-        return ADAPTERS_MANAGER.adapters[module_name]
-    except KeyError as exc:
-        available_adapters = list(ADAPTERS_MANAGER.adapters.keys())
-        raise ValueError(
-            "The container does not have a registered adapter in scikit-learn. "
-            f"Available adapters are: {available_adapters} while the container "
-            f"provided is: {container!r}."
-        ) from exc
-
-
 def _identity(X):
     """The identity function."""
     return X

diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py
@@ -5,7 +5,6 @@
 
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import FunctionTransformer, StandardScaler
-from sklearn.preprocessing._function_transformer import _get_adapter_from_container
 from sklearn.utils._testing import (
     _convert_container,
     assert_allclose_dense_sparse,
@@ -14,17 +13,6 @@
 from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
 
 
-def test_get_adapter_from_container():
-    """Check the behavior fo `_get_adapter_from_container`."""
-    pd = pytest.importorskip("pandas")
-    X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
-    adapter = _get_adapter_from_container(X)
-    assert adapter.container_lib == "pandas"
-    err_msg = "The container does not have a registered adapter in scikit-learn."
-    with pytest.raises(ValueError, match=err_msg):
-        _get_adapter_from_container(X.to_numpy())
-
-
 def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X):
     def _func(X, *args, **kwargs):
         args_store.append(X)

diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py
@@ -197,6 +197,24 @@ def register(self, adapter):
 ADAPTERS_MANAGER.register(PolarsAdapter())
 
 
+def _get_adapter_from_container(container):
+    """Get the adapter that knows how to handle such container.
+
+    See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more
+    details.
+    """
+    module_name = container.__class__.__module__.split(".")[0]
+    try:
+        return ADAPTERS_MANAGER.adapters[module_name]
+    except KeyError as exc:
+        available_adapters = list(ADAPTERS_MANAGER.adapters.keys())
+        raise ValueError(
+            "The container does not have a registered adapter in scikit-learn. "
+            f"Available adapters are: {available_adapters} while the container "
+            f"provided is: {container!r}."
+        ) from exc
+
+
 def _get_container_adapter(method, estimator=None):
     """Get container adapter."""
     dense_config = _get_output_config(method, estimator)["dense"]

diff --git a/sklearn/utils/tests/test_set_output.py b/sklearn/utils/tests/test_set_output.py
@@ -10,6 +10,7 @@
 from sklearn.utils._set_output import (
     ADAPTERS_MANAGER,
     ContainerAdapterProtocol,
+    _get_adapter_from_container,
     _get_output_config,
     _safe_set_output,
     _SetOutputMixin,
@@ -450,3 +451,14 @@ def patched_import_module(name):
     msg = "Setting output container to 'pandas' requires"
     with pytest.raises(ImportError, match=msg):
         check_library_installed("pandas")
+
+
+def test_get_adapter_from_container():
+    """Check the behavior of `_get_adapter_from_container`."""
+    pd = pytest.importorskip("pandas")
+    X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
+    adapter = _get_adapter_from_container(X)
+    assert adapter.container_lib == "pandas"
+    err_msg = "The container does not have a registered adapter in scikit-learn."
+    with pytest.raises(ValueError, match=err_msg):
+        _get_adapter_from_container(X.to_numpy())