Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

FIX warning using polars DataFrames in DecisionBoundaryDisplay.from_estimator #28718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 10, 2024
Merged
7 changes: 7 additions & 0 deletions doc/whats_new/v1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ Changelog
by passing a function in place of a strategy name.
:pr:`28053` by :user:`Mark Elliot <mark-thm>`.

:mod:`sklearn.inspection`
.........................

- |Fix| :meth:`inspection.DecisionBoundaryDisplay.from_estimator` no longer
warns about missing feature names when provided a `polars.DataFrame`.
:pr:`28718` by :user:`Patrick Wang <patrickkwang>`.

:mod:`sklearn.linear_model`
...........................

Expand Down
19 changes: 12 additions & 7 deletions sklearn/inspection/_plot/decision_boundary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from ...utils import _safe_indexing
from ...utils._optional_dependencies import check_matplotlib_support
from ...utils._response import _get_response_values
from ...utils._set_output import _get_adapter_from_container
from ...utils.validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df,
_num_features,
check_is_fitted,
)
Expand Down Expand Up @@ -345,13 +348,15 @@ def from_estimator(
np.linspace(x0_min, x0_max, grid_resolution),
np.linspace(x1_min, x1_max, grid_resolution),
)
if hasattr(X, "iloc"):
# we need to preserve the feature names and therefore get an empty dataframe
X_grid = X.iloc[[], :].copy()
X_grid.iloc[:, 0] = xx0.ravel()
X_grid.iloc[:, 1] = xx1.ravel()
else:
X_grid = np.c_[xx0.ravel(), xx1.ravel()]

X_grid = np.c_[xx0.ravel(), xx1.ravel()]
if _is_pandas_df(X) or _is_polars_df(X):
adapter = _get_adapter_from_container(X)
X_grid = adapter.create_container(
X_grid,
X_grid,
columns=X.columns,
)

prediction_method = _check_boundary_response_method(
estimator, response_method, class_of_interest
Expand Down
12 changes: 8 additions & 4 deletions sklearn/inspection/_plot/tests/test_boundary_decision_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
Expand Down Expand Up @@ -468,15 +469,18 @@ def test_string_target(pyplot):
)


def test_dataframe_support(pyplot):
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
"""Check that passing a dataframe at fit and to the Display does not
raise warnings.

Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/23311
* https://github.com/scikit-learn/scikit-learn/issues/23311
* https://github.com/scikit-learn/scikit-learn/issues/28717
"""
pd = pytest.importorskip("pandas")
df = pd.DataFrame(X, columns=["col_x", "col_y"])
df = _convert_container(
X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
)
estimator = LogisticRegression().fit(df, y)

with warnings.catch_warnings():
Expand Down
23 changes: 4 additions & 19 deletions sklearn/preprocessing/_function_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

from ..base import BaseEstimator, TransformerMixin, _fit_context
from ..utils._param_validation import StrOptions
from ..utils._set_output import ADAPTERS_MANAGER, _get_output_config
from ..utils._set_output import (
_get_adapter_from_container,
_get_output_config,
)
from ..utils.metaestimators import available_if
from ..utils.validation import (
_allclose_dense_sparse,
Expand All @@ -16,24 +19,6 @@
)


def _get_adapter_from_container(container):
"""Get the adapter that knows how to handle such container.

See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more
details.
"""
# Adapters are looked up by the top-level package name of the container's class.
module_name = container.__class__.__module__.split(".")[0]
try:
return ADAPTERS_MANAGER.adapters[module_name]
except KeyError as exc:
available_adapters = list(ADAPTERS_MANAGER.adapters.keys())
raise ValueError(
"The container does not have a registered adapter in scikit-learn. "
f"Available adapters are: {available_adapters} while the container "
f"provided is: {container!r}."
) from exc


def _identity(X):
"""The identity function: return `X` unchanged."""
return X
Expand Down
12 changes: 0 additions & 12 deletions sklearn/preprocessing/tests/test_function_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.preprocessing._function_transformer import _get_adapter_from_container
from sklearn.utils._testing import (
_convert_container,
assert_allclose_dense_sparse,
Expand All @@ -14,17 +13,6 @@
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS


def test_get_adapter_from_container():
"""Check the behavior of `_get_adapter_from_container`."""
pd = pytest.importorskip("pandas")
X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
# A pandas DataFrame is expected to resolve to the pandas adapter.
adapter = _get_adapter_from_container(X)
assert adapter.container_lib == "pandas"
# The result of `to_numpy()` is expected to be rejected with a ValueError.
err_msg = "The container does not have a registered adapter in scikit-learn."
with pytest.raises(ValueError, match=err_msg):
_get_adapter_from_container(X.to_numpy())


def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X):
def _func(X, *args, **kwargs):
args_store.append(X)
Expand Down
18 changes: 18 additions & 0 deletions sklearn/utils/_set_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,24 @@ def register(self, adapter):
ADAPTERS_MANAGER.register(PolarsAdapter())


def _get_adapter_from_container(container):
    """Return the registered adapter matching the library of `container`.

    The adapter is looked up in ``ADAPTERS_MANAGER.adapters`` under the
    top-level package name of the container's class, i.e. the part of
    ``container.__class__.__module__`` that precedes the first dot.

    See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more
    details.

    Parameters
    ----------
    container : object
        Container whose class' module determines which adapter is returned.

    Returns
    -------
    adapter : object
        The entry of ``ADAPTERS_MANAGER.adapters`` registered for the
        container's library.

    Raises
    ------
    ValueError
        If no adapter is registered under the container's top-level package
        name. The original ``KeyError`` is chained as the cause.
    """
    registry = ADAPTERS_MANAGER.adapters
    library, _, _ = container.__class__.__module__.partition(".")
    try:
        adapter = registry[library]
    except KeyError as exc:
        known = list(registry.keys())
        raise ValueError(
            "The container does not have a registered adapter in scikit-learn. "
            f"Available adapters are: {known} while the container "
            f"provided is: {container!r}."
        ) from exc
    return adapter


def _get_container_adapter(method, estimator=None):
"""Get container adapter."""
dense_config = _get_output_config(method, estimator)["dense"]
Expand Down
12 changes: 12 additions & 0 deletions sklearn/utils/tests/test_set_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sklearn.utils._set_output import (
ADAPTERS_MANAGER,
ContainerAdapterProtocol,
_get_adapter_from_container,
_get_output_config,
_safe_set_output,
_SetOutputMixin,
Expand Down Expand Up @@ -450,3 +451,14 @@ def patched_import_module(name):
msg = "Setting output container to 'pandas' requires"
with pytest.raises(ImportError, match=msg):
check_library_installed("pandas")


def test_get_adapter_from_container():
    """Check the behavior of `_get_adapter_from_container`."""
    pd = pytest.importorskip("pandas")
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})

    # A pandas DataFrame is expected to resolve to the pandas adapter.
    assert _get_adapter_from_container(frame).container_lib == "pandas"

    # The result of `to_numpy()` is expected to be rejected with a ValueError.
    msg = "The container does not have a registered adapter in scikit-learn."
    with pytest.raises(ValueError, match=msg):
        _get_adapter_from_container(frame.to_numpy())
Loading