Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
3 changes: 3 additions & 0 deletions doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a
:class:`GridSearchCV` would raise an error with metadata routing enabled.
By `Adrin Jalali`_.
28 changes: 23 additions & 5 deletions sklearn/linear_model/_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
_rescale_data,
)
from sklearn.linear_model._sag import sag_solver
from sklearn.metrics import check_scoring, get_scorer_names
from sklearn.metrics import check_scoring, get_scorer, get_scorer_names
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import (
Expand Down Expand Up @@ -1359,6 +1359,12 @@ def __sklearn_tags__(self):
tags.classifier_tags.multi_label = True
return tags

def _get_scorer_instance(self):
    """Return the scorer matching the one defined in the `ClassifierMixin` parent.

    The returned scorer is used for routing `sample_weight`.
    """
    accuracy_scorer = get_scorer("accuracy")
    return accuracy_scorer


class RidgeClassifier(_RidgeClassifierMixin, _BaseRidge):
"""Classifier using Ridge regression.
Expand Down Expand Up @@ -2499,7 +2505,7 @@ def get_metadata_routing(self):
MetadataRouter(owner=self.__class__.__name__)
.add_self_request(self)
.add(
scorer=self.scoring,
scorer=self._get_scorer(),
method_mapping=MethodMapping().add(caller="fit", callee="score"),
)
.add(
Expand All @@ -2510,14 +2516,20 @@ def get_metadata_routing(self):
return router

def _get_scorer(self):
scorer = check_scoring(estimator=self, scoring=self.scoring, allow_none=True)
"""Make sure the scorer is weighted if necessary.

This uses `self._get_scorer_instance()`, implemented in child classes, to get the
raw scorer instance of the estimator. That scorer is ignored if `self.scoring` is
not None.
"""
if _routing_enabled() and self.scoring is None:
# This estimator passes an array of 1s as sample_weight even if
# sample_weight is not provided by the user. Therefore we need to
# always request it. But we don't set it if it's passed explicitly
# by the user.
scorer.set_score_request(sample_weight=True)
return scorer
return self._get_scorer_instance().set_score_request(sample_weight=True)

return check_scoring(estimator=self, scoring=self.scoring, allow_none=True)

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
Expand Down Expand Up @@ -2707,6 +2719,12 @@ def fit(self, X, y, sample_weight=None, **params):
super().fit(X, y, sample_weight=sample_weight, **params)
return self

def _get_scorer_instance(self):
    """Return the scorer matching the one defined in the `RegressorMixin` parent.

    The returned scorer is used for routing `sample_weight`.
    """
    r2_scorer = get_scorer("r2")
    return r2_scorer


class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV):
"""Ridge classifier with built-in cross-validation.
Expand Down
38 changes: 1 addition & 37 deletions sklearn/metrics/_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,17 +489,6 @@ class _PassthroughScorer(_MetadataRequester):
def __init__(self, estimator):
self._estimator = estimator

requests = MetadataRequest(owner=self.__class__.__name__)
try:
requests.score = copy.deepcopy(estimator._metadata_request.score)
except AttributeError:
try:
requests.score = copy.deepcopy(estimator._get_default_requests().score)
except AttributeError:
pass

self._metadata_request = requests

def __call__(self, estimator, *args, **kwargs):
"""Method that wraps estimator.score"""
return estimator.score(*args, **kwargs)
Expand All @@ -525,32 +514,7 @@ def get_metadata_routing(self):
A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
return get_routing_for_object(self._metadata_request)

def set_score_request(self, **kwargs):
"""Set requested parameters by the scorer.

Please see :ref:`User Guide <metadata_routing>` on how the routing
mechanism works.

.. versionadded:: 1.5

Parameters
----------
kwargs : dict
Arguments should be of the form ``param_name=alias``, and `alias`
can be one of ``{True, False, None, str}``.
"""
if not _routing_enabled():
raise RuntimeError(
"This method is only available when metadata routing is enabled."
" You can enable it using"
" sklearn.set_config(enable_metadata_routing=True)."
)

for param, alias in kwargs.items():
self._metadata_request.score.add_request(param=param, alias=alias)
return self
return get_routing_for_object(self._estimator)


def _check_multimetric_scoring(estimator, scoring):
Expand Down
57 changes: 35 additions & 22 deletions sklearn/metrics/tests/test_score_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import LinearSVC
from sklearn.tests.metadata_routing_common import (
assert_request_is_empty,
Expand Down Expand Up @@ -1301,37 +1301,27 @@ def test_metadata_kwarg_conflict():

@config_context(enable_metadata_routing=True)
def test_PassthroughScorer_set_score_request():
"""Test that _PassthroughScorer.set_score_request adds the correct metadata request
on itself and doesn't change its estimator's routing."""
"""Test that _PassthroughScorer.set_score_request raises when routing enabled."""
est = LogisticRegression().set_score_request(sample_weight="estimator_weights")
# make a `_PassthroughScorer` with `check_scoring`:
scorer = check_scoring(est, None)
assert (
scorer.get_metadata_routing().score.requests["sample_weight"]
== "estimator_weights"
)

scorer.set_score_request(sample_weight="scorer_weights")
assert (
scorer.get_metadata_routing().score.requests["sample_weight"]
== "scorer_weights"
)

# making sure changing the passthrough object doesn't affect the estimator.
assert (
est.get_metadata_routing().score.requests["sample_weight"]
== "estimator_weights"
)
with pytest.raises(
AttributeError,
match="'_PassthroughScorer' object has no attribute 'set_score_request'",
):
scorer.set_score_request(sample_weight=True)


def test_PassthroughScorer_set_score_request_raises_without_routing_enabled():
"""Test that _PassthroughScorer.set_score_request raises if metadata routing is
disabled."""
scorer = check_scoring(LogisticRegression(), None)
msg = "This method is only available when metadata routing is enabled."

with pytest.raises(RuntimeError, match=msg):
scorer.set_score_request(sample_weight="my_weights")
with pytest.raises(
AttributeError,
match="'_PassthroughScorer' object has no attribute 'set_score_request'",
):
scorer.set_score_request(sample_weight=True)


@config_context(enable_metadata_routing=True)
Expand Down Expand Up @@ -1673,3 +1663,26 @@ def test_make_scorer_reponse_method_default_warning():
with warnings.catch_warnings():
warnings.simplefilter("error", FutureWarning)
make_scorer(accuracy_score)


@config_context(enable_metadata_routing=True)
def test_Pipeline_in_PassthroughScorer():
    """Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/30937

    Make sure that a `Pipeline` inside a `GridSearchCV` can be fitted when
    `sample_weight` is passed and metadata routing is enabled.
    """
    # Seed the data generation so the test input is identical on every run.
    X, y = make_classification(n_samples=10, n_features=4, random_state=0)
    sample_weight = np.ones_like(y)
    pipe = Pipeline(
        [
            (
                "logistic",
                LogisticRegression()
                .set_fit_request(sample_weight=True)
                .set_score_request(sample_weight=True),
            )
        ]
    )
    search = GridSearchCV(pipe, {"logistic__C": [0.1, 1]}, n_jobs=1, cv=3)
    # No explicit assertion: the test passes as long as `fit` does not raise.
    search.fit(X, y, sample_weight=sample_weight)
Loading