From 63569a650be6794007ea7bfbcad1883e73a30a12 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 8 Aug 2025 14:34:46 +0200 Subject: [PATCH 1/5] FIX make sure _PassthroughScorer works with meta-estimators --- sklearn/linear_model/_ridge.py | 28 +++++++++--- sklearn/metrics/_scorer.py | 29 +++--------- sklearn/metrics/tests/test_score_objects.py | 50 ++++++++++++--------- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 07fca7e7ce55a..4fa485c10da34 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -30,7 +30,7 @@ _rescale_data, ) from sklearn.linear_model._sag import sag_solver -from sklearn.metrics import check_scoring, get_scorer_names +from sklearn.metrics import check_scoring, get_scorer, get_scorer_names from sklearn.model_selection import GridSearchCV from sklearn.preprocessing import LabelBinarizer from sklearn.utils import ( @@ -1359,6 +1359,12 @@ def __sklearn_tags__(self): tags.classifier_tags.multi_label = True return tags + def _get_scorer_instance(self): + """Return a scorer which corresponds to what's defined in ClassiferMixin + parent class. This is used for routing `sample_weight`. + """ + return get_scorer("accuracy") + class RidgeClassifier(_RidgeClassifierMixin, _BaseRidge): """Classifier using Ridge regression. @@ -2499,7 +2505,7 @@ def get_metadata_routing(self): MetadataRouter(owner=self.__class__.__name__) .add_self_request(self) .add( - scorer=self.scoring, + scorer=self._get_scorer(), method_mapping=MethodMapping().add(caller="fit", callee="score"), ) .add( @@ -2510,14 +2516,20 @@ def get_metadata_routing(self): return router def _get_scorer(self): - scorer = check_scoring(estimator=self, scoring=self.scoring, allow_none=True) + """Make sure the sorer is weighted if necessary. + + This uses `self._get_scorer_instance()` implemented in child objects to get the + raw scorer instance of the estimator, which will be ignored if `self.scoring` is + not None. + """ if _routing_enabled() and self.scoring is None: # This estimator passes an array of 1s as sample_weight even if # sample_weight is not provided by the user. Therefore we need to # always request it. But we don't set it if it's passed explicitly # by the user. - scorer.set_score_request(sample_weight=True) - return scorer + return self._get_scorer_instance().set_score_request(sample_weight=True) + + return check_scoring(estimator=self, scoring=self.scoring, allow_none=True) def __sklearn_tags__(self): tags = super().__sklearn_tags__() @@ -2707,6 +2719,12 @@ def fit(self, X, y, sample_weight=None, **params): super().fit(X, y, sample_weight=sample_weight, **params) return self + def _get_scorer_instance(self): + """Return a scorer which corresponds to what's defined in RegressorMixin + parent class. This is used for routing `sample_weight`. + """ + return get_scorer("r2") + class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV): """Ridge classifier with built-in cross-validation. diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 42745656c1276..394cd11dce166 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -481,17 +481,6 @@ class _PassthroughScorer(_MetadataRequester): def __init__(self, estimator): self._estimator = estimator - requests = MetadataRequest(owner=self.__class__.__name__) - try: - requests.score = copy.deepcopy(estimator._metadata_request.score) - except AttributeError: - try: - requests.score = copy.deepcopy(estimator._get_default_requests().score) - except AttributeError: - pass - - self._metadata_request = requests - def __call__(self, estimator, *args, **kwargs): """Method that wraps estimator.score""" return estimator.score(*args, **kwargs) @@ -517,7 +506,7 @@ def get_metadata_routing(self): A :class:`~utils.metadata_routing.MetadataRouter` encapsulating routing information. """ - return get_routing_for_object(self._metadata_request) + return get_routing_for_object(self._estimator) def set_score_request(self, **kwargs): """Set requested parameters by the scorer. @@ -526,6 +515,8 @@ def set_score_request(self, **kwargs): mechanism works. .. versionadded:: 1.5 + .. versionchanged:: 1.8 + This now raises. Parameters ---------- @@ -533,16 +524,10 @@ def set_score_request(self, **kwargs): Arguments should be of the form ``param_name=alias``, and `alias` can be one of ``{True, False, None, str}``. """ - if not _routing_enabled(): - raise RuntimeError( - "This method is only available when metadata routing is enabled." - " You can enable it using" - " sklearn.set_config(enable_metadata_routing=True)." - ) - - for param, alias in kwargs.items(): - self._metadata_request.score.add_request(param=param, alias=alias) - return self + raise AttributeError( + "This method is not available on _PassthroughScorer. " + "Use the estimator's set_score_request method instead." + ) def _check_multimetric_scoring(estimator, scoring): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 672ed8ae7eecc..8d19756147b38 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -51,7 +51,7 @@ from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier -from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline, make_pipeline from sklearn.svm import LinearSVC from sklearn.tests.metadata_routing_common import ( assert_request_is_empty, @@ -1291,27 +1291,12 @@ def test_metadata_kwarg_conflict(): @config_context(enable_metadata_routing=True) def test_PassthroughScorer_set_score_request(): - """Test that _PassthroughScorer.set_score_request adds the correct metadata request - on itself and doesn't change its estimator's routing.""" + """Test that _PassthroughScorer.set_score_request raises when routing enabled.""" est = LogisticRegression().set_score_request(sample_weight="estimator_weights") # make a `_PassthroughScorer` with `check_scoring`: scorer = check_scoring(est, None) - assert ( - scorer.get_metadata_routing().score.requests["sample_weight"] - == "estimator_weights" - ) - - scorer.set_score_request(sample_weight="scorer_weights") - assert ( - scorer.get_metadata_routing().score.requests["sample_weight"] - == "scorer_weights" - ) - - # making sure changing the passthrough object doesn't affect the estimator. - assert ( - est.get_metadata_routing().score.requests["sample_weight"] - == "estimator_weights" - ) + with pytest.raises(AttributeError, match="This method is not available"): + scorer.set_score_request(sample_weight=True) def test_PassthroughScorer_set_score_request_raises_without_routing_enabled(): @@ -1320,8 +1305,8 @@ def test_PassthroughScorer_set_score_request_raises_without_routing_enabled(): scorer = check_scoring(LogisticRegression(), None) msg = "This method is only available when metadata routing is enabled." - with pytest.raises(RuntimeError, match=msg): - scorer.set_score_request(sample_weight="my_weights") + with pytest.raises(AttributeError, match="This method is not available"): + scorer.set_score_request(sample_weight=True) @config_context(enable_metadata_routing=True) @@ -1663,3 +1648,26 @@ def test_make_scorer_reponse_method_default_warning(): with warnings.catch_warnings(): warnings.simplefilter("error", FutureWarning) make_scorer(accuracy_score) + + +@config_context(enable_metadata_routing=True) +def test_Pipeline_in_PassthroughScorer(): + """Non-regression test for + https://github.com/scikit-learn/scikit-learn/issues/30937 + + Make sure pipeline inside a gridsearchcv works with sample_weight passed! + """ + X, y = make_classification(10, 4) + sample_weight = np.ones_like(y) + pipe = Pipeline( + [ + ( + "logistic", + LogisticRegression() + .set_fit_request(sample_weight=True) + .set_score_request(sample_weight=True), + ) + ] + ) + search = GridSearchCV(pipe, {"logistic__C": [0.1, 1]}, n_jobs=1, cv=3) + search.fit(X, y, sample_weight=sample_weight) From d6218f7c28251337089cde721b3d6117411b5fba Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 8 Aug 2025 14:54:20 +0200 Subject: [PATCH 2/5] changelog --- doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst diff --git a/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst b/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst new file mode 100644 index 0000000000000..6e21ff753467b --- /dev/null +++ b/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst @@ -0,0 +1,3 @@ +- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a + :class:`GridSearchCV` would raise an error with metadata routing enabled. + By `Adrin Jalali`_. From 72828838577fbf98661d71c9dca3001a475fbdfa Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Tue, 12 Aug 2025 15:57:39 +0200 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> --- doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst | 2 +- sklearn/linear_model/_ridge.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst b/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst index 6e21ff753467b..bb4b71974ca60 100644 --- a/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst +++ b/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst @@ -1,3 +1,3 @@ -- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a +- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a :class:`GridSearchCV` would raise an error with metadata routing enabled. By `Adrin Jalali`_. diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 4fa485c10da34..0504c0296e48d 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -2516,7 +2516,7 @@ def get_metadata_routing(self): return router def _get_scorer(self): - """Make sure the sorer is weighted if necessary. + """Make sure the scorer is weighted if necessary. This uses `self._get_scorer_instance()` implemented in child objects to get the raw scorer instance of the estimator, which will be ignored if `self.scoring` is diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 20203a9e6120b..c9815072f6ba9 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1313,7 +1313,6 @@ def test_PassthroughScorer_set_score_request_raises_without_routing_enabled(): """Test that _PassthroughScorer.set_score_request raises if metadata routing is disabled.""" scorer = check_scoring(LogisticRegression(), None) - msg = "This method is only available when metadata routing is enabled." with pytest.raises(AttributeError, match="This method is not available"): scorer.set_score_request(sample_weight=True) From efdb799a4a070cee76b6b75896a261d146db56c4 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Tue, 12 Aug 2025 16:23:51 +0200 Subject: [PATCH 4/5] Update sklearn/metrics/_scorer.py Co-authored-by: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> --- sklearn/metrics/_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 60b0dbb349334..fa01b58faee8a 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -533,7 +533,7 @@ def set_score_request(self, **kwargs): can be one of ``{True, False, None, str}``. """ raise AttributeError( - "This method is not available on _PassthroughScorer. " + "`_PassthroughScorer` object has no attribute `set_score_request`." "Use the estimator's set_score_request method instead." ) From e2debd9c7f5fbfa5b2641bc2e4e1af3d9c600fcf Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 21 Aug 2025 12:59:50 +0200 Subject: [PATCH 5/5] no need to override method --- sklearn/metrics/_scorer.py | 21 --------------------- sklearn/metrics/tests/test_score_objects.py | 10 ++++++++-- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 60b0dbb349334..5f3bbde374143 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -516,27 +516,6 @@ def get_metadata_routing(self): """ return get_routing_for_object(self._estimator) - def set_score_request(self, **kwargs): - """Set requested parameters by the scorer. - - Please see :ref:`User Guide ` on how the routing - mechanism works. - - .. versionadded:: 1.5 - .. versionchanged:: 1.8 - This now raises. - - Parameters - ---------- - kwargs : dict - Arguments should be of the form ``param_name=alias``, and `alias` - can be one of ``{True, False, None, str}``. - """ - raise AttributeError( - "This method is not available on _PassthroughScorer. " - "Use the estimator's set_score_request method instead." - ) - def _check_multimetric_scoring(estimator, scoring): """Check the scoring parameter in cases when multiple metrics are allowed. diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 20203a9e6120b..a31712ee594be 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1305,7 +1305,10 @@ def test_PassthroughScorer_set_score_request(): est = LogisticRegression().set_score_request(sample_weight="estimator_weights") # make a `_PassthroughScorer` with `check_scoring`: scorer = check_scoring(est, None) - with pytest.raises(AttributeError, match="This method is not available"): + with pytest.raises( + AttributeError, + match="'_PassthroughScorer' object has no attribute 'set_score_request'", + ): scorer.set_score_request(sample_weight=True) @@ -1315,7 +1318,10 @@ def test_PassthroughScorer_set_score_request_raises_without_routing_enabled(): scorer = check_scoring(LogisticRegression(), None) msg = "This method is only available when metadata routing is enabled." - with pytest.raises(AttributeError, match="This method is not available"): + with pytest.raises( + AttributeError, + match="'_PassthroughScorer' object has no attribute 'set_score_request'", + ): scorer.set_score_request(sample_weight=True)