From 53255f1e3c018be910d521d0c89efd01d80dca35 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 11:15:38 +0100 Subject: [PATCH 01/18] more informative error message for UnsetMetadataPassedError --- .../test_metaestimators_metadata_routing.py | 2 +- sklearn/utils/_metadata_requests.py | 73 +++++++++++++++---- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 08a7e0ef9952a..9196d7a78c910 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -507,7 +507,7 @@ def test_error_on_missing_requests_for_sub_estimator(metaestimator): instance = cls(**kwargs) msg = ( f"[{key}] are passed but are not explicitly set as requested or not" - f" for {estimator.__class__.__name__}.{method_name}" + f" requested for {estimator.__class__.__name__}.{method_name}" ) with pytest.raises(UnsetMetadataPassedError, match=re.escape(msg)): method = getattr(instance, method_name) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 83cdf7790c7cd..ba1724bc62cc9 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -403,7 +403,7 @@ def _check_warnings(self, *, params): "warning, or to True to consume and use the metadata." ) - def _route_params(self, params): + def _route_params(self, params, parent, caller_method): """Prepare the given parameters to be passed to the method. The output of this method can be used directly as the input to the @@ -414,6 +414,12 @@ def _route_params(self, params): params : dict A dictionary of provided metadata. + parent : object + Parent class object, that routes the metadata. + + caller_method : str + Method from the parent class object, where the metadata is routed from. + Returns ------- params : Bunch @@ -434,12 +440,20 @@ def _route_params(self, params): elif alias in args: res[prop] = args[alias] if unrequested: + if self.method in COMPOSITE_METHODS: + callee_method = COMPOSITE_METHODS[self.method][0] + else: + callee_method = self.method + message = ( + f"[{', '.join([key for key in unrequested])}] are passed but are" + " not explicitly set as requested or not requested for" + f" {self.owner}.{self.method}, which is used within" + f" {parent.__class__.__name__}.{caller_method}." + f" Call `{self.owner}.set_{callee_method}_request(" + "{metadata}=True)` for each metadata." + ) raise UnsetMetadataPassedError( - message=( - f"[{', '.join([key for key in unrequested])}] are passed but are" - " not explicitly set as requested or not for" - f" {self.owner}.{self.method}" - ), + message=message, unrequested_params=unrequested, routed_params=res, ) @@ -591,7 +605,7 @@ def _get_param_names(self, method, return_alias, ignore_self_request=None): """ return getattr(self, method)._get_param_names(return_alias=return_alias) - def _route_params(self, *, method, params): + def _route_params(self, *, method, caller_method, parent, params): """Prepare the given parameters to be passed to the method. The output of this method can be used directly as the input to the @@ -603,6 +617,12 @@ def _route_params(self, *, method, params): The name of the method for which the parameters are requested and routed. + caller_method : str + Method from the parent class object, where the metadata is routed from. + + parent : object + Parent class object, that routes the metadata. + params : dict A dictionary of provided metadata. @@ -612,7 +632,9 @@ def _route_params(self, *, method, params): A :class:`~sklearn.utils.Bunch` of {prop: value} which can be given to the corresponding method. """ - return getattr(self, method)._route_params(params=params) + return getattr(self, method)._route_params( + params=params, parent=parent, caller_method=caller_method + ) def _check_warnings(self, *, method, params): """Check whether metadata is passed which is marked as WARN. @@ -938,7 +960,7 @@ def _get_param_names(self, *, method, return_alias, ignore_self_request): ) return res - def _route_params(self, *, params, method): + def _route_params(self, *, params, method, caller_method, parent): """Prepare the given parameters to be passed to the method. This is used when a router is used as a child object of another router. @@ -950,12 +972,18 @@ def _route_params(self, *, params, method): Parameters ---------- + params : dict + A dictionary of provided metadata. + method : str The name of the method for which the parameters are requested and routed. - params : dict - A dictionary of provided metadata. + caller_method : str + Method from the parent class object, where the metadata is routed from. + + parent : object + Parent class object, that routes the metadata. Returns ------- @@ -965,7 +993,14 @@ def _route_params(self, *, params, method): """ res = Bunch() if self._self_request: - res.update(self._self_request._route_params(params=params, method=method)) + res.update( + self._self_request._route_params( + params=params, + method=method, + caller_method=caller_method, + parent=parent, + ) + ) param_names = self._get_param_names( method=method, return_alias=True, ignore_self_request=True @@ -987,7 +1022,7 @@ def _route_params(self, *, params, method): res.update(child_params) return res - def route_params(self, *, caller, params): + def route_params(self, *, caller, params, parent): """Return the input parameters requested by child objects. The output of this method is a bunch, which includes the inputs for all @@ -1007,6 +1042,9 @@ def route_params(self, *, caller, params): params : dict A dictionary of provided metadata. + parent : object + Parent class object, that routes the metadata. + Returns ------- params : Bunch @@ -1026,7 +1064,10 @@ def route_params(self, *, caller, params): for _callee, _caller in mapping: if _caller == caller: res[name][_callee] = router._route_params( - params=params, method=_callee + params=params, + caller_method=caller, + method=_callee, + parent=parent, ) return res @@ -1556,6 +1597,8 @@ def __getattr__(self, name): request_routing = get_routing_for_object(_obj) request_routing.validate_metadata(params=kwargs, method=_method) - routed_params = request_routing.route_params(params=kwargs, caller=_method) + routed_params = request_routing.route_params( + params=kwargs, caller=_method, parent=_obj + ) return routed_params From 693457933759100abcfb29b5995eb1491eeaa19b Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 12:08:06 +0100 Subject: [PATCH 02/18] handle test errors --- sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/ensemble/tests/test_voting.py | 2 +- sklearn/utils/_metadata_requests.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index ec22ddf2f3ae0..56c4cd459aab5 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -2545,7 +2545,7 @@ def test_metadata_routing_error_for_column_transformer(method): error_message = ( "[sample_weight, metadata] are passed but are not explicitly set as requested" - f" or not for ConsumingTransformer.{method}" + f" or not requested for ConsumingTransformer.{method}" ) with pytest.raises(ValueError, match=re.escape(error_message)): if method == "transform": diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 2f4c412bd6466..4b2c365752b72 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -777,7 +777,7 @@ def test_metadata_routing_error_for_voting_estimators(Estimator, Child): error_message = ( "[sample_weight, metadata] are passed but are not explicitly set as requested" - f" or not for {Child.__name__}.fit" + f" or not requested for {Child.__name__}.fit" ) with pytest.raises(ValueError, match=re.escape(error_message)): diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index ba1724bc62cc9..6274b84e6a969 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1022,7 +1022,7 @@ def _route_params(self, *, params, method, caller_method, parent): res.update(child_params) return res - def route_params(self, *, caller, params, parent): + def route_params(self, *, caller, params, parent=None): """Return the input parameters requested by child objects. The output of this method is a bunch, which includes the inputs for all From bef6606a56ab992d059c569e38278d16ed937e6c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 12:44:27 +0100 Subject: [PATCH 03/18] satisfied test --- sklearn/tests/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 76ed9a7433d0f..f2aa73ebdce0e 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1889,7 +1889,7 @@ def test_metadata_routing_error_for_pipeline(method): pipeline = Pipeline([("estimator", est)]) error_message = ( "[sample_weight, prop] are passed but are not explicitly set as requested" - f" or not for SimpleEstimator.{method}" + f" or not requested for SimpleEstimator.{method}" ) with pytest.raises(ValueError, match=re.escape(error_message)): try: From 7c567f78ac2a986fbdf630ca21bbf0ed860bb147 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 13:21:45 +0100 Subject: [PATCH 04/18] handleing test errors --- doc/metadata_routing.rst | 4 ++-- sklearn/tests/test_metadata_routing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 4174f95e65ba0..109f82d197ff8 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -245,8 +245,8 @@ should be passed to the estimator's scorer or not:: ... ).fit(X, y, sample_weight=my_weights) ... except ValueError as e: ... print(e) - [sample_weight] are passed but are not explicitly set as requested or not for - LogisticRegression.score + [sample_weight] are passed but are not explicitly set as requested or not + requested for LogisticRegression.score The issue can be fixed by explicitly setting the request value:: diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index cf323d547e4d3..abe9f03610299 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -295,7 +295,7 @@ def test_simple_metadata_routing(): clf = WeightedMetaClassifier(estimator=ConsumingClassifier()) err_message = ( "[sample_weight] are passed but are not explicitly set as requested or" - " not for ConsumingClassifier.fit" + " not requested for ConsumingClassifier.fit" ) with pytest.raises(ValueError, match=re.escape(err_message)): clf.fit(X, y, sample_weight=my_weights) From 497ec93af20592863aa08cdfdbf32561e4e2ade8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 14:16:54 +0100 Subject: [PATCH 05/18] disable routing like in #27357 --- examples/miscellaneous/plot_metadata_routing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index 9984bb6183348..d116b3544c73b 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -619,6 +619,11 @@ def predict(self, X): for w in record: print(w.message) +# %% +# In the end, we disable the configuration flag for metadata routing: + +set_config(enable_metadata_routing=False) + # %% # Third Party Development and scikit-learn Dependency # --------------------------------------------------- From fcb4325b516b432d12fc57e4c401762cfdc46026 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 14:51:40 +0100 Subject: [PATCH 06/18] fix doctest failure --- doc/metadata_routing.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 109f82d197ff8..bc9362de58014 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -246,7 +246,8 @@ should be passed to the estimator's scorer or not:: ... except ValueError as e: ... print(e) [sample_weight] are passed but are not explicitly set as requested or not - requested for LogisticRegression.score + requested for LogisticRegression.score, which is used within GridSearchCV.fit. + Call `LogisticRegression.set_score_request({metadata}=True)` for each metadata. The issue can be fixed by explicitly setting the request value:: From ea44b7e089995c377252eca5a721b95e6cb14ce2 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Fri, 23 Feb 2024 16:36:29 +0100 Subject: [PATCH 07/18] Update doc/metadata_routing.rst Co-authored-by: Adrin Jalali --- doc/metadata_routing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index bc9362de58014..1533b1381aefd 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -247,7 +247,7 @@ should be passed to the estimator's scorer or not:: ... print(e) [sample_weight] are passed but are not explicitly set as requested or not requested for LogisticRegression.score, which is used within GridSearchCV.fit. - Call `LogisticRegression.set_score_request({metadata}=True)` for each metadata. + Call `LogisticRegression.set_score_request(sample_weight=True)` for each metadata. The issue can be fixed by explicitly setting the request value:: From 2c1e8d1aa416f16001cc44393e0f9320eee7e4fa Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 23 Feb 2024 17:09:04 +0100 Subject: [PATCH 08/18] added test --- doc/metadata_routing.rst | 2 +- sklearn/tests/test_metadata_routing.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 1533b1381aefd..bc9362de58014 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -247,7 +247,7 @@ should be passed to the estimator's scorer or not:: ... print(e) [sample_weight] are passed but are not explicitly set as requested or not requested for LogisticRegression.score, which is used within GridSearchCV.fit. - Call `LogisticRegression.set_score_request(sample_weight=True)` for each metadata. + Call `LogisticRegression.set_score_request({metadata}=True)` for each metadata. The issue can be fixed by explicitly setting the request value:: diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index abe9f03610299..8323c95e4f1bc 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -15,7 +15,10 @@ BaseEstimator, clone, ) +from sklearn.ensemble import VotingClassifier +from sklearn.exceptions import UnsetMetadataPassedError from sklearn.linear_model import LinearRegression +from sklearn.pipeline import Pipeline from sklearn.tests.metadata_routing_common import ( ConsumingClassifier, ConsumingRegressor, @@ -1029,3 +1032,18 @@ def fit(self, X, y, metadata=None): NotImplementedError, match="Estimator has not implemented metadata routing yet." ): MetaRegressor(estimator=Estimator()).fit(X, y, metadata=my_groups) + + +def test_unsetmetadatapassederror_correct(): + """Test that UnsetMetadataPassedError raises the correct error message when + set_{method}_request is not set in nested cases.""" + voting = VotingClassifier(estimators=[("classifier", ConsumingClassifier())]) + pipe = Pipeline([("voting", voting)]) + msg = re.escape( + "[metadata] are passed but are not explicitly set as requested or not requested" + " for ConsumingClassifier.fit, which is used within VotingClassifier.fit. Call" + " `ConsumingClassifier.set_fit_request({metadata}=True)` for each metadata." + ) + + with pytest.raises(UnsetMetadataPassedError, match=msg): + pipe.fit(X, y, metadata="blah") From 8ecb239f4f08b29686c7dbe795d28bdaf95334af Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 1 Mar 2024 14:11:00 +0100 Subject: [PATCH 09/18] changes after review --- sklearn/tests/test_metadata_routing.py | 19 +++++++----- sklearn/utils/_metadata_requests.py | 43 +++++++++++++------------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 8323c95e4f1bc..5431087c19d5f 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -15,10 +15,8 @@ BaseEstimator, clone, ) -from sklearn.ensemble import VotingClassifier from sklearn.exceptions import UnsetMetadataPassedError from sklearn.linear_model import LinearRegression -from sklearn.pipeline import Pipeline from sklearn.tests.metadata_routing_common import ( ConsumingClassifier, ConsumingRegressor, @@ -71,7 +69,13 @@ def enable_slep006(): class SimplePipeline(BaseEstimator): - """A very simple pipeline, assuming the last step is always a predictor.""" + """A very simple pipeline, assuming the last step is always a predictor. + + Parameters + ---------- + steps : iterable of objects + An iterable of transformers with the last step being a predictor. + """ def __init__(self, steps): self.steps = steps @@ -1037,12 +1041,13 @@ def fit(self, X, y, metadata=None): def test_unsetmetadatapassederror_correct(): """Test that UnsetMetadataPassedError raises the correct error message when set_{method}_request is not set in nested cases.""" - voting = VotingClassifier(estimators=[("classifier", ConsumingClassifier())]) - pipe = Pipeline([("voting", voting)]) + weighted_meta = WeightedMetaClassifier(estimator=ConsumingClassifier()) + pipe = SimplePipeline([weighted_meta]) msg = re.escape( "[metadata] are passed but are not explicitly set as requested or not requested" - " for ConsumingClassifier.fit, which is used within VotingClassifier.fit. Call" - " `ConsumingClassifier.set_fit_request({metadata}=True)` for each metadata." + " for ConsumingClassifier.fit, which is used within WeightedMetaClassifier.fit." + " Call `ConsumingClassifier.set_fit_request({metadata}=True/False)` for each" + " metadata you want to request/ignore." ) with pytest.raises(UnsetMetadataPassedError, match=msg): diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 6274b84e6a969..2ff9a8c3449ae 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -403,7 +403,7 @@ def _check_warnings(self, *, params): "warning, or to True to consume and use the metadata." ) - def _route_params(self, params, parent, caller_method): + def _route_params(self, params, parent, caller): """Prepare the given parameters to be passed to the method. The output of this method can be used directly as the input to the @@ -417,7 +417,7 @@ def _route_params(self, params, parent, caller_method): parent : object Parent class object, that routes the metadata. - caller_method : str + caller : str Method from the parent class object, where the metadata is routed from. Returns @@ -445,12 +445,12 @@ def _route_params(self, params, parent, caller_method): else: callee_method = self.method message = ( - f"[{', '.join([key for key in unrequested])}] are passed but are" - " not explicitly set as requested or not requested for" + f"[{', '.join([key for key in unrequested])}] are passed but are not" + " explicitly set as requested or not requested for" f" {self.owner}.{self.method}, which is used within" - f" {parent.__class__.__name__}.{caller_method}." - f" Call `{self.owner}.set_{callee_method}_request(" - "{metadata}=True)` for each metadata." + f" {parent.__class__.__name__}.{caller}. Call" + f" `{self.owner}.set_{callee_method}_request({{metadata}}=True/False)`" + " for each metadata you want to request/ignore." ) raise UnsetMetadataPassedError( message=message, @@ -605,7 +605,7 @@ def _get_param_names(self, method, return_alias, ignore_self_request=None): """ return getattr(self, method)._get_param_names(return_alias=return_alias) - def _route_params(self, *, method, caller_method, parent, params): + def _route_params(self, *, method, parent, caller, params): """Prepare the given parameters to be passed to the method. The output of this method can be used directly as the input to the @@ -617,12 +617,12 @@ def _route_params(self, *, method, caller_method, parent, params): The name of the method for which the parameters are requested and routed. - caller_method : str - Method from the parent class object, where the metadata is routed from. - parent : object Parent class object, that routes the metadata. + caller : str + Method from the parent class object, where the metadata is routed from. + params : dict A dictionary of provided metadata. @@ -633,7 +633,7 @@ def _route_params(self, *, method, caller_method, parent, params): corresponding method. """ return getattr(self, method)._route_params( - params=params, parent=parent, caller_method=caller_method + params=params, parent=parent, caller=caller ) def _check_warnings(self, *, method, params): @@ -960,7 +960,7 @@ def _get_param_names(self, *, method, return_alias, ignore_self_request): ) return res - def _route_params(self, *, params, method, caller_method, parent): + def _route_params(self, *, params, method, parent, caller): """Prepare the given parameters to be passed to the method. This is used when a router is used as a child object of another router. @@ -979,12 +979,12 @@ def _route_params(self, *, params, method, caller_method, parent): The name of the method for which the parameters are requested and routed. - caller_method : str - Method from the parent class object, where the metadata is routed from. - parent : object Parent class object, that routes the metadata. + caller : str + Method from the parent class object, where the metadata is routed from. + Returns ------- params : Bunch @@ -996,9 +996,9 @@ def _route_params(self, *, params, method, caller_method, parent): res.update( self._self_request._route_params( params=params, - method=method, - caller_method=caller_method, parent=parent, + caller=caller, + method=method, ) ) @@ -1022,7 +1022,8 @@ def _route_params(self, *, params, method, caller_method, parent): res.update(child_params) return res - def route_params(self, *, caller, params, parent=None): + # def route_params(self, *, caller, params, parent=None): + def route_params(self, *, caller, params, parent): """Return the input parameters requested by child objects. The output of this method is a bunch, which includes the inputs for all @@ -1065,9 +1066,9 @@ def route_params(self, *, caller, params, parent=None): if _caller == caller: res[name][_callee] = router._route_params( params=params, - caller_method=caller, - method=_callee, parent=parent, + caller=caller, + method=_callee, ) return res From 2073a270b1910517645626c7f72085aa375d990b Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 1 Mar 2024 16:15:10 +0100 Subject: [PATCH 10/18] correct error message for composite methods --- sklearn/tests/test_metadata_routing.py | 19 +++++++++++++++++++ sklearn/utils/_metadata_requests.py | 16 +++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 5431087c19d5f..9e8ee47c35782 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -17,6 +17,7 @@ ) from sklearn.exceptions import UnsetMetadataPassedError from sklearn.linear_model import LinearRegression +from sklearn.pipeline import Pipeline from sklearn.tests.metadata_routing_common import ( ConsumingClassifier, ConsumingRegressor, @@ -1052,3 +1053,21 @@ def test_unsetmetadatapassederror_correct(): with pytest.raises(UnsetMetadataPassedError, match=msg): pipe.fit(X, y, metadata="blah") + + +def test_unsetmetadatapassederror_correct_for_composite_methods(): + """Test that UnsetMetadataPassedError raises the correct error message when + composite metadata request methods are not set in nested cases.""" + consuming_transformer = ConsumingTransformer() + pipe = Pipeline([("consuming_transformer", consuming_transformer)]) + + msg = re.escape( + "[metadata] are passed but are not explicitly set as requested or not requested" + " for ConsumingTransformer.fit_transform, which is used within" + " Pipeline.fit_transform. Call" + " `ConsumingTransformer.set_fit_request({metadata}=True/False)" + ".set_transform_request({metadata}=True/False)`" + " for each metadata you want to request/ignore." + ) + with pytest.raises(UnsetMetadataPassedError, match=msg): + pipe.fit_transform(X, y, metadata="blah") diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 2ff9a8c3449ae..cf80df91a58e5 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -441,16 +441,22 @@ def _route_params(self, params, parent, caller): res[prop] = args[alias] if unrequested: if self.method in COMPOSITE_METHODS: - callee_method = COMPOSITE_METHODS[self.method][0] + callee_methods = list(COMPOSITE_METHODS[self.method]) else: - callee_method = self.method + callee_methods = [self.method] + set_requests_on = "".join( + [ + f".set_{method}_request({{metadata}}=True/False)" + for method in callee_methods + ] + ) message = ( f"[{', '.join([key for key in unrequested])}] are passed but are not" " explicitly set as requested or not requested for" f" {self.owner}.{self.method}, which is used within" - f" {parent.__class__.__name__}.{caller}. Call" - f" `{self.owner}.set_{callee_method}_request({{metadata}}=True/False)`" - " for each metadata you want to request/ignore." + f" {parent.__class__.__name__}.{caller}. Call `{self.owner}" + + set_requests_on + + "` for each metadata you want to request/ignore." ) raise UnsetMetadataPassedError( message=message, From 0ea9e4d35da45b9325411eb7193e61e26467e662 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 1 Mar 2024 17:17:20 +0100 Subject: [PATCH 11/18] adjust for parent param without default value --- examples/miscellaneous/plot_metadata_routing.py | 14 ++++++++++---- sklearn/metrics/tests/test_score_objects.py | 8 ++++++-- sklearn/utils/_metadata_requests.py | 5 +++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index d116b3544c73b..bc82db7acca7f 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -169,7 +169,9 @@ def fit(self, X, y, **fit_params): # we can use provided utility methods to map the given metadata to what # is required by the underlying estimator. Here `method` refers to the # parent's method, i.e. `fit` in this example. - routed_params = request_router.route_params(params=fit_params, caller="fit") + routed_params = request_router.route_params( + params=fit_params, caller="fit", parent=self.__class__ + ) # the output has a key for each object's method which is used here, # i.e. parent's `fit` method, containing the metadata which should be @@ -186,7 +188,7 @@ def predict(self, X, **predict_params): request_router.validate_metadata(params=predict_params, method="predict") # and then prepare the input to the underlying `predict` method. routed_params = request_router.route_params( - params=predict_params, caller="predict" + params=predict_params, caller="predict", parent=self.__class__ ) return self.estimator_.predict(X, **routed_params.estimator.predict) @@ -342,7 +344,9 @@ def fit(self, X, y, sample_weight, **fit_params): request_router.validate_metadata(params=fit_params, method="fit") # we can use provided utility methods to map the given metadata to what # is required by the underlying estimator - params = request_router.route_params(params=fit_params, caller="fit") + params = request_router.route_params( + params=fit_params, caller="fit", parent=self.__class__ + ) self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit) self.classes_ = self.estimator_.classes_ return self @@ -353,7 +357,9 @@ def predict(self, X, **predict_params): request_router = get_routing_for_object(self) request_router.validate_metadata(params=predict_params, method="predict") # and then prepare the input to the underlying ``predict`` method. - params = request_router.route_params(params=predict_params, caller="predict") + params = request_router.route_params( + params=predict_params, caller="predict", parent=self.__class__ + ) return self.estimator_.predict(X, **params.estimator.predict) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index c721922f1b2d5..0f52becc9c70e 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1239,7 +1239,9 @@ def test_scorer_metadata_request(name): with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") # make sure `sample_weight` is not routed even if passed. - routed_params = router.route_params(params={"sample_weight": 1}, caller="score") + routed_params = router.route_params( + params={"sample_weight": 1}, caller="score", parent=name + ) assert not routed_params.scorer.score # make sure putting weighted_scorer in a router requests sample_weight @@ -1247,7 +1249,9 @@ def test_scorer_metadata_request(name): scorer=weighted_scorer, method_mapping="score" ) router.validate_metadata(params={"sample_weight": 1}, method="score") - routed_params = router.route_params(params={"sample_weight": 1}, caller="score") + routed_params = router.route_params( + params={"sample_weight": 1}, caller="score", parent=name + ) assert list(routed_params.scorer.score.keys()) == ["sample_weight"] diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index cf80df91a58e5..66a99c7a03b6f 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -440,6 +440,8 @@ def _route_params(self, params, parent, caller): elif alias in args: res[prop] = args[alias] if unrequested: + if parent.__class__.__name__ != "str": + parent = parent.__class__.__name__ if self.method in COMPOSITE_METHODS: callee_methods = list(COMPOSITE_METHODS[self.method]) else: @@ -454,7 +456,7 @@ def _route_params(self, params, parent, caller): f"[{', '.join([key for key in unrequested])}] are passed but are not" " explicitly set as requested or not requested for" f" {self.owner}.{self.method}, which is used within" - f" {parent.__class__.__name__}.{caller}. Call `{self.owner}" + f" {parent}.{caller}. Call `{self.owner}" + set_requests_on + "` for each metadata you want to request/ignore." ) @@ -1028,7 +1030,6 @@ def _route_params(self, *, params, method, parent, caller): res.update(child_params) return res - # def route_params(self, *, caller, params, parent=None): def route_params(self, *, caller, params, parent): """Return the input parameters requested by child objects. From 5f7d734381f76b26690fd668a46eacb8ef83de04 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 5 Mar 2024 09:17:50 +0100 Subject: [PATCH 12/18] fix expected error message --- doc/metadata_routing.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index dbc4a5d147118..f171d6c0b6317 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -247,7 +247,8 @@ should be passed to the estimator's scorer or not:: ... print(e) [sample_weight] are passed but are not explicitly set as requested or not requested for LogisticRegression.score, which is used within GridSearchCV.fit. - Call `LogisticRegression.set_score_request({metadata}=True)` for each metadata. + Call `LogisticRegression.set_score_request({metadata}=True/False)` for each metadata + you want to request/ignore. The issue can be fixed by explicitly setting the request value:: From f7bff85cb54804127e0ad97d257b9f4ded8ed72a Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 5 Mar 2024 13:06:23 +0100 Subject: [PATCH 13/18] clearer error message for TypeError in validate_metadata --- sklearn/utils/_metadata_requests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 7775d25c8a4ac..e8b0bc4605654 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1108,7 +1108,7 @@ def validate_metadata(self, *, method, params): if extra_keys: raise TypeError( f"{self.owner}.{method} got unexpected argument(s) {extra_keys}, which" - " are not requested metadata in any object." + " are not routed to any object." ) def _serialize(self): From 24483d2eec034012defbf7599905adfc26b24d76 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 7 Mar 2024 15:39:53 +0100 Subject: [PATCH 14/18] removed wrapping in list --- sklearn/utils/_metadata_requests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index e8b0bc4605654..2cd4833192d4d 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -440,10 +440,12 @@ def _route_params(self, params, parent, caller): elif alias in args: res[prop] = args[alias] if unrequested: + # we want to get the parent's class name, except for cases, when the router + # is a function if parent.__class__.__name__ != "str": parent = parent.__class__.__name__ if self.method in COMPOSITE_METHODS: - callee_methods = list(COMPOSITE_METHODS[self.method]) + callee_methods = COMPOSITE_METHODS[self.method] else: callee_methods = [self.method] set_requests_on = "".join( From cfec1003daa968ba94f94e592230dc09bd87c3a2 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 12 Mar 2024 16:26:58 +0100 Subject: [PATCH 15/18] move parent param one function down, get string directly by MetadataRouter.owner --- examples/miscellaneous/plot_metadata_routing.py | 14 ++++---------- sklearn/metrics/tests/test_score_objects.py | 8 ++------ sklearn/utils/_metadata_requests.py | 15 +++------------ 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index bc82db7acca7f..d116b3544c73b 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -169,9 +169,7 @@ def fit(self, X, y, **fit_params): # we can use provided utility methods to map the given metadata to what # is required by the underlying estimator. Here `method` refers to the # parent's method, i.e. `fit` in this example. - routed_params = request_router.route_params( - params=fit_params, caller="fit", parent=self.__class__ - ) + routed_params = request_router.route_params(params=fit_params, caller="fit") # the output has a key for each object's method which is used here, # i.e. parent's `fit` method, containing the metadata which should be @@ -188,7 +186,7 @@ def predict(self, X, **predict_params): request_router.validate_metadata(params=predict_params, method="predict") # and then prepare the input to the underlying `predict` method. routed_params = request_router.route_params( - params=predict_params, caller="predict", parent=self.__class__ + params=predict_params, caller="predict" ) return self.estimator_.predict(X, **routed_params.estimator.predict) @@ -344,9 +342,7 @@ def fit(self, X, y, sample_weight, **fit_params): request_router.validate_metadata(params=fit_params, method="fit") # we can use provided utility methods to map the given metadata to what # is required by the underlying estimator - params = request_router.route_params( - params=fit_params, caller="fit", parent=self.__class__ - ) + params = request_router.route_params(params=fit_params, caller="fit") self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit) self.classes_ = self.estimator_.classes_ return self @@ -357,9 +353,7 @@ def predict(self, X, **predict_params): request_router = get_routing_for_object(self) request_router.validate_metadata(params=predict_params, method="predict") # and then prepare the input to the underlying ``predict`` method. - params = request_router.route_params( - params=predict_params, caller="predict", parent=self.__class__ - ) + params = request_router.route_params(params=predict_params, caller="predict") return self.estimator_.predict(X, **params.estimator.predict) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 0f52becc9c70e..c721922f1b2d5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1239,9 +1239,7 @@ def test_scorer_metadata_request(name): with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") # make sure `sample_weight` is not routed even if passed. - routed_params = router.route_params( - params={"sample_weight": 1}, caller="score", parent=name - ) + routed_params = router.route_params(params={"sample_weight": 1}, caller="score") assert not routed_params.scorer.score # make sure putting weighted_scorer in a router requests sample_weight @@ -1249,9 +1247,7 @@ def test_scorer_metadata_request(name): scorer=weighted_scorer, method_mapping="score" ) router.validate_metadata(params={"sample_weight": 1}, method="score") - routed_params = router.route_params( - params={"sample_weight": 1}, caller="score", parent=name - ) + routed_params = router.route_params(params={"sample_weight": 1}, caller="score") assert list(routed_params.scorer.score.keys()) == ["sample_weight"] diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 2cd4833192d4d..c83d040955d76 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -440,10 +440,6 @@ def _route_params(self, params, parent, caller): elif alias in args: res[prop] = args[alias] if unrequested: - # we want to get the parent's class name, except for cases, when the router - # is a function - if parent.__class__.__name__ != "str": - parent = parent.__class__.__name__ if self.method in COMPOSITE_METHODS: callee_methods = COMPOSITE_METHODS[self.method] else: @@ -1032,7 +1028,7 @@ def _route_params(self, *, params, method, parent, caller): res.update(child_params) return res - def route_params(self, *, caller, params, parent): + def route_params(self, *, caller, params): """Return the input parameters requested by child objects. The output of this method is a bunch, which includes the inputs for all @@ -1052,9 +1048,6 @@ def route_params(self, *, caller, params, parent): params : dict A dictionary of provided metadata. - parent : object - Parent class object, that routes the metadata. - Returns ------- params : Bunch @@ -1075,7 +1068,7 @@ def route_params(self, *, caller, params, parent): if _caller == caller: res[name][_callee] = router._route_params( params=params, - parent=parent, + parent=self.owner, caller=caller, method=_callee, ) @@ -1607,8 +1600,6 @@ def __getattr__(self, name): request_routing = get_routing_for_object(_obj) request_routing.validate_metadata(params=kwargs, method=_method) - routed_params = request_routing.route_params( - params=kwargs, caller=_method, parent=_obj - ) + routed_params = request_routing.route_params(params=kwargs, caller=_method) return routed_params From e0eef01987437f6ad4aa9e2c837b75266771d8f6 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 12 Mar 2024 16:53:51 +0100 Subject: [PATCH 16/18] same signature for methods with same name --- sklearn/utils/_metadata_requests.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index c83d040955d76..9e067fa29195b 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -611,7 +611,7 @@ def _get_param_names(self, method, return_alias, ignore_self_request=None): """ return getattr(self, method)._get_param_names(return_alias=return_alias) - def _route_params(self, *, method, parent, caller, params): + def _route_params(self, *, params, method, parent, caller): """Prepare the given parameters to be passed to the method. The output of this method can be used directly as the input to the @@ -619,6 +619,9 @@ def _route_params(self, *, method, parent, caller, params): Parameters ---------- + params : dict + A dictionary of provided metadata. + method : str The name of the method for which the parameters are requested and routed. @@ -629,9 +632,6 @@ def _route_params(self, *, method, parent, caller, params): caller : str Method from the parent class object, where the metadata is routed from. - params : dict - A dictionary of provided metadata. - Returns ------- params : Bunch @@ -1002,9 +1002,9 @@ def _route_params(self, *, params, method, parent, caller): res.update( self._self_request._route_params( params=params, + method=method, parent=parent, caller=caller, - method=method, ) ) @@ -1068,9 +1068,9 @@ def route_params(self, *, caller, params): if _caller == caller: res[name][_callee] = router._route_params( params=params, + method=_callee, parent=self.owner, caller=caller, - method=_callee, ) return res From f7614e2f872ebf835947bfaf4fcb0127ed7176ed Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 15 Mar 2024 15:28:08 +0100 Subject: [PATCH 17/18] fix test --- sklearn/tests/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index a5a5de41cb943..55f520de7c62e 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1975,7 +1975,7 @@ def test_feature_union_metadata_routing_error(): error_message = ( "[sample_weight, metadata] are passed but are not explicitly set as requested" - f" or not for {ConsumingTransformer.__name__}.fit" + f" or not requested for {ConsumingTransformer.__name__}.fit" ) with pytest.raises(UnsetMetadataPassedError, match=re.escape(error_message)): @@ -1995,7 +1995,7 @@ def test_feature_union_metadata_routing_error(): error_message = ( "[sample_weight, metadata] are passed but are not explicitly set as requested " - f"or not for {ConsumingTransformer.__name__}.transform" + f"or not requested for {ConsumingTransformer.__name__}.transform" ) with pytest.raises(UnsetMetadataPassedError, match=re.escape(error_message)): From 96e42a05a572d08521ae5247fe8ae0c1fa0b36f6 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 8 Apr 2024 13:18:44 +0200 Subject: [PATCH 18/18] satisfy linting --- sklearn/tests/test_metadata_routing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index e094701dc266e..110452870d682 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -1074,6 +1074,7 @@ def test_unsetmetadatapassederror_correct_for_composite_methods(): with pytest.raises(UnsetMetadataPassedError, match=msg): pipe.fit_transform(X, y, metadata="blah") + def test_unbound_set_methods_work(): """Tests that if the set_{method}_request is unbound, it still works.