From ebdc429ad70696ef794e67beb7794b7e91ecfe90 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 29 Nov 2024 13:33:53 +0100 Subject: [PATCH 1/5] FIX KNeighbor classes correctly set positive_only tag --- sklearn/neighbors/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index cdcd8929da6ca..876fb9906b9e2 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -709,6 +709,8 @@ def __sklearn_tags__(self): tags = super().__sklearn_tags__() # For cross-validation routines to split data correctly tags.input_tags.pairwise = self.metric == "precomputed" + # when input is precomputed metric values, all those values need to be positive + tags.input_tags.positive_only = tags.input_tags.pairwise tags.input_tags.allow_nan = self.metric == "nan_euclidean" return tags From 7b709dbe6f437b9bc31a71e23a92e40c5babf6f5 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 29 Nov 2024 16:45:55 +0100 Subject: [PATCH 2/5] add common test --- sklearn/utils/_tags.py | 4 ++++ sklearn/utils/estimator_checks.py | 35 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py index 1ba1913c37234..7e5c275d683e1 100644 --- a/sklearn/utils/_tags.py +++ b/sklearn/utils/_tags.py @@ -58,6 +58,10 @@ class InputTags: Specifically, this tag is used by `sklearn.utils.metaestimators._safe_split` to slice rows and columns. + + Note that if setting this tag to ``True`` means the estimator can take only + positive values, the `positive_only` tag must reflect it and also be set to + ``True``. """ one_d_array: bool = False diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 6bb6524974a3a..fe840fafa7a81 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -136,6 +136,7 @@ def _yield_api_checks(estimator): yield check_estimator_cloneable yield check_estimator_tags_renamed yield check_valid_tag_types + yield check_positive_only_tag_during_fit yield check_estimator_repr yield check_no_attributes_set_in_init yield check_fit_score_takes_y @@ -3899,6 +3900,40 @@ def _enforce_estimator_tags_X(estimator, X, X_test=None, kernel=linear_kernel): return X_res +@ignore_warnings(category=FutureWarning) +def check_positive_only_tag_during_fit(name, estimator_orig): + """Test that the estimator correctly sets the tags.input_tags.positive_only + + If the tag is False, the estimator should accept negative input regardless of the + tags.input_tags.pairwise flag. + """ + estimator = clone(estimator_orig) + tags = get_tags(estimator) + + iris = load_iris() + X, y = iris.data, iris.target + y = _enforce_estimator_tags_y(estimator, y) + set_random_state(estimator, 0) + X = _enforce_estimator_tags_X(estimator_orig, X) + X -= X.mean() + + if tags.input_tags.positive_only: + with raises(ValueError, match="Negative values in data"): + estimator.fit(X, y) + else: + # This should pass + try: + estimator.fit(X, y) + except Exception as e: + err_msg = ( + f"Estimator {repr(name)} raised {e.__class__.__name__} unexpectedly." + " This happens when passing negative input values as X." + " If negative values are not supported for this estimator instance," + " then the tags.input_tags.positive_only tag needs to be set to True." + ) + raise AssertionError(err_msg) from e + + @ignore_warnings(category=FutureWarning) def check_non_transformer_estimators_n_iter(name, estimator_orig): # Test that estimators that are not transformers with a parameter From 3841ac866f3e78675f0b8370484e2e212a332c0e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 29 Nov 2024 17:35:31 +0100 Subject: [PATCH 3/5] fix tests and add a test for the check --- sklearn/utils/estimator_checks.py | 2 +- sklearn/utils/tests/test_estimator_checks.py | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index fe840fafa7a81..a628cd91e1f85 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -136,7 +136,6 @@ def _yield_api_checks(estimator): yield check_estimator_cloneable yield check_estimator_tags_renamed yield check_valid_tag_types - yield check_positive_only_tag_during_fit yield check_estimator_repr yield check_no_attributes_set_in_init yield check_fit_score_takes_y @@ -149,6 +148,7 @@ def _yield_api_checks(estimator): yield check_do_not_raise_errors_in_init_or_set_params yield check_n_features_in_after_fitting yield check_mixin_order + yield check_positive_only_tag_during_fit def _yield_checks(estimator): diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index d09b3e7f366ec..7caf05f3d327f 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -85,6 +85,7 @@ check_outlier_contamination, check_outlier_corruption, check_parameters_default_constructible, + check_positive_only_tag_during_fit, check_regressor_data_not_an_array, check_requires_y_none, check_sample_weights_pandas_series, @@ -509,7 +510,7 @@ class RequiresPositiveXRegressor(LinearRegression): def fit(self, X, y): X, y = validate_data(self, X, y, multi_output=True) if (X < 0).any(): - raise ValueError("negative X values not supported!") + raise ValueError("Negative values in data passed to X.") return super().fit(X, y) def __sklearn_tags__(self): @@ -1600,3 +1601,18 @@ def fit(self, X, y=None): msg = "TransformerMixin comes before/left side of BaseEstimator" with raises(AssertionError, match=re.escape(msg)): check_mixin_order("BadEstimator", BadEstimator()) + + +def test_check_positive_only_tag_during_fit(): + class RequiresPositiveXBadTag(RequiresPositiveXRegressor): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.positive_only = False + return tags + + with raises( + AssertionError, match="This happens when passing negative input values as X." + ): + check_positive_only_tag_during_fit( + "RequiresPositiveXBadTag", RequiresPositiveXBadTag() + ) From 6d8574a2390eadd7a52bf26f05930a496f89b65e Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Wed, 4 Dec 2024 12:56:40 +0100 Subject: [PATCH 4/5] Update sklearn/utils/estimator_checks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- sklearn/utils/estimator_checks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 0523fadec2ebd..c3303f573bd8c 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3910,8 +3910,7 @@ def check_positive_only_tag_during_fit(name, estimator_orig): estimator = clone(estimator_orig) tags = get_tags(estimator) - iris = load_iris() - X, y = iris.data, iris.target + X, y = load_iris(return_X_y=True) y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator, 0) X = _enforce_estimator_tags_X(estimator_orig, X) From 454ff98b0ba70330bf87ce92b38df27fa0f61a9e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 4 Dec 2024 12:57:57 +0100 Subject: [PATCH 5/5] fix --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index c3303f573bd8c..7416216dda520 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3913,7 +3913,7 @@ def check_positive_only_tag_during_fit(name, estimator_orig): X, y = load_iris(return_X_y=True) y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator, 0) - X = _enforce_estimator_tags_X(estimator_orig, X) + X = _enforce_estimator_tags_X(estimator, X) X -= X.mean() if tags.input_tags.positive_only: