Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

FIX KNeighbor classes correctly set positive_only tag #30372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sklearn/neighbors/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,8 @@ def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
# For cross-validation routines to split data correctly
tags.input_tags.pairwise = self.metric == "precomputed"
# when the input is a precomputed metric matrix, none of its values may be negative
tags.input_tags.positive_only = tags.input_tags.pairwise
tags.input_tags.allow_nan = self.metric == "nan_euclidean"
return tags

Expand Down
4 changes: 4 additions & 0 deletions sklearn/utils/_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class InputTags:
Specifically, this tag is used by
`sklearn.utils.metaestimators._safe_split` to slice rows and
columns.

Note that if setting this tag to ``True`` means the estimator can take only
positive values, the `positive_only` tag must reflect it and also be set to
``True``.
"""

one_d_array: bool = False
Expand Down
34 changes: 34 additions & 0 deletions sklearn/utils/estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def _yield_api_checks(estimator):
yield check_do_not_raise_errors_in_init_or_set_params
yield check_n_features_in_after_fitting
yield check_mixin_order
yield check_positive_only_tag_during_fit


def _yield_checks(estimator):
Expand Down Expand Up @@ -3899,6 +3900,39 @@ def _enforce_estimator_tags_X(estimator, X, X_test=None, kernel=linear_kernel):
return X_res


@ignore_warnings(category=FutureWarning)
def check_positive_only_tag_during_fit(name, estimator_orig):
    """Check that `tags.input_tags.positive_only` agrees with what `fit` does.

    A clone of the estimator is fitted on the iris data after it has been
    adapted to the estimator's tags and shifted by its overall mean.

    - When the tag is ``True``, `fit` is expected to raise a ``ValueError``
      whose message matches ``"Negative values in data"``.
    - When the tag is ``False``, `fit` is expected to succeed, whatever the
      value of `tags.input_tags.pairwise`; any exception is re-raised as an
      ``AssertionError`` pointing at the `positive_only` tag.
    """
    estimator = clone(estimator_orig)
    tags = get_tags(estimator)

    X, y = load_iris(return_X_y=True)
    y = _enforce_estimator_tags_y(estimator, y)
    set_random_state(estimator, 0)
    X = _enforce_estimator_tags_X(estimator, X)
    # Shift the data by its global mean; the checks below treat the result as
    # input containing negative values.
    X -= X.mean()

    if tags.input_tags.positive_only:
        # The estimator declares it only supports positive input: it must
        # refuse to fit.
        with raises(ValueError, match="Negative values in data"):
            estimator.fit(X, y)
        return

    # No restriction declared: fitting has to go through without any error.
    try:
        estimator.fit(X, y)
    except Exception as exc:
        raise AssertionError(
            f"Estimator {repr(name)} raised {exc.__class__.__name__} unexpectedly."
            " This happens when passing negative input values as X."
            " If negative values are not supported for this estimator instance,"
            " then the tags.input_tags.positive_only tag needs to be set to True."
        ) from exc


@ignore_warnings(category=FutureWarning)
def check_non_transformer_estimators_n_iter(name, estimator_orig):
# Test that estimators that are not transformers with a parameter
Expand Down
18 changes: 17 additions & 1 deletion sklearn/utils/tests/test_estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
check_outlier_contamination,
check_outlier_corruption,
check_parameters_default_constructible,
check_positive_only_tag_during_fit,
check_regressor_data_not_an_array,
check_requires_y_none,
check_sample_weights_pandas_series,
Expand Down Expand Up @@ -509,7 +510,7 @@ class RequiresPositiveXRegressor(LinearRegression):
def fit(self, X, y):
X, y = validate_data(self, X, y, multi_output=True)
if (X < 0).any():
raise ValueError("negative X values not supported!")
raise ValueError("Negative values in data passed to X.")
return super().fit(X, y)

def __sklearn_tags__(self):
Expand Down Expand Up @@ -1600,3 +1601,18 @@ def fit(self, X, y=None):
msg = "TransformerMixin comes before/left side of BaseEstimator"
with raises(AssertionError, match=re.escape(msg)):
check_mixin_order("BadEstimator", BadEstimator())


def test_check_positive_only_tag_during_fit():
    """The check must fail for an estimator whose `positive_only` tag is wrong.

    `RequiresPositiveXBadTag` inherits from `RequiresPositiveXRegressor` but
    overrides its tags to set `positive_only` to ``False``, so
    `check_positive_only_tag_during_fit` is expected to raise an
    ``AssertionError``.
    """

    class RequiresPositiveXBadTag(RequiresPositiveXRegressor):
        def __sklearn_tags__(self):
            estimator_tags = super().__sklearn_tags__()
            # Deliberately set the tag to False for this subclass.
            estimator_tags.input_tags.positive_only = False
            return estimator_tags

    expected_message = "This happens when passing negative input values as X."
    with raises(AssertionError, match=expected_message):
        check_positive_only_tag_during_fit(
            "RequiresPositiveXBadTag", RequiresPositiveXBadTag()
        )
Loading