diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 8a1f4eb95583a..b4d391b34bfb6 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3504,7 +3504,21 @@ def check_decision_proba_consistency(name, estimator_orig):
     # inversions in case of machine level differences.
     a = estimator.predict_proba(X_test)[:, 1].round(decimals=10)
     b = estimator.decision_function(X_test).round(decimals=10)
-    assert_array_equal(rankdata(a), rankdata(b))
+
+    rank_proba, rank_score = rankdata(a), rankdata(b)
+    try:
+        assert_array_almost_equal(rank_proba, rank_score)
+    except AssertionError:
+        # Sometimes, rounding the probabilities introduces ties that are
+        # not present in the decision scores, which are numerically more
+        # precise. In this case, we relax the test by grouping the
+        # decision function scores by probability rank and checking that
+        # the mean score per group is monotonically increasing.
+        grouped_y_score = np.array(
+            [b[rank_proba == group].mean() for group in np.unique(rank_proba)]
+        )
+        sorted_idx = np.argsort(grouped_y_score)
+        assert_array_equal(sorted_idx, np.arange(len(sorted_idx)))
 
 
 def check_outliers_fit_predict(name, estimator_orig):
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 5dfa53e02df26..aaba33de5803c 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -36,6 +36,7 @@
 from sklearn.utils import all_estimators
 from sklearn.exceptions import SkipTestWarning
 from sklearn.utils.metaestimators import available_if
+from sklearn.utils.estimator_checks import check_decision_proba_consistency
 from sklearn.utils._param_validation import Interval, StrOptions
 
 from sklearn.utils.estimator_checks import (
@@ -1159,3 +1160,13 @@ class OutlierDetectorWithConstraint(OutlierDetectorWithoutConstraint):
     detector = OutlierDetectorWithConstraint()
     with raises(AssertionError, match=err_msg):
         check_outlier_contamination(detector.__class__.__name__, detector)
+
+
+def test_decision_proba_tie_ranking():
+    """Check that the ranking comparison with the decision function is
+    relaxed when the predicted probabilities contain ties.
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/24025
+    """
+    estimator = SGDClassifier(loss="log_loss")
+    check_decision_proba_consistency("SGDClassifier", estimator)
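Below is a minimal standalone sketch of the relaxed check introduced in the patch, using hypothetical toy values (not part of the diff): when rounding the probabilities creates ties, the decision scores are grouped by probability rank and the per-group mean is required to be monotonically increasing.

```python
import numpy as np
from scipy.stats import rankdata

# Hypothetical toy data: rounding gives the middle two samples identical
# probabilities, while their decision scores still differ slightly.
proba = np.array([0.1, 0.5, 0.5, 0.9])
score = np.array([-2.0, 0.01, 0.02, 2.5])

# Tied probabilities receive the same (average) rank.
rank_proba = rankdata(proba)  # -> [1.0, 2.5, 2.5, 4.0]

# Group the decision scores by probability rank and take each group's mean.
grouped_score = np.array(
    [score[rank_proba == group].mean() for group in np.unique(rank_proba)]
)

# The relaxed check: the group means must already be in increasing order,
# i.e. both outputs agree on the ranking up to the ties caused by rounding.
sorted_idx = np.argsort(grouped_score)
assert np.array_equal(sorted_idx, np.arange(len(sorted_idx)))
```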