From 8dacf5e49d1b4ec7a066c2cc925b6a4b523135b1 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 10 Jan 2024 00:21:19 -0500 Subject: [PATCH 01/15] Adding api deprecation Signed-off-by: Adam Li --- doc/whats_new/v1.4.rst | 8 ++++ sklearn/metrics/_classification.py | 48 +++++++++++++++----- sklearn/metrics/_ranking.py | 43 ++++++++++++++++-- sklearn/metrics/tests/test_classification.py | 29 ++++++++++-- sklearn/metrics/tests/test_ranking.py | 34 ++++++++++++++ 5 files changed, 144 insertions(+), 18 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index a932391b732cd..4da31db6448a6 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -618,6 +618,14 @@ Changelog :func:`metrics.root_mean_squared_log_error` instead. :pr:`26734` by :user:`Alejandro Martin Gil <101AlexMartin>`. +- |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` + in favor of ``y_score``. ``probas_pred`` will be removed in version 1.6. + :pr:`27718` by :user:`Adam Li `. + +- |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` + in favor of ``y_proba``. ``y_prob`` will be removed in version 1.6. + :pr:`27718` by :user:`Adam Li `. + :mod:`sklearn.model_selection` .............................. diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f0a13f8a04830..5b316bbdd33da 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3122,13 +3122,16 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): @validate_params( { "y_true": ["array-like"], - "y_prob": ["array-like"], + "y_proba": ["array-like", None], "sample_weight": ["array-like", None], "pos_label": [Real, str, "boolean", None], + "y_prob": ["array-like", StrOptions({"deprecated"})], }, prefer_skip_nested_validation=True, ) -def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): +def brier_score_loss( + y_true, y_proba=None, *, sample_weight=None, pos_label=None, y_prob="deprecated" +): """Compute the Brier score loss. The smaller the Brier score loss, the better, hence the naming with "loss". @@ -3156,7 +3159,7 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): y_true : array-like of shape (n_samples,) True targets. - y_prob : array-like of shape (n_samples,) + y_proba : array-like of shape (n_samples,) Probabilities of the positive class. sample_weight : array-like of shape (n_samples,), default=None @@ -3172,6 +3175,13 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): * otherwise, `pos_label` defaults to the greater label, i.e. `np.unique(y_true)[-1]`. + y_prob : array-like of shape (n_samples,) + Probabilities of the positive class. + + .. deprecated:: 1.4 + `y_prob` is deprecated and will be removed in 1.6. Use + `y_proba` instead. + Returns ------- score : float @@ -3198,11 +3208,27 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): >>> brier_score_loss(y_true, np.array(y_prob) > 0.5) 0.0 """ + # TODO(1.6): remove in 1.6 and reset y_proba to be required + if y_prob != "deprecated" and y_proba is not None: + raise ValueError( + "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" + " as `y_prob` is deprecated in v1.4 and will be removed in v1.6." + ) + if y_prob != "deprecated": + warnings.warn( + ( + "y_prob was deprecated in version 1.4 and will be removed in 1.6." + "Please use ``y_proba`` instead." + ), + FutureWarning, + ) + y_proba = y_prob + y_true = column_or_1d(y_true) - y_prob = column_or_1d(y_prob) + y_proba = column_or_1d(y_proba) assert_all_finite(y_true) - assert_all_finite(y_prob) - check_consistent_length(y_true, y_prob, sample_weight) + assert_all_finite(y_proba) + check_consistent_length(y_true, y_proba, sample_weight) y_type = type_of_target(y_true, input_name="y_true") if y_type != "binary": @@ -3211,10 +3237,10 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): f"is {y_type}." ) - if y_prob.max() > 1: - raise ValueError("y_prob contains values greater than 1.") - if y_prob.min() < 0: - raise ValueError("y_prob contains values less than 0.") + if y_proba.max() > 1: + raise ValueError("y_proba contains values greater than 1.") + if y_proba.min() < 0: + raise ValueError("y_proba contains values less than 0.") try: pos_label = _check_pos_label_consistency(pos_label, y_true) @@ -3227,4 +3253,4 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): else: raise y_true = np.array(y_true == pos_label, int) - return np.average((y_true - y_prob) ** 2, weights=sample_weight) + return np.average((y_true - y_proba) ** 2, weights=sample_weight) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 4a2e7aa1b78a3..47e3e16ee2196 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -852,15 +852,25 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): @validate_params( { "y_true": ["array-like"], - "probas_pred": ["array-like"], + "y_score": ["array-like", None], "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], "drop_intermediate": ["boolean"], + "probas_pred": [ + "array-like", + StrOptions({"deprecated"}), + ], }, prefer_skip_nested_validation=True, ) def precision_recall_curve( - y_true, probas_pred, *, pos_label=None, sample_weight=None, drop_intermediate=False + y_true, + y_score=None, + *, + pos_label=None, + sample_weight=None, + drop_intermediate=False, + probas_pred="deprecated", ): """Compute precision-recall pairs for different probability thresholds. @@ -890,7 +900,7 @@ def precision_recall_curve( True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given. - probas_pred : array-like of shape (n_samples,) + y_score : array-like of shape (n_samples,) Target scores, can either be probability estimates of the positive class, or non-thresholded measure of decisions (as returned by `decision_function` on some classifiers). @@ -910,6 +920,15 @@ def precision_recall_curve( .. versionadded:: 1.3 + probas_pred : array-like of shape (n_samples,) + Target scores, can either be probability estimates of the positive + class, or non-thresholded measure of decisions (as returned by + `decision_function` on some classifiers). By default None. + + .. deprecated:: 1.4 + `probas_pred` is deprecated and will be removed in 1.6. Use + `y_score` instead. + Returns ------- precision : ndarray of shape (n_thresholds + 1,) @@ -949,8 +968,24 @@ def precision_recall_curve( >>> thresholds array([0.1 , 0.35, 0.4 , 0.8 ]) """ + # TODO(1.6): remove in 1.6 and reset y_score to be required + if probas_pred != "deprecated" and y_score is not None: + raise ValueError( + "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" + " only as `probas_pred` is deprecated in v1.4 and will be removed in v1.6." + ) + if probas_pred != "deprecated": + warnings.warn( + ( + "probas_pred was deprecated in version 1.4 and will be removed in 1.6." + "Please use ``y_score`` instead." + ), + FutureWarning, + ) + y_score = probas_pred + fps, tps, thresholds = _binary_clf_curve( - y_true, probas_pred, pos_label=pos_label, sample_weight=sample_weight + y_true, y_score, pos_label=pos_label, sample_weight=sample_weight ) if drop_intermediate and len(fps) > 2: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index abf1aae487599..a51b4bdef8ca2 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -88,16 +88,16 @@ def make_prediction(dataset=None, binary=False): # run classifier, get class probabilities and label predictions clf = svm.SVC(kernel="linear", probability=True, random_state=0) - probas_pred = clf.fit(X[:half], y[:half]).predict_proba(X[half:]) + y_pred_proba = clf.fit(X[:half], y[:half]).predict_proba(X[half:]) if binary: # only interested in probabilities of the positive case # XXX: do we really want a special API for the binary case? - probas_pred = probas_pred[:, 1] + y_pred_proba = y_pred_proba[:, 1] y_pred = clf.predict(X[half:]) y_true = y[half:] - return y_true, y_pred, probas_pred + return y_true, y_pred, y_pred_proba ############################################################################### @@ -2864,3 +2864,26 @@ def test_classification_metric_division_by_zero_nan_validaton(scoring): X, y = datasets.make_classification(random_state=0) classifier = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y) cross_val_score(classifier, X, y, scoring=scoring, n_jobs=2, error_score="raise") + + +# TODO(1.6): remove +def test_brier_score_loss_deprecation_warning(): + """Check the message for future deprecation.""" + # Check brier_score_loss function + y_true = np.array([0, 1, 1, 0, 1, 1]) + y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) + + warn_msg = "y_prob was deprecated in version 1.4" + with pytest.warns(FutureWarning, match=warn_msg): + brier_score_loss( + y_true, + y_prob=y_pred, + ) + + error_msg = "`y_prob` and `y_proba` cannot be both specified" + with pytest.raises(ValueError, match=error_msg): + brier_score_loss( + y_true, + y_prob=y_pred, + y_proba=y_pred, + ) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index d49d96e1d82d7..1c45ba15ee45b 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2242,3 +2242,37 @@ def test_roc_curve_with_probablity_estimates(global_random_seed): y_score = rng.rand(10) _, _, thresholds = roc_curve(y_true, y_score) assert np.isinf(thresholds[0]) + + +@pytest.mark.parametrize("drop", [True, False]) +def test_precision_recall_curve(drop): + y_true, _, y_score = make_prediction(binary=True) + _test_precision_recall_curve(y_true, y_score, drop) + + # Make sure the first point of the Precision-Recall on the right is: + # (p=1.0, r=class balance) on a non-balanced dataset [1:] + p, r, t = precision_recall_curve(y_true[1:], y_score[1:], drop_intermediate=drop) + assert r[0] == 1.0 + assert p[0] == y_true[1:].mean() + + +# TODO(1.6): remove +def test_precision_recall_curve_deprecation_warning(): + """Check the message for future deprecation.""" + # Check precision_recall_curve function + y_true, _, y_score = make_prediction(binary=True) + + warn_msg = "probas_pred was deprecated in version 1.4" + with pytest.warns(FutureWarning, match=warn_msg): + precision_recall_curve( + y_true, + probas_pred=y_score, + ) + + error_msg = "`probas_pred` and `y_score` cannot be both specified" + with pytest.raises(ValueError, match=error_msg): + precision_recall_curve( + y_true, + probas_pred=y_score, + y_score=y_score, + ) From 91fe2f598869c0c7cb3de63a8426961ca42c235d Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 10 Jan 2024 00:22:44 -0500 Subject: [PATCH 02/15] Update pr number Signed-off-by: Adam Li --- doc/whats_new/v1.4.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 4da31db6448a6..feaa076dd5e9c 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -620,11 +620,11 @@ Changelog - |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` in favor of ``y_score``. ``probas_pred`` will be removed in version 1.6. - :pr:`27718` by :user:`Adam Li `. + :pr:`28092` by :user:`Adam Li `. - |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` in favor of ``y_proba``. ``y_prob`` will be removed in version 1.6. - :pr:`27718` by :user:`Adam Li `. + :pr:`28092` by :user:`Adam Li `. :mod:`sklearn.model_selection` .............................. From 213f9fb99e0fa628652994073c8628b5f801364f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 10 Jan 2024 00:48:50 -0500 Subject: [PATCH 03/15] Fix lint Signed-off-by: Adam Li --- sklearn/metrics/tests/test_ranking.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 1c45ba15ee45b..b976b19d937f4 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2244,18 +2244,6 @@ def test_roc_curve_with_probablity_estimates(global_random_seed): assert np.isinf(thresholds[0]) -@pytest.mark.parametrize("drop", [True, False]) -def test_precision_recall_curve(drop): - y_true, _, y_score = make_prediction(binary=True) - _test_precision_recall_curve(y_true, y_score, drop) - - # Make sure the first point of the Precision-Recall on the right is: - # (p=1.0, r=class balance) on a non-balanced dataset [1:] - p, r, t = precision_recall_curve(y_true[1:], y_score[1:], drop_intermediate=drop) - assert r[0] == 1.0 - assert p[0] == y_true[1:].mean() - - # TODO(1.6): remove def test_precision_recall_curve_deprecation_warning(): """Check the message for future deprecation.""" From 19e92c59e3c961c82ac956715f60f9f15aaa0da6 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 11 Jan 2024 12:03:34 -0500 Subject: [PATCH 04/15] Fix warnings Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 4 ++-- sklearn/metrics/_ranking.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 5b316bbdd33da..180f404f2919c 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3209,12 +3209,12 @@ def brier_score_loss( 0.0 """ # TODO(1.6): remove in 1.6 and reset y_proba to be required - if y_prob != "deprecated" and y_proba is not None: + if y_proba is not None and not isinstance(y_prob, str): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.4 and will be removed in v1.6." ) - if y_prob != "deprecated": + if y_proba is None: warnings.warn( ( "y_prob was deprecated in version 1.4 and will be removed in 1.6." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 47e3e16ee2196..e85b842e3fe10 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -969,12 +969,12 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.6): remove in 1.6 and reset y_score to be required - if probas_pred != "deprecated" and y_score is not None: + if y_score is not None and not isinstance(probas_pred, str): raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" " only as `probas_pred` is deprecated in v1.4 and will be removed in v1.6." ) - if probas_pred != "deprecated": + if y_score is None: warnings.warn( ( "probas_pred was deprecated in version 1.4 and will be removed in 1.6." From 5073f5162103dcc8c38c38706a1d4a2750bb813b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 09:06:19 -0500 Subject: [PATCH 05/15] Apply suggestions from code review Co-authored-by: Adrin Jalali --- sklearn/metrics/_classification.py | 2 +- sklearn/metrics/_ranking.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0dfd0ace2bb88..1b2ffc7c8a33a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3221,7 +3221,7 @@ def brier_score_loss( 0.0 """ # TODO(1.6): remove in 1.6 and reset y_proba to be required - if y_proba is not None and not isinstance(y_prob, str): + if y_proba is not None and y_prob != "deprecated": raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.4 and will be removed in v1.6." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index eff2833bcb154..d3a11a7bc2d90 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -982,7 +982,7 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.6): remove in 1.6 and reset y_score to be required - if y_score is not None and not isinstance(probas_pred, str): + if y_score is not None and probas_pred != "deprecated": raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" " only as `probas_pred` is deprecated in v1.4 and will be removed in v1.6." From 5c0eccaaefa917bf0b5abe1b24c233b0e771e835 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 09:08:57 -0500 Subject: [PATCH 06/15] Address comments Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 22 +++++++++++++------- sklearn/metrics/_ranking.py | 14 ++++++------- sklearn/metrics/tests/test_classification.py | 4 ++-- sklearn/metrics/tests/test_ranking.py | 4 ++-- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1b2ffc7c8a33a..ee02551491dcd 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -39,7 +39,13 @@ column_or_1d, ) from ..utils._array_api import _union1d, _weighted_sum, get_namespace -from ..utils._param_validation import Interval, Options, StrOptions, validate_params +from ..utils._param_validation import ( + Hidden, + Interval, + Options, + StrOptions, + validate_params, +) from ..utils.extmath import _nanaverage from ..utils.multiclass import type_of_target, unique_labels from ..utils.sparsefuncs import count_nonzero @@ -3134,10 +3140,10 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): @validate_params( { "y_true": ["array-like"], - "y_proba": ["array-like", None], + "y_proba": ["array-like", Hidden(None)], "sample_weight": ["array-like", None], "pos_label": [Real, str, "boolean", None], - "y_prob": ["array-like", StrOptions({"deprecated"})], + "y_prob": ["array-like", Hidden(StrOptions({"deprecated"}))], }, prefer_skip_nested_validation=True, ) @@ -3190,8 +3196,8 @@ def brier_score_loss( y_prob : array-like of shape (n_samples,) Probabilities of the positive class. - .. deprecated:: 1.4 - `y_prob` is deprecated and will be removed in 1.6. Use + .. deprecated:: 1.5 + `y_prob` is deprecated and will be removed in 1.7. Use `y_proba` instead. Returns @@ -3220,16 +3226,16 @@ def brier_score_loss( >>> brier_score_loss(y_true, np.array(y_prob) > 0.5) 0.0 """ - # TODO(1.6): remove in 1.6 and reset y_proba to be required + # TODO(1.7): remove in 1.7 and reset y_proba to be required if y_proba is not None and y_prob != "deprecated": raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" - " as `y_prob` is deprecated in v1.4 and will be removed in v1.6." + " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." ) if y_proba is None: warnings.warn( ( - "y_prob was deprecated in version 1.4 and will be removed in 1.6." + "y_prob was deprecated in version 1.5 and will be removed in 1.7." "Please use ``y_proba`` instead." ), FutureWarning, diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index d3a11a7bc2d90..76b0e9bce5033 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -36,7 +36,7 @@ column_or_1d, ) from ..utils._encode import _encode, _unique -from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ..utils.extmath import stable_cumsum from ..utils.fixes import trapezoid from ..utils.multiclass import type_of_target @@ -865,13 +865,13 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): @validate_params( { "y_true": ["array-like"], - "y_score": ["array-like", None], + "y_score": ["array-like", Hidden(None)], "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], "drop_intermediate": ["boolean"], "probas_pred": [ "array-like", - StrOptions({"deprecated"}), + Hidden(StrOptions({"deprecated"})), ], }, prefer_skip_nested_validation=True, @@ -938,8 +938,8 @@ def precision_recall_curve( class, or non-thresholded measure of decisions (as returned by `decision_function` on some classifiers). By default None. - .. deprecated:: 1.4 - `probas_pred` is deprecated and will be removed in 1.6. Use + .. deprecated:: 1.5 + `probas_pred` is deprecated and will be removed in 1.7. Use `y_score` instead. Returns @@ -985,12 +985,12 @@ def precision_recall_curve( if y_score is not None and probas_pred != "deprecated": raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" - " only as `probas_pred` is deprecated in v1.4 and will be removed in v1.6." + " only as `probas_pred` is deprecated in v1.5 and will be removed in v1.7." ) if y_score is None: warnings.warn( ( - "probas_pred was deprecated in version 1.4 and will be removed in 1.6." + "probas_pred was deprecated in version 1.5 and will be removed in 1.7." "Please use ``y_score`` instead." ), FutureWarning, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a51b4bdef8ca2..8f79874e72162 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2866,14 +2866,14 @@ def test_classification_metric_division_by_zero_nan_validaton(scoring): cross_val_score(classifier, X, y, scoring=scoring, n_jobs=2, error_score="raise") -# TODO(1.6): remove +# TODO(1.7): remove def test_brier_score_loss_deprecation_warning(): """Check the message for future deprecation.""" # Check brier_score_loss function y_true = np.array([0, 1, 1, 0, 1, 1]) y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) - warn_msg = "y_prob was deprecated in version 1.4" + warn_msg = "y_prob was deprecated in version 1.5" with pytest.warns(FutureWarning, match=warn_msg): brier_score_loss( y_true, diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index b976b19d937f4..7b3a71978907a 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2244,13 +2244,13 @@ def test_roc_curve_with_probablity_estimates(global_random_seed): assert np.isinf(thresholds[0]) -# TODO(1.6): remove +# TODO(1.7): remove def test_precision_recall_curve_deprecation_warning(): """Check the message for future deprecation.""" # Check precision_recall_curve function y_true, _, y_score = make_prediction(binary=True) - warn_msg = "probas_pred was deprecated in version 1.4" + warn_msg = "probas_pred was deprecated in version 1.5" with pytest.warns(FutureWarning, match=warn_msg): precision_recall_curve( y_true, From c279517d0ecdfed12a666f7e57e2100325409cc8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 09:09:37 -0500 Subject: [PATCH 07/15] Move changelog Signed-off-by: Adam Li --- doc/whats_new/v1.4.rst | 8 -------- doc/whats_new/v1.5.rst | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 401ebe093641f..0cc775fd7c604 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -679,14 +679,6 @@ Changelog :func:`metrics.root_mean_squared_log_error` instead. :pr:`26734` by :user:`Alejandro Martin Gil <101AlexMartin>`. -- |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` - in favor of ``y_score``. ``probas_pred`` will be removed in version 1.6. - :pr:`28092` by :user:`Adam Li `. - -- |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` - in favor of ``y_proba``. ``y_prob`` will be removed in version 1.6. - :pr:`28092` by :user:`Adam Li `. - :mod:`sklearn.model_selection` .............................. diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index f976e99e498b6..433207ad405b8 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -96,6 +96,14 @@ Changelog :class:`~calibration.CalibrationDisplay`. :pr:`28051` by :user:`Pierre de Fréminville `. +- |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` + in favor of ``y_score``. ``probas_pred`` will be removed in version 1.6. + :pr:`28092` by :user:`Adam Li `. + +- |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` + in favor of ``y_proba``. ``y_prob`` will be removed in version 1.6. + :pr:`28092` by :user:`Adam Li `. + :mod:`sklearn.utils` .................... From d6be8ab36ce4e575842b5e7483d37e2f3289bf6b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 09:09:46 -0500 Subject: [PATCH 08/15] Move changelog Signed-off-by: Adam Li --- doc/whats_new/v1.5.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 433207ad405b8..d8a5e50367e1e 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -97,11 +97,11 @@ Changelog :pr:`28051` by :user:`Pierre de Fréminville `. - |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` - in favor of ``y_score``. ``probas_pred`` will be removed in version 1.6. + in favor of ``y_score``. ``probas_pred`` will be removed in version 1.7. :pr:`28092` by :user:`Adam Li `. - |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` - in favor of ``y_proba``. ``y_prob`` will be removed in version 1.6. + in favor of ``y_proba``. ``y_prob`` will be removed in version 1.7. :pr:`28092` by :user:`Adam Li `. :mod:`sklearn.utils` From ee4b64d33ad206752979166a87636da09800c60e Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 09:10:45 -0500 Subject: [PATCH 09/15] 1.7 --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 76b0e9bce5033..5c30820f233f0 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -981,7 +981,7 @@ def precision_recall_curve( >>> thresholds array([0.1 , 0.35, 0.4 , 0.8 ]) """ - # TODO(1.6): remove in 1.6 and reset y_score to be required + # TODO(1.7): remove in 1.7 and reset y_score to be required if y_score is not None and probas_pred != "deprecated": raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" From 3ded4e6e64c15c146510e208735e3ca9ce806ba4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 25 Jan 2024 11:50:11 -0500 Subject: [PATCH 10/15] Allow correct comparison Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 2 +- sklearn/metrics/_ranking.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index ee02551491dcd..bdded37f36895 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3227,7 +3227,7 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - if y_proba is not None and y_prob != "deprecated": + if y_proba is not None and str(y_prob) != "deprecated": raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 5c30820f233f0..8f184180eed25 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -982,7 +982,7 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.7): remove in 1.7 and reset y_score to be required - if y_score is not None and probas_pred != "deprecated": + if y_score is not None and str(probas_pred) != "deprecated": raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" " only as `probas_pred` is deprecated in v1.5 and will be removed in v1.7." From 8bb3c50c77a1be2d77e8f9fa0bffad068f78cae4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 20:29:20 -0500 Subject: [PATCH 11/15] Apply suggestions from code review Co-authored-by: Thomas J. Fan --- doc/whats_new/v1.5.rst | 8 ++++---- sklearn/metrics/_classification.py | 2 +- sklearn/metrics/_ranking.py | 6 ++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 6f85e58217b1c..2360e4f38265b 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -123,12 +123,12 @@ Changelog :class:`~calibration.CalibrationDisplay`. :pr:`28051` by :user:`Pierre de Fréminville `. -- |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument ``probas_pred`` - in favor of ``y_score``. ``probas_pred`` will be removed in version 1.7. +- |API| :func:`metrics.precision_recall_curve` deprecated the keyword argument `probas_pred` + in favor of `y_score`. `probas_pred` will be removed in version 1.7. :pr:`28092` by :user:`Adam Li `. -- |API| :func:`metrics.brier_score_loss` deprecated the keyword argument ``y_prob`` - in favor of ``y_proba``. ``y_prob`` will be removed in version 1.7. +- |API| :func:`metrics.brier_score_loss` deprecated the keyword argument `y_prob` + in favor of `y_proba`. `y_prob` will be removed in version 1.7. :pr:`28092` by :user:`Adam Li `. :mod:`sklearn.model_selection` diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index d9f6f86a6e68b..b168a02eb6b96 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3239,7 +3239,7 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - if y_proba is not None and str(y_prob) != "deprecated": + if y_proba is not None and (isinstance(y_prob, str) and y_prob == "deprecated"): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 220c76e671fbb..f14a5f51f7a2e 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -936,7 +936,7 @@ def precision_recall_curve( probas_pred : array-like of shape (n_samples,) Target scores, can either be probability estimates of the positive class, or non-thresholded measure of decisions (as returned by - `decision_function` on some classifiers). By default None. + `decision_function` on some classifiers). .. deprecated:: 1.5 `probas_pred` is deprecated and will be removed in 1.7. Use @@ -982,7 +982,9 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.7): remove in 1.7 and reset y_score to be required - if y_score is not None and str(probas_pred) != "deprecated": + if y_score is not None and ( + isinstance(probas_pred, str) and probas_pred != "deprecated" + ): raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" " only as `probas_pred` is deprecated in v1.5 and will be removed in v1.7." From a6e7eea93a48852d946e6b049ebb3d9d3efba6aa Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 22:59:46 -0500 Subject: [PATCH 12/15] Fix unit test Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b168a02eb6b96..b7531ad3cae8c 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3239,7 +3239,7 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - if y_proba is not None and (isinstance(y_prob, str) and y_prob == "deprecated"): + if y_proba is not None and (isinstance(y_prob, str) and y_prob != "deprecated"): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." From 545013dfe1080e0d716bb793434f8f5e84f3e7fe Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 10:03:04 -0500 Subject: [PATCH 13/15] Fix unit test Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 2 +- sklearn/metrics/_ranking.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b7531ad3cae8c..2fe9afd51e9f3 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3239,7 +3239,7 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - if y_proba is not None and (isinstance(y_prob, str) and y_prob != "deprecated"): + if y_proba is not None and (not isinstance(y_prob, str) and y_prob != "deprecated"): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index f14a5f51f7a2e..3c761440cc20d 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -983,7 +983,7 @@ def precision_recall_curve( """ # TODO(1.7): remove in 1.7 and reset y_score to be required if y_score is not None and ( - isinstance(probas_pred, str) and probas_pred != "deprecated" + not isinstance(probas_pred, str) and probas_pred != "deprecated" ): raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" From 50dea0f0c0c5e7b1eaabc125217a775612f27dd9 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 10:41:54 -0500 Subject: [PATCH 14/15] Fix unit tests Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 3 ++- sklearn/metrics/_ranking.py | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2fe9afd51e9f3..385651ba3c1d6 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3239,7 +3239,8 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - if y_proba is not None and (not isinstance(y_prob, str) and y_prob != "deprecated"): + # Note: validate params will raise an error if y_prob is not array-like, or "deprecated" + if y_proba is not None and not isinstance(y_prob, str): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" " as `y_prob` is deprecated in v1.5 and will be removed in v1.7." diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 3c761440cc20d..886721a01b50e 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -982,9 +982,8 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.7): remove in 1.7 and reset y_score to be required - if y_score is not None and ( - not isinstance(probas_pred, str) and probas_pred != "deprecated" - ): + # Note: validate params will raise an error if probas_pred is not array-like, or "deprecated" + if y_score is not None and not isinstance(probas_pred, str): raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`" " only as `probas_pred` is deprecated in v1.5 and will be removed in v1.7." From 62dc74ee54a2421a56df1bf2bcd1f2fae7bca3ec Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 11:14:59 -0500 Subject: [PATCH 15/15] Fix lint' -s Signed-off-by: Adam Li --- sklearn/metrics/_classification.py | 3 ++- sklearn/metrics/_ranking.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 385651ba3c1d6..08dd6e2558095 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3239,7 +3239,8 @@ def brier_score_loss( 0.0 """ # TODO(1.7): remove in 1.7 and reset y_proba to be required - # Note: validate params will raise an error if y_prob is not array-like, or "deprecated" + # Note: validate params will raise an error if y_prob is not array-like, + # or "deprecated" if y_proba is not None and not isinstance(y_prob, str): raise ValueError( "`y_prob` and `y_proba` cannot be both specified. Please use `y_proba` only" diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 886721a01b50e..e31b06a926dd1 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -982,7 +982,8 @@ def precision_recall_curve( array([0.1 , 0.35, 0.4 , 0.8 ]) """ # TODO(1.7): remove in 1.7 and reset y_score to be required - # Note: validate params will raise an error if probas_pred is not array-like, or "deprecated" + # Note: validate params will raise an error if probas_pred is not array-like, + # or "deprecated" if y_score is not None and not isinstance(probas_pred, str): raise ValueError( "`probas_pred` and `y_score` cannot be both specified. Please use `y_score`"