From c4cff8514e5ef455773fcc824e06d585d207bab9 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 13 Sep 2021 20:47:50 +0200 Subject: [PATCH 01/16] ENH add in calibration tools --- sklearn/calibration.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 9a7e08c9d9ff2..3eef8ddf08494 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -37,12 +37,17 @@ from .utils.multiclass import check_classification_targets from .utils.fixes import delayed -from .utils.validation import check_is_fitted, check_consistent_length -from .utils.validation import _check_sample_weight, _num_samples +from .utils.validation import ( + _check_sample_weight, + _num_samples, + check_consistent_length, + check_is_fitted, +) from .utils import _safe_indexing from .isotonic import IsotonicRegression from .svm import LinearSVC from .model_selection import check_cv, cross_val_predict +from .metrics._base import _check_pos_label_consistency from .metrics._plot.base import _get_response @@ -847,7 +852,9 @@ def predict(self, T): return expit(-(self.a_ * T + self.b_)) -def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, strategy="uniform"): +def calibration_curve( + y_true, y_prob, *, pos_label=None, normalize=False, n_bins=5, strategy="uniform" +): """Compute true and predicted probabilities for a calibration curve. The method assumes the inputs come from a binary classifier, and @@ -865,6 +872,11 @@ def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, strategy="un y_prob : array-like of shape (n_samples,) Probabilities of the positive class. + pos_label : int or str, default=None + The label of the positive class. + + .. versionadded:: 1.1 + normalize : bool, default=False Whether y_prob needs to be normalized into the [0, 1] interval, i.e. is not a proper probability. If True, the smallest value in y_prob @@ -915,6 +927,7 @@ def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, strategy="un y_true = column_or_1d(y_true) y_prob = column_or_1d(y_prob) check_consistent_length(y_true, y_prob) + pos_label = _check_pos_label_consistency(pos_label, y_true) if normalize: # Normalize predicted values into interval [0, 1] y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min()) @@ -926,9 +939,9 @@ def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, strategy="un labels = np.unique(y_true) if len(labels) > 2: raise ValueError( - "Only binary classification is supported. Provided labels %s." % labels + "Only binary classification is supported. Provided labels {labels}." 
) - y_true = label_binarize(y_true, classes=labels)[:, 0] + y_true = y_true == pos_label if strategy == "quantile": # Determine bin edges by distribution of data quantiles = np.linspace(0, 1, n_bins + 1) From 8d6b10fa74a127d8c67e849f79b4fd921ad1087b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 10:24:42 +0200 Subject: [PATCH 02/16] TST check that we raise a consistent error message --- sklearn/tests/test_calibration.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index b06f14b082cf5..c568d89b913e6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -785,3 +785,19 @@ def test_calibration_display_ref_line(pyplot, iris_data_binary): labels = viz2.ax_.get_legend_handles_labels()[1] assert labels.count("Perfectly calibrated") == 1 + + +@pytest.mark.parametrize("dtype_y_str", [str, object]) +def test_calibration_curve_pos_label_error_str(dtype_y_str): + """Check error message when a `pos_label` is not specified with `str` targets.""" + rng = np.random.RandomState(42) + y1 = np.array(["spam"] * 3 + ["eggs"] * 2, dtype=dtype_y_str) + y2 = rng.randint(0, 2, size=y1.size) + + err_msg = ( + "y_true takes value in {'eggs', 'spam'} and pos_label is not " + "specified: either make y_true take value in {0, 1} or {-1, 1} or " + "pass pos_label explicit" + ) + with pytest.raises(ValueError, match=err_msg): + calibration_curve(y1, y2) From 65b149913e7ee12a3c5c39068ab1ffd827fa67fe Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 10:27:24 +0200 Subject: [PATCH 03/16] add whats new --- doc/whats_new/v1.1.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index fba40e25a9e7e..2616f55d9561f 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -38,6 +38,13 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123456 is the *pull request* number, not the issue number. +:mod:`sklearn.calibration` +.......................... + +- |Enhancement| :func:`calibration.calibration_curve` accepts a parameter + `pos_label` to specify the positive class label. + :pr:`21032` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.utils` .................... 
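A minimal usage sketch (not part of the patch series) of the `pos_label` parameter that the patches above add to `calibration_curve`, assuming string-labelled binary targets:

    import numpy as np
    from sklearn.calibration import calibration_curve

    # String targets: without `pos_label`, `calibration_curve` cannot infer the
    # positive class and raises the ValueError exercised in PATCH 02/16.
    y_true = np.array(["spam", "spam", "spam", "egg", "egg", "egg", "egg", "egg"])
    y_prob = np.array([0.1, 0.2, 0.3, 0.6, 0.65, 0.8, 0.9, 1.0])

    # Treat "egg" as the positive class when binning the predicted probabilities.
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=4, pos_label="egg")
    print(prob_true)   # fraction of "egg" samples per bin
    print(prob_pred)   # mean predicted probability per bin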
From 3865f60f85e71894f74b5b2f74577bad04aaf2da Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 11:53:20 +0200 Subject: [PATCH 04/16] TST add test for pos_label --- sklearn/tests/test_calibration.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index c568d89b913e6..c78c273c7038c 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -801,3 +801,24 @@ def test_calibration_curve_pos_label_error_str(dtype_y_str): ) with pytest.raises(ValueError, match=err_msg): calibration_curve(y1, y2) + + +@pytest.mark.parametrize("dtype_y_str", [str, object]) +def test_calibration_curve_pos_label(dtype_y_str): + """Check the behaviour when passing explicitly `pos_label`.""" + y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1]) + classes = np.array(["spam", "egg"], dtype=dtype_y_str) + y_true_str = classes[y_true] + y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.0]) + + # default case + prob_true, _ = calibration_curve(y_true, y_pred, n_bins=4) + assert_allclose(prob_true, [0, 0.5, 1, 1]) + # if `y_true` contains `str`, then `pos_label` is required + prob_true, _ = calibration_curve(y_true_str, y_pred, n_bins=4, pos_label="egg") + assert_allclose(prob_true, [0, 0.5, 1, 1]) + + prob_true, _ = calibration_curve(y_true, 1 - y_pred, n_bins=4, pos_label=0) + assert_allclose(prob_true, [0, 0, 0.5, 1]) + prob_true, _ = calibration_curve(y_true_str, 1 - y_pred, n_bins=4, pos_label="spam") + assert_allclose(prob_true, [0, 0, 0.5, 1]) From 1423fb7e624dc8105aa2521a9bf0368bc6c782fc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 13:43:27 +0200 Subject: [PATCH 05/16] ENH add pos_label to CalibrationDisplay --- sklearn/calibration.py | 44 +++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index af8a3ae208efd..75d4ecb02f5d9 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -996,6 +996,13 @@ class CalibrationDisplay: estimator_name : str, default=None Name of estimator. If None, the estimator name is not shown. + pos_label : str or int, default=None + The class considered as the positive class when computing the + calibration curve. By default, `estimators.classes_[1]` is considered + as the positive class. + + .. versionadded:: 1.1 + Attributes ---------- line_ : matplotlib Artist @@ -1035,11 +1042,14 @@ class CalibrationDisplay: <...> """ - def __init__(self, prob_true, prob_pred, y_prob, *, estimator_name=None): + def __init__( + self, prob_true, prob_pred, y_prob, *, estimator_name=None, pos_label=None + ): self.prob_true = prob_true self.prob_pred = prob_pred self.y_prob = y_prob self.estimator_name = estimator_name + self.pos_label = pos_label def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): """Plot visualization. @@ -1106,6 +1116,7 @@ def from_estimator( *, n_bins=5, strategy="uniform", + pos_label=None, name=None, ref_line=True, ax=None, @@ -1151,6 +1162,13 @@ def from_estimator( - `'quantile'`: The bins have the same number of samples and depend on predicted probabilities. + pos_label : str or int, default=None + The class considered as the positive class when computing the + calibration curve. By default, `estimators.classes_[1]` is + considered as the positive class. + + .. versionadded:: 1.1 + name : str, default=None Name for labeling curve. If `None`, the name of the estimator is used. 
@@ -1198,10 +1216,8 @@ def from_estimator( if not is_classifier(estimator): raise ValueError("'estimator' should be a fitted classifier.") - # FIXME: `pos_label` should not be set to None - # We should allow any int or string in `calibration_curve`. - y_prob, _ = _get_response( - X, estimator, response_method="predict_proba", pos_label=None + y_prob, pos_label = _get_response( + X, estimator, response_method="predict_proba", pos_label=pos_label ) name = name if name is not None else estimator.__class__.__name__ @@ -1210,6 +1226,7 @@ def from_estimator( y_prob, n_bins=n_bins, strategy=strategy, + pos_label=pos_label, name=name, ref_line=ref_line, ax=ax, @@ -1224,6 +1241,7 @@ def from_predictions( *, n_bins=5, strategy="uniform", + pos_label=None, name=None, ref_line=True, ax=None, @@ -1264,6 +1282,13 @@ def from_predictions( - `'quantile'`: The bins have the same number of samples and depend on predicted probabilities. + pos_label : str or int, default=None + The class considered as the positive class when computing the + calibration curve. By default, `estimators.classes_[1]` is + considered as the positive class. + + .. versionadded:: 1.1 + name : str, default=None Name for labeling curve. @@ -1311,9 +1336,14 @@ def from_predictions( prob_true, prob_pred = calibration_curve( y_true, y_prob, n_bins=n_bins, strategy=strategy ) - name = name if name is not None else "Classifier" + name = "Classifier" if name is None else name + pos_label = _check_pos_label_consistency(pos_label, y_true) disp = cls( - prob_true=prob_true, prob_pred=prob_pred, y_prob=y_prob, estimator_name=name + prob_true=prob_true, + prob_pred=prob_pred, + y_prob=y_prob, + estimator_name=name, + pos_label=pos_label, ) return disp.plot(ax=ax, ref_line=ref_line, **kwargs) From 2644263294a84b5f4d6d3fa96230e4ba85ef5f6c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 13:46:52 +0200 Subject: [PATCH 06/16] DOC add to whats new --- doc/whats_new/v1.1.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 2616f55d9561f..6690b7d17b562 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -45,6 +45,10 @@ Changelog `pos_label` to specify the positive class label. :pr:`21032` by :user:`Guillaume Lemaitre `. +- |Enhancement| :class:`CalibrationDisplay` accepts a parameter `pos_label` to + add this information to the plot. + :pr:`21038` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.utils` .................... 
From 65b3bd13df0dd25819a6da6ce9e5afd23f22e378 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 14:34:42 +0200 Subject: [PATCH 07/16] TST add test for pos_label in CalibrationDisplay --- sklearn/calibration.py | 9 +++++++-- sklearn/metrics/_plot/base.py | 2 +- sklearn/tests/test_calibration.py | 30 ++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 75d4ecb02f5d9..bf53e2d03a503 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -1086,6 +1086,9 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): fig, ax = plt.subplots() name = self.estimator_name if name is None else name + info_pos_label = ( + f" (Positive label: {self.pos_label})" if self.pos_label is not None else "" + ) line_kwargs = {} if name is not None: @@ -1101,7 +1104,9 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): if "label" in line_kwargs: ax.legend(loc="lower right") - ax.set(xlabel="Mean predicted probability", ylabel="Fraction of positives") + xlabel = "Mean predicted probability" + info_pos_label + ylabel = "Fraction of positives" + info_pos_label + ax.set(xlabel=xlabel, ylabel=ylabel) self.ax_ = ax self.figure_ = ax.figure @@ -1334,7 +1339,7 @@ def from_predictions( check_matplotlib_support(method_name) prob_true, prob_pred = calibration_curve( - y_true, y_prob, n_bins=n_bins, strategy=strategy + y_true, y_prob, n_bins=n_bins, strategy=strategy, pos_label=pos_label ) name = "Classifier" if name is None else name pos_label = _check_pos_label_consistency(pos_label, y_true) diff --git a/sklearn/metrics/_plot/base.py b/sklearn/metrics/_plot/base.py index 8f5552ffd6808..67bce31315e3e 100644 --- a/sklearn/metrics/_plot/base.py +++ b/sklearn/metrics/_plot/base.py @@ -111,7 +111,7 @@ def _get_response(X, estimator, response_method, pos_label=None): pos_label = estimator.classes_[1] y_pred = y_pred[:, 1] else: - class_idx = np.flatnonzero(estimator.classes_ == pos_label) + class_idx = np.flatnonzero(estimator.classes_ == pos_label)[0] y_pred = y_pred[:, class_idx] else: if pos_label is None: diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 9238c0eda5c91..4d728c7a20aef 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -703,8 +703,8 @@ def test_calibration_display_compute(pyplot, iris_data_binary, n_bins, strategy) assert isinstance(viz.ax_, mpl.axes.Axes) assert isinstance(viz.figure_, mpl.figure.Figure) - assert viz.ax_.get_xlabel() == "Mean predicted probability" - assert viz.ax_.get_ylabel() == "Fraction of positives" + assert viz.ax_.get_xlabel() == "Mean predicted probability (Positive label: 1)" + assert viz.ax_.get_ylabel() == "Fraction of positives (Positive label: 1)" assert viz.line_.get_label() == "LogisticRegression" @@ -821,3 +821,29 @@ def test_calibration_curve_pos_label(dtype_y_str): assert_allclose(prob_true, [0, 0, 0.5, 1]) prob_true, _ = calibration_curve(y_true_str, 1 - y_pred, n_bins=4, pos_label="spam") assert_allclose(prob_true, [0, 0, 0.5, 1]) + + +def test_calibration_display_pos_label(pyplot, iris_data_binary): + """Check the behaviour of `pos_label` in the `CalibrationDisplay`.""" + X, y = iris_data_binary + + lr = LogisticRegression().fit(X, y) + + pos_label = 0 + viz = CalibrationDisplay.from_estimator(lr, X, y, pos_label=pos_label) + + y_prob = lr.predict_proba(X)[:, pos_label] + prob_true, prob_pred = calibration_curve(y, y_prob, pos_label=pos_label) + + 
assert_allclose(viz.prob_true, prob_true) + assert_allclose(viz.prob_pred, prob_pred) + assert_allclose(viz.y_prob, y_prob) + + assert ( + viz.ax_.get_xlabel() + == f"Mean predicted probability (Positive label: {pos_label})" + ) + assert ( + viz.ax_.get_ylabel() == f"Fraction of positives (Positive label: {pos_label})" + ) + assert viz.line_.get_label() == "LogisticRegression" From ec9bdf210d18739242f0d690d68e845ec2098500 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 15:06:17 +0200 Subject: [PATCH 08/16] TST add unit tests for current _get_response --- sklearn/metrics/_plot/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_plot/base.py b/sklearn/metrics/_plot/base.py index 8f5552ffd6808..442b833f40345 100644 --- a/sklearn/metrics/_plot/base.py +++ b/sklearn/metrics/_plot/base.py @@ -94,7 +94,9 @@ def _get_response(X, estimator, response_method, pos_label=None): y_pred = prediction_method(X) - if pos_label is not None and pos_label not in estimator.classes_: + # Checking that a scalar is contained in a NumPy array will raise a FutureWarning. + # We need to convert it into a list. + if pos_label is not None and pos_label not in list(estimator.classes_): raise ValueError( "The class provided by 'pos_label' is unknown. Got " f"{pos_label} instead of one of {estimator.classes_}" @@ -111,7 +113,7 @@ def _get_response(X, estimator, response_method, pos_label=None): pos_label = estimator.classes_[1] y_pred = y_pred[:, 1] else: - class_idx = np.flatnonzero(estimator.classes_ == pos_label) + class_idx = np.flatnonzero(estimator.classes_ == pos_label)[0] y_pred = y_pred[:, class_idx] else: if pos_label is None: From a989c67e185794563b9d0d51d97be96b05cfa7c0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 15:06:33 +0200 Subject: [PATCH 09/16] TST add unit tests for current _get_response --- sklearn/metrics/_plot/tests/test_base.py | 75 ++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 sklearn/metrics/_plot/tests/test_base.py diff --git a/sklearn/metrics/_plot/tests/test_base.py b/sklearn/metrics/_plot/tests/test_base.py new file mode 100644 index 0000000000000..2f67d7dd223f4 --- /dev/null +++ b/sklearn/metrics/_plot/tests/test_base.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from sklearn.datasets import load_iris +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from sklearn.metrics._plot.base import _get_response + + +@pytest.mark.parametrize( + "estimator, err_msg, params", + [ + ( + DecisionTreeRegressor(), + "Expected 'estimator' to be a binary classifier", + {"response_method": "auto"}, + ), + ( + DecisionTreeClassifier(), + "The class provided by 'pos_label' is unknown.", + {"response_method": "auto", "pos_label": "unknown"}, + ), + ( + DecisionTreeClassifier(), + "fit on multiclass", + {"response_method": "predict_proba"}, + ), + ], +) +def test_get_response_error(estimator, err_msg, params): + """Check that we raise the proper error messages in `_get_response`.""" + X, y = load_iris(return_X_y=True) + + estimator.fit(X, y) + with pytest.raises(ValueError, match=err_msg): + _get_response(X, estimator, **params) + + +def test_get_response_predict_proba(): + """Check the behaviour of `_get_response` using `predict_proba`.""" + X, y = load_iris(return_X_y=True) + X_binary, y_binary = X[:100], y[:100] + + classifier = DecisionTreeClassifier().fit(X_binary, y_binary) + y_proba, 
pos_label = _get_response( + X_binary, classifier, response_method="predict_proba" + ) + np.testing.assert_allclose(y_proba, classifier.predict_proba(X_binary)[:, 1]) + assert pos_label == 1 + + y_proba, pos_label = _get_response( + X_binary, classifier, response_method="predict_proba", pos_label=0 + ) + np.testing.assert_allclose(y_proba, classifier.predict_proba(X_binary)[:, 0]) + assert pos_label == 0 + + +def test_get_response_decision_function(): + """Check the behaviour of `get_response` using `decision_function`.""" + X, y = load_iris(return_X_y=True) + X_binary, y_binary = X[:100], y[:100] + + classifier = LogisticRegression().fit(X_binary, y_binary) + y_score, pos_label = _get_response( + X_binary, classifier, response_method="decision_function" + ) + np.testing.assert_allclose(y_score, classifier.decision_function(X_binary)) + assert pos_label == 1 + + y_score, pos_label = _get_response( + X_binary, classifier, response_method="decision_function", pos_label=0 + ) + np.testing.assert_allclose(y_score, classifier.decision_function(X_binary) * -1) + assert pos_label == 0 From bc6efdaac3f162bd09f21840cdb47929114fad5c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 15:20:33 +0200 Subject: [PATCH 10/16] add a proper way to check the warning raised --- setup.cfg | 2 +- sklearn/metrics/_plot/base.py | 4 ++-- sklearn/metrics/_plot/tests/test_base.py | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 050045072f428..3150bcb1ef5ad 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ addopts = --ignore maint_tools --ignore asv_benchmarks --doctest-modules - --disable-pytest-warnings + # --disable-pytest-warnings --color=yes -rxXs diff --git a/sklearn/metrics/_plot/base.py b/sklearn/metrics/_plot/base.py index 442b833f40345..817a82ee5d7ab 100644 --- a/sklearn/metrics/_plot/base.py +++ b/sklearn/metrics/_plot/base.py @@ -94,8 +94,8 @@ def _get_response(X, estimator, response_method, pos_label=None): y_pred = prediction_method(X) - # Checking that a scalar is contained in a NumPy array will raise a FutureWarning. - # We need to convert it into a list. + # `not in` between a `str` and a NumPy array will raise a FutureWarning; + # thus we convert the array of classes into a Python list. if pos_label is not None and pos_label not in list(estimator.classes_): raise ValueError( "The class provided by 'pos_label' is unknown. 
Got " diff --git a/sklearn/metrics/_plot/tests/test_base.py b/sklearn/metrics/_plot/tests/test_base.py index 2f67d7dd223f4..75dd4edd50f71 100644 --- a/sklearn/metrics/_plot/tests/test_base.py +++ b/sklearn/metrics/_plot/tests/test_base.py @@ -56,6 +56,25 @@ def test_get_response_predict_proba(): assert pos_label == 0 +def test_get_response_warning(): + """Check that we don't raise a FutureWarning issued by NumPy.""" + X, y = load_iris(return_X_y=True) + X_binary, y_binary = X[:100], y[:100] + + classifier = DecisionTreeClassifier().fit(X_binary, y_binary) + with pytest.warns(None) as record: + try: + _get_response( + X_binary, + classifier, + response_method="predict_proba", + pos_label="unknown", + ) + except ValueError: + pass + assert len(record) == 0 + + def test_get_response_decision_function(): """Check the behaviour of `get_response` using `decision_function`.""" X, y = load_iris(return_X_y=True) From cf4c2a4b8ae88981f73c4fd6bce5c84d3f02ab9d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 14 Sep 2021 15:22:43 +0200 Subject: [PATCH 11/16] revert hidding warning --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3150bcb1ef5ad..050045072f428 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ addopts = --ignore maint_tools --ignore asv_benchmarks --doctest-modules - # --disable-pytest-warnings + --disable-pytest-warnings --color=yes -rxXs From 55636db589fd43f56bd43f4258d382d20f5b5d77 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 24 Sep 2021 15:31:40 +0200 Subject: [PATCH 12/16] Update sklearn/tests/test_calibration.py Co-authored-by: Olivier Grisel --- sklearn/tests/test_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index c78c273c7038c..be790c2a98ba8 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -797,7 +797,7 @@ def test_calibration_curve_pos_label_error_str(dtype_y_str): err_msg = ( "y_true takes value in {'eggs', 'spam'} and pos_label is not " "specified: either make y_true take value in {0, 1} or {-1, 1} or " - "pass pos_label explicit" + "pass pos_label explicitly" ) with pytest.raises(ValueError, match=err_msg): calibration_curve(y1, y2) From 02dc3a54f1275957286b9cdb0275b1fd0269f6c1 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 24 Sep 2021 15:44:06 +0200 Subject: [PATCH 13/16] Address ogrisel comments --- sklearn/calibration.py | 2 +- sklearn/tests/test_calibration.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 7ec4d4919f1f8..dd7b9993e8c2f 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -1087,7 +1087,7 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): name = self.estimator_name if name is None else name info_pos_label = ( - f" (Positive label: {self.pos_label})" if self.pos_label is not None else "" + f" (Positive class: {self.pos_label})" if self.pos_label is not None else "" ) line_kwargs = {} diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 6ebbff7e5747a..a08a3b480dbe0 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -703,8 +703,8 @@ def test_calibration_display_compute(pyplot, iris_data_binary, n_bins, strategy) assert isinstance(viz.ax_, mpl.axes.Axes) assert isinstance(viz.figure_, mpl.figure.Figure) - assert 
viz.ax_.get_xlabel() == "Mean predicted probability (Positive label: 1)" - assert viz.ax_.get_ylabel() == "Fraction of positives (Positive label: 1)" + assert viz.ax_.get_xlabel() == "Mean predicted probability (Positive class: 1)" + assert viz.ax_.get_ylabel() == "Fraction of positives (Positive class: 1)" assert viz.line_.get_label() == "LogisticRegression" @@ -841,9 +841,9 @@ def test_calibration_display_pos_label(pyplot, iris_data_binary): assert ( viz.ax_.get_xlabel() - == f"Mean predicted probability (Positive label: {pos_label})" + == f"Mean predicted probability (Positive class: {pos_label})" ) assert ( - viz.ax_.get_ylabel() == f"Fraction of positives (Positive label: {pos_label})" + viz.ax_.get_ylabel() == f"Fraction of positives (Positive class: {pos_label})" ) assert viz.line_.get_label() == "LogisticRegression" From 23e87df80b7507a7b66c86c42b4b5cb920f57633 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 27 Sep 2021 11:46:07 +0200 Subject: [PATCH 14/16] Update sklearn/calibration.py Co-authored-by: Thomas J. Fan --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 104cd84f4f219..a390159c2db38 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -939,7 +939,7 @@ def calibration_curve( labels = np.unique(y_true) if len(labels) > 2: raise ValueError( - "Only binary classification is supported. Provided labels {labels}." + f"Only binary classification is supported. Provided labels {labels}." ) y_true = y_true == pos_label From 3570c698cff7398cab6efbd82a3cae8da0e373a8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 Oct 2021 11:35:25 +0200 Subject: [PATCH 15/16] Apply suggestions from code review Co-authored-by: Julien Jerphanion --- sklearn/calibration.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 24006af7d516c..8bc1b9842de6c 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -1016,9 +1016,9 @@ class CalibrationDisplay: Name of estimator. If None, the estimator name is not shown. pos_label : str or int, default=None - The class considered as the positive class when computing the - calibration curve. By default, `estimators.classes_[1]` is considered - as the positive class. + The positive class when computing the calibration curve. + By default, `estimators.classes_[1]` is considered as the + positive class. .. versionadded:: 1.1 @@ -1106,7 +1106,7 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): name = self.estimator_name if name is None else name info_pos_label = ( - f" (Positive class: {self.pos_label})" if self.pos_label is not None else "" + f"(Positive class: {self.pos_label})" if self.pos_label is not None else "" ) line_kwargs = {} @@ -1123,8 +1123,8 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): if "label" in line_kwargs: ax.legend(loc="lower right") - xlabel = "Mean predicted probability" + info_pos_label - ylabel = "Fraction of positives" + info_pos_label + xlabel = f"Mean predicted probability {info_pos_label}" + ylabel = f"Fraction of positives {info_pos_label}" ax.set(xlabel=xlabel, ylabel=ylabel) self.ax_ = ax @@ -1187,9 +1187,9 @@ def from_estimator( on predicted probabilities. pos_label : str or int, default=None - The class considered as the positive class when computing the - calibration curve. By default, `estimators.classes_[1]` is - considered as the positive class. 
+ The positive class when computing the calibration curve. + By default, `estimators.classes_[1]` is considered as the + positive class. .. versionadded:: 1.1 @@ -1307,9 +1307,9 @@ def from_predictions( on predicted probabilities. pos_label : str or int, default=None - The class considered as the positive class when computing the - calibration curve. By default, `estimators.classes_[1]` is - considered as the positive class. + The positive class when computing the calibration curve. + By default, `estimators.classes_[1]` is considered as the + positive class. .. versionadded:: 1.1 From c1cebfe3f36462f6a73ea7972b0669305ab2dabd Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 27 Oct 2021 11:41:13 +0200 Subject: [PATCH 16/16] TST add multiple cases for pos_label --- sklearn/tests/test_calibration.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index ae1ef0f6e76c5..b6b5c482b1eb5 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -823,16 +823,17 @@ def test_calibration_curve_pos_label(dtype_y_str): assert_allclose(prob_true, [0, 0, 0.5, 1]) -def test_calibration_display_pos_label(pyplot, iris_data_binary): +@pytest.mark.parametrize("pos_label, expected_pos_label", [(None, 1), (0, 0), (1, 1)]) +def test_calibration_display_pos_label( + pyplot, iris_data_binary, pos_label, expected_pos_label +): """Check the behaviour of `pos_label` in the `CalibrationDisplay`.""" X, y = iris_data_binary lr = LogisticRegression().fit(X, y) - - pos_label = 0 viz = CalibrationDisplay.from_estimator(lr, X, y, pos_label=pos_label) - y_prob = lr.predict_proba(X)[:, pos_label] + y_prob = lr.predict_proba(X)[:, expected_pos_label] prob_true, prob_pred = calibration_curve(y, y_prob, pos_label=pos_label) assert_allclose(viz.prob_true, prob_true) @@ -841,10 +842,11 @@ def test_calibration_display_pos_label(pyplot, iris_data_binary): assert ( viz.ax_.get_xlabel() - == f"Mean predicted probability (Positive class: {pos_label})" + == f"Mean predicted probability (Positive class: {expected_pos_label})" ) assert ( - viz.ax_.get_ylabel() == f"Fraction of positives (Positive class: {pos_label})" + viz.ax_.get_ylabel() + == f"Fraction of positives (Positive class: {expected_pos_label})" ) assert viz.line_.get_label() == "LogisticRegression"
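A minimal usage sketch (not part of the patch series) of the `pos_label` parameter that the later patches add to `CalibrationDisplay.from_estimator`, assuming a fitted binary classifier and matplotlib available:

    from sklearn.calibration import CalibrationDisplay
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=1000, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression().fit(X_train, y_train)

    # Plot the calibration of the probabilities of class 0; with the wording
    # introduced by the final patches of this series, the axis labels read
    # "... (Positive class: 0)".
    disp = CalibrationDisplay.from_estimator(clf, X_test, y_test, n_bins=10, pos_label=0)
    disp.figure_.savefig("calibration_pos_label_0.png")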