diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 7141080afbc06..946c31146c1a1 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -45,6 +45,10 @@ Changelog
   `pos_label` to specify the positive class label.
   :pr:`21032` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- |Enhancement| :class:`CalibrationDisplay` accepts a parameter `pos_label` to
+  add this information to the plot.
+  :pr:`21038` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 :mod:`sklearn.cross_decomposition`
 ..................................
 
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 6d9abf82d3470..8bc1b9842de6c 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -1015,6 +1015,13 @@ class CalibrationDisplay:
     estimator_name : str, default=None
         Name of estimator. If None, the estimator name is not shown.
 
+    pos_label : str or int, default=None
+        The positive class when computing the calibration curve.
+        By default, `estimator.classes_[1]` is considered as the
+        positive class.
+
+        .. versionadded:: 1.1
+
     Attributes
     ----------
     line_ : matplotlib Artist
@@ -1054,11 +1061,14 @@ class CalibrationDisplay:
     <...>
     """
 
-    def __init__(self, prob_true, prob_pred, y_prob, *, estimator_name=None):
+    def __init__(
+        self, prob_true, prob_pred, y_prob, *, estimator_name=None, pos_label=None
+    ):
         self.prob_true = prob_true
         self.prob_pred = prob_pred
         self.y_prob = y_prob
         self.estimator_name = estimator_name
+        self.pos_label = pos_label
 
     def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
         """Plot visualization.
@@ -1095,6 +1105,9 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
             fig, ax = plt.subplots()
 
         name = self.estimator_name if name is None else name
+        info_pos_label = (
+            f"(Positive class: {self.pos_label})" if self.pos_label is not None else ""
+        )
 
         line_kwargs = {}
         if name is not None:
@@ -1110,7 +1123,9 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
         if "label" in line_kwargs:
             ax.legend(loc="lower right")
 
-        ax.set(xlabel="Mean predicted probability", ylabel="Fraction of positives")
+        xlabel = f"Mean predicted probability {info_pos_label}"
+        ylabel = f"Fraction of positives {info_pos_label}"
+        ax.set(xlabel=xlabel, ylabel=ylabel)
 
         self.ax_ = ax
         self.figure_ = ax.figure
@@ -1125,6 +1140,7 @@ def from_estimator(
         *,
         n_bins=5,
         strategy="uniform",
+        pos_label=None,
         name=None,
         ref_line=True,
         ax=None,
@@ -1170,6 +1186,13 @@
             - `'quantile'`: The bins have the same number of samples and
               depend on predicted probabilities.
 
+        pos_label : str or int, default=None
+            The positive class when computing the calibration curve.
+            By default, `estimator.classes_[1]` is considered as the
+            positive class.
+
+            .. versionadded:: 1.1
+
         name : str, default=None
             Name for labeling curve. If `None`, the name of the
             estimator is used.
@@ -1217,10 +1240,8 @@
         if not is_classifier(estimator):
             raise ValueError("'estimator' should be a fitted classifier.")
 
-        # FIXME: `pos_label` should not be set to None
-        # We should allow any int or string in `calibration_curve`.
-        y_prob, _ = _get_response(
-            X, estimator, response_method="predict_proba", pos_label=None
+        y_prob, pos_label = _get_response(
+            X, estimator, response_method="predict_proba", pos_label=pos_label
         )
 
         name = name if name is not None else estimator.__class__.__name__
@@ -1229,6 +1250,7 @@
             y_prob,
             n_bins=n_bins,
             strategy=strategy,
+            pos_label=pos_label,
             name=name,
             ref_line=ref_line,
             ax=ax,
@@ -1243,6 +1265,7 @@ def from_predictions(
         *,
         n_bins=5,
         strategy="uniform",
+        pos_label=None,
         name=None,
         ref_line=True,
         ax=None,
@@ -1283,6 +1306,13 @@
             - `'quantile'`: The bins have the same number of samples and
               depend on predicted probabilities.
 
+        pos_label : str or int, default=None
+            The positive class when computing the calibration curve.
+            By default, `estimator.classes_[1]` is considered as the
+            positive class.
+
+            .. versionadded:: 1.1
+
         name : str, default=None
             Name for labeling curve.
 
@@ -1328,11 +1358,16 @@
         check_matplotlib_support(method_name)
 
         prob_true, prob_pred = calibration_curve(
-            y_true, y_prob, n_bins=n_bins, strategy=strategy
+            y_true, y_prob, n_bins=n_bins, strategy=strategy, pos_label=pos_label
         )
-        name = name if name is not None else "Classifier"
+        name = "Classifier" if name is None else name
+        pos_label = _check_pos_label_consistency(pos_label, y_true)
 
         disp = cls(
-            prob_true=prob_true, prob_pred=prob_pred, y_prob=y_prob, estimator_name=name
+            prob_true=prob_true,
+            prob_pred=prob_pred,
+            y_prob=y_prob,
+            estimator_name=name,
+            pos_label=pos_label,
         )
         return disp.plot(ax=ax, ref_line=ref_line, **kwargs)
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 4ad983d72e007..b6b5c482b1eb5 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -703,8 +703,8 @@ def test_calibration_display_compute(pyplot, iris_data_binary, n_bins, strategy)
     assert isinstance(viz.ax_, mpl.axes.Axes)
     assert isinstance(viz.figure_, mpl.figure.Figure)
 
-    assert viz.ax_.get_xlabel() == "Mean predicted probability"
-    assert viz.ax_.get_ylabel() == "Fraction of positives"
+    assert viz.ax_.get_xlabel() == "Mean predicted probability (Positive class: 1)"
+    assert viz.ax_.get_ylabel() == "Fraction of positives (Positive class: 1)"
     assert viz.line_.get_label() == "LogisticRegression"
 
 
@@ -823,6 +823,34 @@ def test_calibration_curve_pos_label(dtype_y_str):
     assert_allclose(prob_true, [0, 0, 0.5, 1])
 
 
+@pytest.mark.parametrize("pos_label, expected_pos_label", [(None, 1), (0, 0), (1, 1)])
+def test_calibration_display_pos_label(
+    pyplot, iris_data_binary, pos_label, expected_pos_label
+):
+    """Check the behaviour of `pos_label` in the `CalibrationDisplay`."""
+    X, y = iris_data_binary
+
+    lr = LogisticRegression().fit(X, y)
+    viz = CalibrationDisplay.from_estimator(lr, X, y, pos_label=pos_label)
+
+    y_prob = lr.predict_proba(X)[:, expected_pos_label]
+    prob_true, prob_pred = calibration_curve(y, y_prob, pos_label=pos_label)
+
+    assert_allclose(viz.prob_true, prob_true)
+    assert_allclose(viz.prob_pred, prob_pred)
+    assert_allclose(viz.y_prob, y_prob)
+
+    assert (
+        viz.ax_.get_xlabel()
+        == f"Mean predicted probability (Positive class: {expected_pos_label})"
+    )
+    assert (
+        viz.ax_.get_ylabel()
+        == f"Fraction of positives (Positive class: {expected_pos_label})"
+    )
+    assert viz.line_.get_label() == "LogisticRegression"
+
+
 @pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
 @pytest.mark.parametrize("ensemble", [True, False])
 def test_calibrated_classifier_cv_double_sample_weights_equivalence(method, ensemble):
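
Usage sketch (reviewer note, not part of the patch): a minimal example of the new
parameter, assuming scikit-learn is built from this branch (`pos_label` on
`CalibrationDisplay` only exists with this diff applied) and matplotlib is
installed; the synthetic dataset and variable names are illustrative.

    import matplotlib.pyplot as plt

    from sklearn.calibration import CalibrationDisplay
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    # Illustrative binary classification problem.
    X, y = make_classification(n_samples=1000, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = LogisticRegression().fit(X_train, y_train)

    # Treat class 0 as the positive class: per this patch, the curve is
    # computed from the predicted probabilities of class 0, and the axis
    # labels read "Mean predicted probability (Positive class: 0)" and
    # "Fraction of positives (Positive class: 0)".
    disp = CalibrationDisplay.from_estimator(clf, X_test, y_test, pos_label=0)
    plt.show()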