diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index b8a1a8e5e22b4..f042acc7c87d9 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2396,7 +2396,8 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     pos_label : int or str, default=None
         Label of the positive class.
         Defaults to the greater label unless y_true is all 0 or all -1
-        in which case pos_label defaults to 1.
+        in which case pos_label defaults to 1. If `y_true` contains strings
+        and `pos_label` is not specified, an error will be raised.
 
     Returns
     -------
@@ -2439,16 +2440,22 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     if y_prob.min() < 0:
         raise ValueError("y_prob contains values less than 0.")
 
-    # if pos_label=None, when y_true is in {-1, 1} or {0, 1},
-    # pos_label is set to 1 (consistent with precision_recall_curve/roc_curve),
-    # otherwise pos_label is set to the greater label
-    # (different from precision_recall_curve/roc_curve,
-    # the purpose is to keep backward compatibility).
+    # Default behavior when pos_label=None:
+    # When y_true contains strings, an error is raised
+    # (differs from other functions, to keep backward compatibility).
+    # When y_true is in {-1, 1} or {0, 1}, pos_label is set to 1
+    # (consistent with precision_recall_curve/roc_curve).
+    # Otherwise pos_label is set to the greater label
+    # (differs from other functions, to keep backward compatibility).
     if pos_label is None:
-        if (np.array_equal(labels, [0]) or
+        if any(isinstance(label, str) for label in labels):
+            raise ValueError("pos_label must be specified when y_true "
+                             "contains strings.")
+        elif (np.array_equal(labels, [0]) or
                 np.array_equal(labels, [-1])):
             pos_label = 1
         else:
-            pos_label = y_true.max()
+            pos_label = labels.max()
+
     y_true = np.array(y_true == pos_label, int)
     return np.average((y_true - y_prob) ** 2, weights=sample_weight)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 1f959d95ce844..b831cad6a07da 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -2246,10 +2246,24 @@ def test_brier_score_loss():
     assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([0], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([1], [0.4]), 0.36)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='bar'), 0.16)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='foo'), 0.36)
+
+    # make sure the positive class is correctly inferred
+    y_true = np.array([0, 1, 1, 0])
+    y_pred = np.array([0.8, 0.6, 0.4, 0.2])
+    score1 = brier_score_loss(y_true, y_pred, pos_label=1)
+    score2 = brier_score_loss(y_true, y_pred)
+    assert score1 == pytest.approx(score2)
+    y_true = np.array(["neg", "pos", "pos", "neg"])
+    # raise error when y_true contains strings and pos_label is not specified
+    with pytest.raises(ValueError, match="pos_label must be specified"):
+        brier_score_loss(y_true, y_pred)
+    score2 = brier_score_loss(y_true, y_pred, pos_label="pos")
+    assert score1 == pytest.approx(score2)
+
+    # the positive class is correctly inferred for an object array of ints
+    y_pred_num_obj = np.array([0, 1, 1, 0], dtype=object)
+    score3 = brier_score_loss(y_pred_num_obj, y_pred)
+    assert score1 == pytest.approx(score3)
 
 
 def test_balanced_accuracy_score_unseen():
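
For reference, below is a small usage sketch (not part of the patch) illustrating the behavior the diff introduces, assuming a scikit-learn build with this change applied; the labels "neg"/"pos" are only illustrative.

import numpy as np
from sklearn.metrics import brier_score_loss

y_prob = np.array([0.8, 0.6, 0.4, 0.2])

# Integer labels: pos_label is inferred as the greater label (1).
print(brier_score_loss(np.array([0, 1, 1, 0]), y_prob))  # 0.3

# String labels without an explicit pos_label now raise a ValueError.
try:
    brier_score_loss(np.array(["neg", "pos", "pos", "neg"]), y_prob)
except ValueError as exc:
    print(exc)  # pos_label must be specified when y_true contains strings.

# Passing pos_label explicitly gives the same score as the integer case.
print(brier_score_loss(np.array(["neg", "pos", "pos", "neg"]), y_prob,
                       pos_label="pos"))  # 0.3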