MNT Improve error message with implicit pos_label in brier_score_loss #15412


Closed

23 changes: 15 additions & 8 deletions sklearn/metrics/_classification.py
@@ -2396,7 +2396,8 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     pos_label : int or str, default=None
         Label of the positive class.
         Defaults to the greater label unless y_true is all 0 or all -1
-        in which case pos_label defaults to 1.
+        in which case pos_label defaults to 1. If `y_true` contains strings
+        and `pos_label` is not specified, an error will be raised.

     Returns
     -------
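For context on this docstring change: the quantity the function returns, per the `return` statement in the next hunk, is the (optionally weighted) average squared difference between the positive-class indicator and the predicted probability. A minimal standalone sketch of that computation (the `brier_sketch` name is hypothetical, not part of the diff):

```python
import numpy as np

def brier_sketch(y_true, y_prob, pos_label, sample_weight=None):
    # Indicator: 1 where the true label equals pos_label, else 0.
    y_ind = (np.asarray(y_true) == pos_label).astype(int)
    # Weighted mean squared error, mirroring the return statement below.
    return np.average((y_ind - np.asarray(y_prob)) ** 2, weights=sample_weight)

print(brier_sketch([0, 1], [0.1, 0.9], pos_label=1))  # 0.01
```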
@@ -2439,16 +2440,22 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     if y_prob.min() < 0:
         raise ValueError("y_prob contains values less than 0.")

-    # if pos_label=None, when y_true is in {-1, 1} or {0, 1},
-    # pos_label is set to 1 (consistent with precision_recall_curve/roc_curve),
-    # otherwise pos_label is set to the greater label
-    # (different from precision_recall_curve/roc_curve,
-    # the purpose is to keep backward compatibility).
Comment on lines -2442 to -2446 (Member):

Keep this?

+    # Default behavior when pos_label=None:
+    # When y_true contains strings, an error will be raised.
+    # (differs from other functions, to keep backward compatibility)
+    # When y_true is in {-1, 1} or {0, 1}, pos_label is set to 1.
+    # (consistent with precision_recall_curve/roc_curve)
+    # Otherwise pos_label is set to the greater label.
+    # (differs from other functions, to keep backward compatibility)
     if pos_label is None:
-        if (np.array_equal(labels, [0]) or
+        if any(isinstance(label, str) for label in labels):
+            raise ValueError("pos_label must be specified when y_true "
+                             "contains strings.")
+        elif (np.array_equal(labels, [0]) or
                 np.array_equal(labels, [-1])):
             pos_label = 1
         else:
-            pos_label = y_true.max()
+            pos_label = labels.max()

     y_true = np.array(y_true == pos_label, int)
     return np.average((y_true - y_prob) ** 2, weights=sample_weight)
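Taken together, the patched defaulting logic behaves as sketched below, assuming the diff above is applied to `sklearn.metrics.brier_score_loss` (the probabilities are arbitrary illustration values):

```python
import numpy as np
from sklearn.metrics import brier_score_loss

y_prob = np.array([0.1, 0.9, 0.8, 0.3])

# {0, 1} labels: pos_label defaults to 1.
brier_score_loss([0, 1, 1, 0], y_prob)

# Other numeric labels: pos_label defaults to the greater label (here 2).
brier_score_loss([1, 2, 2, 1], y_prob)

# String labels without pos_label now raise:
# ValueError: pos_label must be specified when y_true contains strings.
try:
    brier_score_loss(["neg", "pos", "pos", "neg"], y_prob)
except ValueError as exc:
    print(exc)

# An explicit pos_label keeps string labels working as before.
brier_score_loss(["neg", "pos", "pos", "neg"], y_prob, pos_label="pos")
```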
22 changes: 18 additions & 4 deletions sklearn/metrics/tests/test_classification.py
@@ -2246,10 +2246,24 @@ def test_brier_score_loss():
     assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([0], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([1], [0.4]), 0.36)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='bar'), 0.16)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='foo'), 0.36)
+
+    # make sure the positive class is correctly inferred
+    y_true = np.array([0, 1, 1, 0])
+    y_pred = np.array([0.8, 0.6, 0.4, 0.2])
+    score1 = brier_score_loss(y_true, y_pred, pos_label=1)
+    score2 = brier_score_loss(y_true, y_pred)
+    assert score1 == pytest.approx(score2)
+    y_true = np.array(["neg", "pos", "pos", "neg"])
+    # raise error when y_true contains strings and pos_label is not specified
+    with pytest.raises(ValueError, match="pos_label must be specified"):
+        brier_score_loss(y_true, y_pred)
+    score2 = brier_score_loss(y_true, y_pred, pos_label="pos")
+    assert score1 == pytest.approx(score2)
+
+    # positive class is correctly inferred for an object array with all ints
+    y_pred_num_obj = np.array([0, 1, 1, 0], dtype=object)
Comment (Member):

This test case was added; it was enabled by #15412 (comment) and this fb199bd diff.

+    score3 = brier_score_loss(y_pred_num_obj, y_pred)
+    assert score1 == pytest.approx(score3)


 def test_balanced_accuracy_score_unseen():
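As a sanity check on the new assertions, the expected equalities can be verified by hand with plain NumPy (no scikit-learn required):

```python
import numpy as np

# Single-sample cases at the top of the test:
# (0 - 0.4) ** 2 == 0.16 and (1 - 0.4) ** 2 == 0.36.

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0.8, 0.6, 0.4, 0.2])

# With pos_label=1 the indicator equals y_true, so the loss is
# (0.64 + 0.16 + 0.36 + 0.04) / 4 = 0.30.
print(np.mean((y_true - y_pred) ** 2))  # 0.3

# "pos" maps to 1 at the same positions, and the object-dtype array
# holds the same 0/1 values, so score2 and score3 both equal score1.
```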