[MRG+2] Fix log loss bug #7239
Changes from all commits
**sklearn/metrics/classification.py**

```diff
@@ -1544,13 +1544,15 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))
 
 
-def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.
 
     This is the loss function used in (multinomial) logistic regression
     and extensions of it such as neural networks, defined as the negative
     log-likelihood of the true labels given a probabilistic classifier's
-    predictions. For a single sample with true label yt in {0,1} and
+    predictions. The log loss is only defined for two or more labels.
+    For a single sample with true label yt in {0,1} and
     estimated probability yp that yt = 1, the log loss is
 
         -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
```
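As a reader's aside (not part of the PR): the binary formula above can be checked numerically against `sklearn.metrics.log_loss`. A minimal sketch; `manual_log_loss` and the sample values are invented for illustration:

```python
import numpy as np
from sklearn.metrics import log_loss

def manual_log_loss(y_true, p_pos):
    # Direct transcription of the docstring formula
    # -(yt * log(yp) + (1 - yt) * log(1 - yp)), averaged over samples.
    y_true = np.asarray(y_true, dtype=float)
    p_pos = np.asarray(p_pos, dtype=float)
    return float(np.mean(-(y_true * np.log(p_pos)
                           + (1 - y_true) * np.log(1 - p_pos))))

y_true = [0, 1, 1, 0]
p_pos = [0.1, 0.8, 0.7, 0.4]           # estimated P(yt = 1) per sample
print(manual_log_loss(y_true, p_pos))  # ~0.299
print(log_loss(y_true, p_pos))         # matches (normalize=True averages)
```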
```diff
@@ -1562,9 +1564,13 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     y_true : array-like or label indicator matrix
         Ground truth (correct) labels for n_samples samples.
 
-    y_pred : array-like of float, shape = (n_samples, n_classes)
+    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
         Predicted probabilities, as returned by a classifier's
-        predict_proba method.
+        predict_proba method. If ``y_pred.shape = (n_samples,)``
+        the probabilities provided are assumed to be that of the
+        positive class. The labels in ``y_pred`` are assumed to be
+        ordered alphabetically, as done by
+        :class:`preprocessing.LabelBinarizer`.
 
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
```
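As an aside (not part of the PR), the column-ordering convention described above can be checked directly; the string labels below are invented for illustration:

```python
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import log_loss

# LabelBinarizer sorts the labels, so the columns of y_pred must follow
# that order: here 'ham' is column 0 and 'spam' is column 1.
lb = LabelBinarizer().fit(['spam', 'ham'])
print(lb.classes_)                     # ['ham' 'spam']

y_true = ['spam', 'ham', 'spam']
y_pred = np.array([[0.1, 0.9],         # [P(ham), P(spam)] per sample
                   [0.8, 0.2],
                   [0.3, 0.7]])
print(log_loss(y_true, y_pred))

# With a 1-d y_pred, the values are read as probabilities of the
# positive (alphabetically last) class, here 'spam'.
print(log_loss(y_true, y_pred[:, 1]))  # same loss as above
```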
```diff
@@ -1577,6 +1583,12 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true. If ``labels``
+        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
+        assumed to be binary and are inferred from ``y_true``.
+
+        .. versionadded:: 0.18
+
     Returns
     -------
     loss : float
```
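For illustration (not part of the PR's diff), a sketch of how the new argument resolves the degenerate case, assuming this branch's version of `log_loss`; the numbers are invented:

```python
from sklearn.metrics import log_loss

y_true = [2, 2]                       # only one distinct label observed
y_pred = [[0.1, 0.9], [0.1, 0.9]]     # columns: P(label 1), P(label 2)

# Without labels, the binarizer sees a single class and raises a
# ValueError asking for explicit labels.
try:
    log_loss(y_true, y_pred)
except ValueError as e:
    print(e)

# Passing labels resolves the ambiguity.
print(log_loss(y_true, y_pred, labels=[1, 2]))   # ~0.10536 (= -log(0.9))
```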
```diff
@@ -1596,37 +1608,58 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
+    y_pred = check_array(y_pred, ensure_2d=False)
+    check_consistent_length(y_pred, y_true)
+
     lb = LabelBinarizer()
-    T = lb.fit_transform(y_true)
-    if T.shape[1] == 1:
-        T = np.append(1 - T, T, axis=1)
 
-    y_pred = check_array(y_pred, ensure_2d=False)
-    # Clipping
-    Y = np.clip(y_pred, eps, 1 - eps)
+    if labels is not None:
+        lb.fit(labels)
+    else:
+        lb.fit(y_true)
+
+    if len(lb.classes_) == 1:
+        if labels is None:
+            raise ValueError('y_true contains only one label ({0}). Please '
+                             'provide the true labels explicitly through the '
+                             'labels argument.'.format(lb.classes_[0]))
+        else:
+            raise ValueError('The labels array needs to contain at least two '
+                             'labels for log_loss, got {0}.'.format(lb.classes_))
 
-    # This happens in cases when elements in y_pred have type "str".
-    if not isinstance(Y, np.ndarray):
-        raise ValueError("y_pred should be an array of floats.")
+    transformed_labels = lb.transform(y_true)
+
+    if transformed_labels.shape[1] == 1:
+        transformed_labels = np.append(1 - transformed_labels,
+                                       transformed_labels, axis=1)
+
+    # Clipping
+    y_pred = np.clip(y_pred, eps, 1 - eps)
 
     # If y_pred is of single dimension, assume y_true to be binary
     # and then check.
-    if Y.ndim == 1:
-        Y = Y[:, np.newaxis]
-    if Y.shape[1] == 1:
-        Y = np.append(1 - Y, Y, axis=1)
+    if y_pred.ndim == 1:
```
**Member:** So it might be a good time to add to the documentation that we can handle `y_pred = (n_samples,)` and that the score inferred is that of the positive class.
```diff
+        y_pred = y_pred[:, np.newaxis]
+    if y_pred.shape[1] == 1:
+        y_pred = np.append(1 - y_pred, y_pred, axis=1)
```
**Member:** Why was the `check_array` call moved?

**Contributor (author):** It was moved to the very top of the function.
```diff
 
     # Check if dimensions are consistent.
-    check_consistent_length(T, Y)
-    T = check_array(T)
-    Y = check_array(Y)
-    if T.shape[1] != Y.shape[1]:
-        raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d" % (T.shape[1], Y.shape[1]))
+    transformed_labels = check_array(transformed_labels)
+    if len(lb.classes_) != y_pred.shape[1]:
+        if labels is None:
+            raise ValueError("y_true and y_pred contain different number of "
+                             "classes {0}, {1}. Please provide the true "
+                             "labels explicitly through the labels argument. "
+                             "Classes found in "
+                             "y_true: {2}".format(transformed_labels.shape[1],
+                                                  y_pred.shape[1],
+                                                  lb.classes_))
+        else:
+            raise ValueError('The number of classes in labels is different '
+                             'from that in y_pred. Classes found in '
+                             'labels: {0}'.format(lb.classes_))
```
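(Aside, not part of the PR: a sketch of the class-count mismatch this branch guards against, assuming this branch's version of `log_loss`; the values are invented.)

```python
from sklearn.metrics import log_loss

# Three prediction columns but only two distinct values in y_true:
# without labels, log_loss cannot tell which class the extra column is.
y_true = [1, 2, 2]
y_pred = [[0.2, 0.7, 0.1], [0.6, 0.3, 0.1], [0.1, 0.8, 0.1]]
try:
    log_loss(y_true, y_pred)
except ValueError as e:
    print(e)                           # asks for explicit labels
print(log_loss(y_true, y_pred, labels=[1, 2, 3]))   # now well-defined
```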
```diff
 
     # Renormalize
-    Y /= Y.sum(axis=1)[:, np.newaxis]
-    loss = -(T * np.log(Y)).sum(axis=1)
+    y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
```
**Member:** Umm, is the renormalization really necessary?

**Contributor (author):** No, I think it is necessary...

**Contributor (author):** Tests also blow up if I remove it, which seems to support the fact that it's necessary.

**Member:** Ooh yeah, of course, sorry.
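(Aside, not part of the PR: a small sketch of why the renormalization matters. The probability rows below are invented and deliberately do not sum to one.)

```python
import numpy as np

eps = 1e-15
y_pred = np.array([[0.4, 0.4],      # rows need not sum to 1 as given
                   [0.1, 0.7]])
y_pred = np.clip(y_pred, eps, 1 - eps)

# Without this step, -sum(t * log(p)) would be taken against
# unnormalized "probabilities" and the loss would be biased.
y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
print(y_pred.sum(axis=1))           # [1. 1.]
```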
```diff
+    loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
 
     return _weighted_sum(loss, sample_weight, normalize)
```
**sklearn/metrics/tests/test_classification.py**

```diff
@@ -45,7 +45,6 @@
 from sklearn.metrics import zero_one_loss
 from sklearn.metrics import brier_score_loss
 
-
 from sklearn.metrics.classification import _check_targets
 from sklearn.exceptions import UndefinedMetricWarning
```
```diff
@@ -1384,6 +1383,32 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)
 
+    # test labels option
+
+    y_true = [2, 2]
```
**Member:** Should we add this under a separate test?

**Contributor (author):** Hmm, I think it's fine as it is?

**Contributor (author):** If you want it as a separate test, then I'd be fine with moving it. No strong opinions either way here.
```diff
+    y_pred = [[0.2, 0.7], [0.6, 0.5]]
+    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])
+    error_str = ('y_true contains only one label (2). Please provide '
+                 'the true labels explicitly through the labels argument.')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    y_pred = [[0.2, 0.7], [0.6, 0.5], [0.2, 0.3]]
```
**Member:** `y_score2`?

**Contributor (author):** Hmm, I don't see why this should be renamed to `y_score2`? Could you explain a bit more. Sorry if I'm being slow / missing something obvious.

**Member:** Just cosmetic. But in the test it's called `y_score`, right? I guess you switch between `y_score` and `y_pred` for the same thing multiple times.

**Contributor (author):** Ah, that makes sense. I didn't write most of these tests, so I just followed the example of the nearest neighbors 😄. I'll change it.
```diff
+    error_str = ('Found arrays with inconsistent numbers of '
+                 'samples: [2 3]')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
```
**Member:** Don't we test this in the common tests?

**Contributor (author):** Should I remove the test?

**Member:** I did not have a detailed look. If we already do check it, then yes.

**Contributor (author):** I couldn't find it, but I'd be fine with removing it if someone can point me to it.

**Member:** Have you checked the coverage?
```diff
+
+    # works when the labels argument is used
+
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
+    calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
+    assert_almost_equal(calculated_log_loss, true_log_loss)
+
+    # ensure labels work when len(np.unique(y_true)) != y_pred.shape[1]
+    y_true = [1, 2, 2]
+    y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
+    loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
+    assert_almost_equal(loss, 1.0630345, decimal=6)
 
 
 def test_log_loss_pandas_input():
     # case when input is a pandas series and dataframe gh-5715
```
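(Aside, not part of the PR: the expected value 1.0630345 in the new test can be reproduced by hand, since `log_loss` renormalizes each row of scores before taking logs.)

```python
import numpy as np

y_score2 = np.array([[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]])
probs = y_score2 / y_score2.sum(axis=1)[:, np.newaxis]   # renormalize rows

# True classes are [1, 2, 2] against labels [1, 2, 3],
# i.e. columns 0, 1 and 1 of the renormalized matrix.
p_true = probs[[0, 1, 2], [0, 1, 1]]
print(-np.log(p_true).mean())   # ~1.0630345
```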
**Member:** I think the docstring should document the assumptions. I'm not sure we do this in other places, but it would be good practice. If `labels` is `None` and `y_pred` is 1-d, what happens? This is sort of non-obvious. Also, the labels in `y_pred` are assumed to be ordered alphabetically, as done by `LabelBinarizer`, right?

**Member:** Maybe add a sentence to the main part of the docstring: "Log loss is only defined for two or more labels."?

**Contributor (author):** Good idea, will do.