diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index f04ae4cc41eca..199bbb95f2d88 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -270,6 +270,11 @@ Enhancements
      (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
      By `YenChen Lin`_.
 
+   - Added ``labels`` argument to :func:`metrics.log_loss` to explicitly provide
+     the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+     (`#7239 <https://github.com/scikit-learn/scikit-learn/pull/7239>`_)
+     By `Hong Guangguo`_ with help from `Mads Jensen`_ and `Nelson Liu`_.
+
 Bug fixes
 .........
 
@@ -4376,3 +4381,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 
 .. _Konstantin Podshumok: https://github.com/podshumok
 .. _David Staub: https://github.com/staubda
+
+.. _Hong Guangguo: https://github.com/hongguangguo
+
+.. _Mads Jensen: https://github.com/indianajensen
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 2b8a4289b6a41..608d7486da0e3 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -1544,13 +1544,15 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
     raise ValueError("{0} is not supported".format(y_type))
 
 
-def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.
 
     This is the loss function used in (multinomial) logistic regression
     and extensions of it such as neural networks, defined as the negative
     log-likelihood of the true labels given a probabilistic classifier's
-    predictions. For a single sample with true label yt in {0,1} and
+    predictions. The log loss is only defined for two or more labels.
+    For a single sample with true label yt in {0,1} and
     estimated probability yp that yt = 1, the log loss is
 
         -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
@@ -1562,9 +1564,13 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     y_true : array-like or label indicator matrix
         Ground truth (correct) labels for n_samples samples.
 
-    y_pred : array-like of float, shape = (n_samples, n_classes)
+    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
         Predicted probabilities, as returned by a classifier's
-        predict_proba method.
+        predict_proba method. If ``y_pred.shape = (n_samples,)``
+        the probabilities provided are assumed to be those of the
+        positive class. The labels in ``y_pred`` are assumed to be
+        ordered alphabetically, as done by
+        :class:`preprocessing.LabelBinarizer`.
 
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
@@ -1577,6 +1583,12 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from ``y_true``. If ``labels``
+        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
+        assumed to be binary and are inferred from ``y_true``.
+        .. versionadded:: 0.18
+
     Returns
     -------
     loss : float
@@ -1596,37 +1608,58 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     -----
     The logarithm used is the natural logarithm (base-e).
""" + y_pred = check_array(y_pred, ensure_2d=False) + check_consistent_length(y_pred, y_true) + lb = LabelBinarizer() - T = lb.fit_transform(y_true) - if T.shape[1] == 1: - T = np.append(1 - T, T, axis=1) - y_pred = check_array(y_pred, ensure_2d=False) - # Clipping - Y = np.clip(y_pred, eps, 1 - eps) + if labels is not None: + lb.fit(labels) + else: + lb.fit(y_true) + + if len(lb.classes_) == 1: + if labels is None: + raise ValueError('y_true contains only one label ({0}). Please provide ' + 'the true labels explicitly through the labels ' + 'argument.'.format(lb.classes_[0])) + else: + raise ValueError('The labels array needs to contain at least two labels' + 'for log_loss, got {0}.'.format(lb.classes_)) - # This happens in cases when elements in y_pred have type "str". - if not isinstance(Y, np.ndarray): - raise ValueError("y_pred should be an array of floats.") + transformed_labels = lb.transform(y_true) + + if transformed_labels.shape[1] == 1: + transformed_labels = np.append(1 - transformed_labels, + transformed_labels, axis=1) + + # Clipping + y_pred = np.clip(y_pred, eps, 1 - eps) # If y_pred is of single dimension, assume y_true to be binary # and then check. - if Y.ndim == 1: - Y = Y[:, np.newaxis] - if Y.shape[1] == 1: - Y = np.append(1 - Y, Y, axis=1) + if y_pred.ndim == 1: + y_pred = y_pred[:, np.newaxis] + if y_pred.shape[1] == 1: + y_pred = np.append(1 - y_pred, y_pred, axis=1) # Check if dimensions are consistent. - check_consistent_length(T, Y) - T = check_array(T) - Y = check_array(Y) - if T.shape[1] != Y.shape[1]: - raise ValueError("y_true and y_pred have different number of classes " - "%d, %d" % (T.shape[1], Y.shape[1])) + transformed_labels = check_array(transformed_labels) + if len(lb.classes_) != y_pred.shape[1]: + if labels is None: + raise ValueError("y_true and y_pred contain different number of classes " + "{0}, {1}. Please provide the true labels explicitly " + "through the labels argument. Classes found in" + "y_true: {2}".format(transformed_labels.shape[1], + y_pred.shape[1], lb.classes_)) + else: + raise ValueError('The number of classes in labels is different ' + 'from that in y_pred. Classes found in ' + 'labels: {0}'.format(lb.classes_)) # Renormalize - Y /= Y.sum(axis=1)[:, np.newaxis] - loss = -(T * np.log(Y)).sum(axis=1) + y_pred /= y_pred.sum(axis=1)[:, np.newaxis] + loss = -(transformed_labels * np.log(y_pred)).sum(axis=1) return _weighted_sum(loss, sample_weight, normalize) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e578964b600f2..c8b06b9413e5e 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -45,7 +45,6 @@ from sklearn.metrics import zero_one_loss from sklearn.metrics import brier_score_loss - from sklearn.metrics.classification import _check_targets from sklearn.exceptions import UndefinedMetricWarning @@ -1384,6 +1383,32 @@ def test_log_loss(): loss = log_loss(y_true, y_pred) assert_almost_equal(loss, 1.0383217, decimal=6) + # test labels option + + y_true = [2, 2] + y_pred = [[0.2, 0.7], [0.6, 0.5]] + y_score = np.array([[0.1, 0.9], [0.1, 0.9]]) + error_str = ('y_true contains only one label (2). 
+                 'the true labels explicitly through the labels argument.')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    y_pred = [[0.2, 0.7], [0.6, 0.5], [0.2, 0.3]]
+    error_str = ('Found arrays with inconsistent numbers of '
+                 'samples: [2 3]')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    # works when the labels argument is used
+
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
+    calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
+    assert_almost_equal(calculated_log_loss, true_log_loss)
+
+    # ensure labels work when len(np.unique(y_true)) != y_pred.shape[1]
+    y_true = [1, 2, 2]
+    y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
+    loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
+    assert_almost_equal(loss, 1.0630345, decimal=6)
+
 
 def test_log_loss_pandas_input():
     # case when input is a pandas series and dataframe gh-5715
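
For reviewers, a minimal usage sketch of the ``labels`` argument introduced by this patch, assuming the branch (or any scikit-learn release that ships it, 0.18 or later) is installed. The inputs and expected values are taken from the new test cases above; the variable names and printed comparisons are illustrative only::

    import numpy as np
    from sklearn.metrics import log_loss

    # y_true exercises only one class, so without ``labels`` log_loss now
    # raises a ValueError asking for the labels to be passed explicitly.
    y_true = [2, 2]
    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])

    # Fitting the LabelBinarizer on labels=[1, 2] maps column 1 of y_score
    # to class 2, so the loss reduces to -mean(log(y_score[:, 1])).
    loss = log_loss(y_true, y_score, labels=[1, 2])
    print(loss)                             # ~0.105361
    print(-np.mean(np.log(y_score[:, 1])))  # same value

    # labels also covers the case where y_true does not exercise every
    # column of y_pred: three probability columns, but only classes 1 and 2
    # are observed.
    y_true = [1, 2, 2]
    y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
    print(log_loss(y_true, y_score2, labels=[1, 2, 3]))  # ~1.0630345

Note that the rows of ``y_score2`` do not sum to one; ``log_loss`` renormalizes each row (``y_pred /= y_pred.sum(axis=1)[:, np.newaxis]``) before taking logarithms, which is why 1.0630345 is the value asserted in the test.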