From 6028e421126e63990444a0aaa1f6a49b274f2747 Mon Sep 17 00:00:00 2001
From: ghg <ghghaut@gmail.com>
Date: Tue, 26 Apr 2016 00:49:06 +0800
Subject: [PATCH 1/3] fixed log_loss bug

enhance log_loss labels option feature

log_loss

changed test log_loss case

u

add ValueError in log_loss
---
 sklearn/metrics/classification.py            | 17 +++++++++---
 sklearn/metrics/tests/test_classification.py | 28 +++++++++++++++++++-
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 2b8a4289b6a41..41315a1efb7d3 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -1544,7 +1544,8 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))
 
 
-def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
+def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
+             sample_weight=None):
     """Log loss, aka logistic loss or cross-entropy loss.
 
     This is the loss function used in (multinomial) logistic regression
@@ -1566,6 +1567,10 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
         Predicted probabilities, as returned by a classifier's
         predict_proba method.
 
+
+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true
+
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
         clipped to max(eps, min(1 - eps, p)).
@@ -1597,11 +1602,17 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     The logarithm used is the natural logarithm (base-e).
     """
     lb = LabelBinarizer()
-    T = lb.fit_transform(y_true)
+    lb.fit(labels) if labels is not None else lb.fit(y_true)
+    if labels is None and len(lb.classes_) == 1:
+        raise ValueError('y_true has only one label,'
+        'maybe get error log loss, should use labels option')
+
+    T = lb.transform(y_true)
+
     if T.shape[1] == 1:
         T = np.append(1 - T, T, axis=1)
-
     y_pred = check_array(y_pred, ensure_2d=False)
+
     # Clipping
     Y = np.clip(y_pred, eps, 1 - eps)
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index e578964b600f2..f28e16e74cc9a 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -45,7 +45,6 @@
 from sklearn.metrics import zero_one_loss
 from sklearn.metrics import brier_score_loss
 
-
 from sklearn.metrics.classification import _check_targets
 from sklearn.exceptions import UndefinedMetricWarning
 
@@ -1384,6 +1383,33 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)
 
+    #test labels option
+
+    X = [[1,1], [1,1], [2,2], [2,2]]
+    y_label = [1,1,2,2]
+
+    X_test = [[2,2], [2,2]]
+    y_true = [2,2]
+    y_score = np.array([[0.1,0.9], [0.1, 0.9]])
+    
+    # because y_true label are the same, if not use labels option, will get error
+    #error_logloss = log_loss(y_true, y_score)
+    #label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
+    #assert_almost_equal(error_logloss, label_not_of_2_loss)
+    #assert_raises(log_loss(y_true, y_score))
+
+    error_str  = ('y_true has only one label,'
+        'maybe get error log loss, should use labels option')
+
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    # use labels, it works
+    ture_log_loss = -np.mean(np.log(y_score[:, 1]))
+    calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
+    assert_almost_equal(calculated_log_loss, ture_log_loss)
+
+    
+
 
 def test_log_loss_pandas_input():
     # case when input is a pandas series and dataframe gh-5715

From 79406f4f07770c10e8384eec2bdcaf2bab2e62a8 Mon Sep 17 00:00:00 2001
From: Mads Jensen <jensen.mn@gmail.com>
Date: Tue, 9 Aug 2016 18:06:10 +0100
Subject: [PATCH 2/3] fixed error message when y_pred and y_test labels don't
 match

fixes as per existing pull request #6714

fixed log_loss bug

enhance log_loss labels option feature

log_loss

changed test log_loss case

u

add ValueError in log_loss

fixes as per existing pull request #6714

fixed error message when y_pred and y_test labels don't match

fixed error message when y_pred and y_test labels don't match

corrected doc/whats_new.rst for syntax and with correct formatting of credits

additional formatting fixes for doc/whats_new.rst

fixed versionadded comment

removed superfluous line

removed superfluous line
---
 doc/whats_new.rst                            |  9 ++++++
 sklearn/metrics/classification.py            | 20 ++++++------
 sklearn/metrics/tests/test_classification.py | 34 +++++++++-----------
 3 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index f04ae4cc41eca..21a0211540a8e 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -270,6 +270,11 @@ Enhancements
      (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
      By `YenChen Lin`_.
 
+   - Added `labels` flag to :class:`metrics.log_loss` to correct metric`s when
+     only one class is present in test data set
+     `#7166 <https://github.com/scikit-learn/scikit-learn/pull/7166/>`_ 
+     by `Hong Guangguo`_ with support of `Mads Jensen`_.
+
 Bug fixes
 .........
 
@@ -4376,3 +4381,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Konstantin Podshumok: https://github.com/podshumok
 
 .. _David Staub: https://github.com/staubda
+
+.. _Hong Guangguo: https://github.com/hongguangguo
+
+.. _Mads Jensen: https://github.com/indianajensen
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 41315a1efb7d3..e8d530cfb5b10 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -1544,8 +1544,8 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))
 
 
-def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
-             sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.
 
     This is the loss function used in (multinomial) logistic regression
@@ -1567,10 +1567,6 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
         Predicted probabilities, as returned by a classifier's
         predict_proba method.
 
-
-    labels : array-like, optional (default=None)
-        If not provided, labels will be inferred from y_true
-
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
         clipped to max(eps, min(1 - eps, p)).
@@ -1582,6 +1578,10 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+    labels : array-like, optional (default=None) 
+        If not provided, labels will be inferred from y_true
+        .. versionadded:: 0.18
+        
     Returns
     -------
     loss : float
@@ -1604,8 +1604,8 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     lb = LabelBinarizer()
     lb.fit(labels) if labels is not None else lb.fit(y_true)
     if labels is None and len(lb.classes_) == 1:
-        raise ValueError('y_true has only one label,'
-        'maybe get error log loss, should use labels option')
+        raise ValueError('y_true has only one label. Please provide '
+        'the true labels explicitly through the labels argument.')
 
     T = lb.transform(y_true)
 
@@ -1633,7 +1633,9 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     Y = check_array(Y)
     if T.shape[1] != Y.shape[1]:
         raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d" % (T.shape[1], Y.shape[1]))
+                         "%d, %d.\nPlease provide the true labels explicitly "
+                         "through the labels argument" %
+                         (T.shape[1], Y.shape[1]))
 
     # Renormalize
     Y /= Y.sum(axis=1)[:, np.newaxis]
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index f28e16e74cc9a..9ac6eb8d0a297 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -1383,32 +1383,28 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)
 
-    #test labels option
+    # test labels option
 
-    X = [[1,1], [1,1], [2,2], [2,2]]
-    y_label = [1,1,2,2]
+    y_true = [2, 2]
+    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])
 
-    X_test = [[2,2], [2,2]]
-    y_true = [2,2]
-    y_score = np.array([[0.1,0.9], [0.1, 0.9]])
-    
-    # because y_true label are the same, if not use labels option, will get error
-    #error_logloss = log_loss(y_true, y_score)
-    #label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
-    #assert_almost_equal(error_logloss, label_not_of_2_loss)
-    #assert_raises(log_loss(y_true, y_score))
+    # because y_true label are the same, there should be an error if the
+    # labels option has not been used
 
-    error_str  = ('y_true has only one label,'
-        'maybe get error log loss, should use labels option')
+    # error_logloss = log_loss(y_true, y_score)
+    # label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
+    # assert_almost_equal(error_logloss, label_not_of_2_loss)
+    # assert_raises(log_loss(y_true, y_score))
+
+    error_str = ('y_true has only one label. Please provide '
+                 'the true labels explicitly through the labels argument.')
 
     assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
 
-    # use labels, it works
-    ture_log_loss = -np.mean(np.log(y_score[:, 1]))
+    # when the labels argument is used, it works
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
     calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
-    assert_almost_equal(calculated_log_loss, ture_log_loss)
-
-    
+    assert_almost_equal(calculated_log_loss, true_log_loss)
 
 
 def test_log_loss_pandas_input():

From d97a25fc3b09f5a6e3101d410b299e1351d0c0b0 Mon Sep 17 00:00:00 2001
From: Nelson Liu <nelson.liu.2009@gmail.com>
Date: Wed, 24 Aug 2016 13:34:45 -0700
Subject: [PATCH 3/3] Wrap up changes to fix log_loss bug and clean up log_loss

fix a typo in whatsnew

refactor conditional and move dtype check before np.clip

general cleanup of log_loss

remove dtype checks

edit non-regression test and wordings

fix non-regression test

misc doc fixes / clarifications + final touches

fix naming of y_score2 variable

specify log loss is only valid for 2 labels or more
---
 doc/whats_new.rst                            |  8 +-
 sklearn/metrics/classification.py            | 86 ++++++++++++--------
 sklearn/metrics/tests/test_classification.py | 25 +++---
 3 files changed, 71 insertions(+), 48 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 21a0211540a8e..199bbb95f2d88 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -270,10 +270,10 @@ Enhancements
      (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
      By `YenChen Lin`_.
 
-   - Added `labels` flag to :class:`metrics.log_loss` to correct metric`s when
-     only one class is present in test data set
-     `#7166 <https://github.com/scikit-learn/scikit-learn/pull/7166/>`_ 
-     by `Hong Guangguo`_ with support of `Mads Jensen`_.
+   - Added ``labels`` flag to :func:`metrics.log_loss` to explicitly provide
+     the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+     (`#7239 <https://github.com/scikit-learn/scikit-learn/pull/7239/>`_)
+     by `Hong Guangguo`_ with help from `Mads Jensen`_ and `Nelson Liu`_.
 
 Bug fixes
 .........
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index e8d530cfb5b10..608d7486da0e3 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -1551,7 +1551,8 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
     This is the loss function used in (multinomial) logistic regression
     and extensions of it such as neural networks, defined as the negative
     log-likelihood of the true labels given a probabilistic classifier's
-    predictions. For a single sample with true label yt in {0,1} and
+    predictions. The log loss is only defined for two or more labels.
+    For a single sample with true label yt in {0,1} and
     estimated probability yp that yt = 1, the log loss is
 
         -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
@@ -1563,9 +1564,13 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
     y_true : array-like or label indicator matrix
         Ground truth (correct) labels for n_samples samples.
 
-    y_pred : array-like of float, shape = (n_samples, n_classes)
+    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
         Predicted probabilities, as returned by a classifier's
-        predict_proba method.
+        predict_proba method. If ``y_pred.shape = (n_samples,)``
+        the probabilities provided are assumed to be that of the
+        positive class. The labels in ``y_pred`` are assumed to be
+        ordered alphabetically, as done by
+        :class:`preprocessing.LabelBinarizer`.
 
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
@@ -1578,10 +1583,12 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
-    labels : array-like, optional (default=None) 
-        If not provided, labels will be inferred from y_true
+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true. If ``labels``
+        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
+        assumed to be binary and are inferred from ``y_true``.
         .. versionadded:: 0.18
-        
+
     Returns
     -------
     loss : float
@@ -1601,45 +1608,58 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
     -----
     The logarithm used is the natural logarithm (base-e).
     """
+    y_pred = check_array(y_pred, ensure_2d=False)
+    check_consistent_length(y_pred, y_true)
+
     lb = LabelBinarizer()
-    lb.fit(labels) if labels is not None else lb.fit(y_true)
-    if labels is None and len(lb.classes_) == 1:
-        raise ValueError('y_true has only one label. Please provide '
-        'the true labels explicitly through the labels argument.')
 
-    T = lb.transform(y_true)
+    if labels is not None:
+        lb.fit(labels)
+    else:
+        lb.fit(y_true)
 
-    if T.shape[1] == 1:
-        T = np.append(1 - T, T, axis=1)
-    y_pred = check_array(y_pred, ensure_2d=False)
+    if len(lb.classes_) == 1:
+        if labels is None:
+            raise ValueError('y_true contains only one label ({0}). Please provide '
+                             'the true labels explicitly through the labels '
+                             'argument.'.format(lb.classes_[0]))
+        else:
+            raise ValueError('The labels array needs to contain at least two labels'
+                             ' for log_loss, got {0}.'.format(lb.classes_))
 
-    # Clipping
-    Y = np.clip(y_pred, eps, 1 - eps)
+    transformed_labels = lb.transform(y_true)
+
+    if transformed_labels.shape[1] == 1:
+        transformed_labels = np.append(1 - transformed_labels,
+                                       transformed_labels, axis=1)
 
-    # This happens in cases when elements in y_pred have type "str".
-    if not isinstance(Y, np.ndarray):
-        raise ValueError("y_pred should be an array of floats.")
+    # Clipping
+    y_pred = np.clip(y_pred, eps, 1 - eps)
 
     # If y_pred is of single dimension, assume y_true to be binary
     # and then check.
-    if Y.ndim == 1:
-        Y = Y[:, np.newaxis]
-    if Y.shape[1] == 1:
-        Y = np.append(1 - Y, Y, axis=1)
+    if y_pred.ndim == 1:
+        y_pred = y_pred[:, np.newaxis]
+    if y_pred.shape[1] == 1:
+        y_pred = np.append(1 - y_pred, y_pred, axis=1)
 
     # Check if dimensions are consistent.
-    check_consistent_length(T, Y)
-    T = check_array(T)
-    Y = check_array(Y)
-    if T.shape[1] != Y.shape[1]:
-        raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d.\nPlease provide the true labels explicitly "
-                         "through the labels argument" %
-                         (T.shape[1], Y.shape[1]))
+    transformed_labels = check_array(transformed_labels)
+    if len(lb.classes_) != y_pred.shape[1]:
+        if labels is None:
+            raise ValueError("y_true and y_pred contain different number of classes "
+                             "{0}, {1}. Please provide the true labels explicitly "
+                             "through the labels argument. Classes found in "
+                             "y_true: {2}".format(transformed_labels.shape[1],
+                                               y_pred.shape[1], lb.classes_))
+        else:
+            raise ValueError('The number of classes in labels is different '
+                             'from that in y_pred. Classes found in '
+                             'labels: {0}'.format(lb.classes_))
 
     # Renormalize
-    Y /= Y.sum(axis=1)[:, np.newaxis]
-    loss = -(T * np.log(Y)).sum(axis=1)
+    y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
+    loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
 
     return _weighted_sum(loss, sample_weight, normalize)
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 9ac6eb8d0a297..c8b06b9413e5e 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -1386,26 +1386,29 @@ def test_log_loss():
     # test labels option
 
     y_true = [2, 2]
+    y_pred = [[0.2, 0.7], [0.6, 0.5]]
     y_score = np.array([[0.1, 0.9], [0.1, 0.9]])
-
-    # because y_true label are the same, there should be an error if the
-    # labels option has not been used
-
-    # error_logloss = log_loss(y_true, y_score)
-    # label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
-    # assert_almost_equal(error_logloss, label_not_of_2_loss)
-    # assert_raises(log_loss(y_true, y_score))
-
-    error_str = ('y_true has only one label. Please provide '
+    error_str = ('y_true contains only one label (2). Please provide '
                  'the true labels explicitly through the labels argument.')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
 
+    y_pred = [[0.2, 0.7], [0.6, 0.5], [0.2, 0.3]]
+    error_str = ('Found arrays with inconsistent numbers of '
+                 'samples: [2 3]')
     assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
 
-    # when the labels argument is used, it works
+    # works when the labels argument is used
+
     true_log_loss = -np.mean(np.log(y_score[:, 1]))
     calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
     assert_almost_equal(calculated_log_loss, true_log_loss)
 
+    # ensure labels work when len(np.unique(y_true)) != y_pred.shape[1]
+    y_true = [1, 2, 2]
+    y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
+    loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
+    assert_almost_equal(loss, 1.0630345, decimal=6)
+
 
 def test_log_loss_pandas_input():
     # case when input is a pandas series and dataframe gh-5715