[MRG+2] Added sample weight support to confusion matrix. #4001


Closed · wants to merge 4 commits
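For orientation before the diff: a minimal sketch of the behavior this PR adds, with made-up data, assuming a scikit-learn build that includes the patch below:

```python
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 2]
y_pred = [0, 1, 1, 1, 2]

# Unweighted: each sample adds 1 to cell (true label, predicted label).
print(confusion_matrix(y_true, y_pred))
# [[1 1 0]
#  [0 2 0]
#  [0 0 1]]

# Weighted: each sample adds its weight instead, so the result may be float.
w = [0.5, 0.5, 1.0, 1.0, 2.0]
print(confusion_matrix(y_true, y_pred, sample_weight=w))
# [[0.5 0.5 0. ]
#  [0.  2.  0. ]
#  [0.  0.  2. ]]
```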
16 changes: 14 additions & 2 deletions sklearn/metrics/classification.py
@@ -17,6 +17,7 @@
# Noel Dawe <[email protected]>
# Jatin Shah <[email protected]>
# Saurabh Jha <[email protected]>
+# Bernardo Stein <[email protected]>
# License: BSD 3 clause

from __future__ import division
@@ -178,7 +179,7 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
    return _weighted_sum(score, sample_weight, normalize)


-def confusion_matrix(y_true, y_pred, labels=None):
+def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None):
    """Compute confusion matrix to evaluate the accuracy of a classification

    By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
@@ -201,6 +202,8 @@ def confusion_matrix(y_true, y_pred, labels=None):
        If none is given, those that appear at least once
        in ``y_true`` or ``y_pred`` are used in sorted order.

+    sample_weight : array-like of shape = [n_samples], optional
+        Sample weights.

    Returns
    -------
@@ -239,6 +242,13 @@ def confusion_matrix(y_true, y_pred, labels=None):
    else:
        labels = np.asarray(labels)

+    if sample_weight is None:
+        sample_weight = np.ones(y_true.shape[0], dtype=np.int)
+    else:
+        sample_weight = np.asarray(sample_weight)
+
Member:

nitpick: can you add a check here to see whether sample_weight is the same size as y_true and y_pred, using check_consistent_length?

Author:

@MechCoder sure, that seems like a good addition. I added the check after the highlighted code so that if any of the preceding code changes in the future, the check will still catch problems.

It would be good to have some tests against this behavior, but I'm a little short on time over the coming weeks, so it's probably better to merge this now and I'll open another PR later to improve the tests. What do you think?
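For reference, check_consistent_length is an existing helper in sklearn.utils that raises a ValueError when its arguments disagree in length; a quick illustration with made-up inputs:

```python
from sklearn.utils import check_consistent_length

y_true = [0, 1, 1]
y_pred = [0, 1, 0]

# Same length everywhere: passes silently.
check_consistent_length([.1, .2, .3], y_true, y_pred)

# Mismatched sample_weight: raises ValueError.
try:
    check_consistent_length([.1, .2], y_true, y_pred)
except ValueError as exc:
    print(exc)
```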

+    check_consistent_length(sample_weight, y_true, y_pred)
+
    n_labels = labels.size
    label_to_ind = dict((y, x) for x, y in enumerate(labels))
    # convert yt, yp into index
@@ -249,8 +259,10 @@ def confusion_matrix(y_true, y_pred, labels=None):
    ind = np.logical_and(y_pred < n_labels, y_true < n_labels)
    y_pred = y_pred[ind]
    y_true = y_true[ind]
+    # also eliminate weights of eliminated items
+    sample_weight = sample_weight[ind]

-    CM = coo_matrix((np.ones(y_true.shape[0], dtype=np.int), (y_true, y_pred)),
+    CM = coo_matrix((sample_weight, (y_true, y_pred)),
                    shape=(n_labels, n_labels)
                    ).toarray()
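The swapped-in CM line works because scipy's COO constructor sums duplicate (row, col) coordinates when the matrix is densified, which is exactly the accumulation a confusion matrix needs; a standalone illustration:

```python
import numpy as np
from scipy.sparse import coo_matrix

y_true = np.array([0, 0, 1])              # row indices (true labels)
y_pred = np.array([0, 0, 1])              # column indices (predicted labels)
sample_weight = np.array([0.5, 2.0, 1.0])

# The two entries at (0, 0) are summed: 0.5 + 2.0 = 2.5.
cm = coo_matrix((sample_weight, (y_true, y_pred)), shape=(2, 2)).toarray()
print(cm)
# [[2.5 0. ]
#  [0.  1. ]]
```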
15 changes: 15 additions & 0 deletions sklearn/metrics/tests/test_classification.py
@@ -511,6 +511,21 @@ def test(y_true, y_pred, string_type=False):
         string_type=True)


+def test_confusion_matrix_sample_weight():
+    """Test confusion matrix - case with sample_weight"""
+    y_true, y_pred, _ = make_prediction(binary=False)
+
+    weights = [.1] * 25 + [.2] * 25 + [.3] * 25
+
+    cm = confusion_matrix(y_true, y_pred, sample_weight=weights)
+
+    true_cm = (.1 * confusion_matrix(y_true[:25], y_pred[:25]) +
+               .2 * confusion_matrix(y_true[25:50], y_pred[25:50]) +
+               .3 * confusion_matrix(y_true[50:], y_pred[50:]))
+
+    assert_array_almost_equal(cm, true_cm)
+
+
def test_confusion_matrix_multiclass_subset_labels():
    # Test confusion matrix - multi-class case with subset of labels
    y_true, y_pred, _ = make_prediction(binary=False)
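The new test relies on the weighted confusion matrix being linear in the weights: a constant weight w over a block of samples scales that block's unweighted matrix by w, and disjoint blocks add. A standalone check of that identity with made-up data (labels is passed so every block produces the same matrix shape):

```python
import numpy as np
from sklearn.metrics import confusion_matrix

rng = np.random.RandomState(0)
y_true = rng.randint(0, 3, size=75)
y_pred = rng.randint(0, 3, size=75)
weights = [.1] * 25 + [.2] * 25 + [.3] * 25
labels = [0, 1, 2]

weighted = confusion_matrix(y_true, y_pred, labels=labels,
                            sample_weight=weights)
blockwise = (.1 * confusion_matrix(y_true[:25], y_pred[:25], labels=labels) +
             .2 * confusion_matrix(y_true[25:50], y_pred[25:50], labels=labels) +
             .3 * confusion_matrix(y_true[50:], y_pred[50:], labels=labels))

np.testing.assert_array_almost_equal(weighted, blockwise)
```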
6 changes: 5 additions & 1 deletion sklearn/metrics/tests/test_common.py
@@ -360,7 +360,11 @@
# No Sample weight support
METRICS_WITHOUT_SAMPLE_WEIGHT = [
    "cohen_kappa_score",
-    "confusion_matrix",
+    "confusion_matrix",  # Left this one here because the tests in this file
+                         # do not work for confusion_matrix, as its output is
+                         # a matrix instead of a number. Testing of
+                         # confusion_matrix with sample_weight is in
+                         # test_classification.py
    "median_absolute_error",
]

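For context on why confusion_matrix stays on this exclusion list: the common tests compare scalar metric outputs under sample weighting, for instance checking that integer weights act like sample repetition. A simplified sketch of that invariance for a scalar metric (the real helper in this file carries more machinery):

```python
import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
weights = np.array([1, 2, 2, 1])

# An integer weight w should be equivalent to repeating the sample w times.
weighted = accuracy_score(y_true, y_pred, sample_weight=weights)
repeated = accuracy_score(np.repeat(y_true, weights),
                          np.repeat(y_pred, weights))
assert np.isclose(weighted, repeated)
```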