From 64e30d6ae583649698a782dca7d40f87fa93b081 Mon Sep 17 00:00:00 2001
From: Gaurav Dhingra
Date: Sun, 5 Nov 2017 21:33:29 +0530
Subject: [PATCH 01/88] multiclass jaccard similarity not equal to accuracy_score

Fixes #7332
---
 sklearn/metrics/classification.py            | 11 ++++++++++-
 sklearn/metrics/tests/test_classification.py |  4 ++++
 sklearn/svm/base.py                          |  2 +-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 7d8b887c66624..187f3a138cc5e 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -441,6 +441,13 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True,
     >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]),\
             np.ones((2, 2)))
     0.75
+
+    In the multiclass case:
+
+    >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
+    >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
+    >>> jaccard_similarity_score(y_true, y_pred)
+    0.38888888888888884
     """
 
     # Compute accuracy for each possible representation
@@ -454,7 +461,9 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True,
         score = pred_and_true / pred_or_true
         score[pred_or_true == 0.0] = 1.0
     else:
-        score = y_true == y_pred
+        C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
+        den = C.sum(0) + C.sum(1) - C.diagonal()
+        score = C.diagonal()/den
 
     return _weighted_sum(score, sample_weight, normalize)
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index c259036807f7f..520bc99277683 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -955,6 +955,10 @@ def test_multilabel_jaccard_similarity_score():
     assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
     assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)
 
+    y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
+    y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
+    assert_almost_equal(jaccard_similarity_score(y1, y2), 7. / 18)
+
 
 @ignore_warnings
 def test_precision_recall_f1_score_multilabel_1():
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index 0b1719562cd57..7ffd373182957 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -554,7 +554,7 @@ def predict(self, X):
         # estimators.
def _check_proba(self): if not self.probability: - raise AttributeError("predict_proba is not available when " + raise AttributeError("predict_proba is not available when" " probability=False") if self._impl not in ('c_svc', 'nu_svc'): raise AttributeError("predict_proba only implemented for SVC" From a495cfc06491d5f292c970d70ba7acab2dffee8c Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 8 Nov 2017 21:51:17 +0530 Subject: [PATCH 02/88] add space and fix input --- sklearn/metrics/classification.py | 3 ++- sklearn/metrics/tests/test_classification.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 187f3a138cc5e..cbb71c7abfc0f 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -463,11 +463,12 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal()/den + score = C.diagonal() / den return _weighted_sum(score, sample_weight, normalize) + def matthews_corrcoef(y_true, y_pred, sample_weight=None): """Compute the Matthews correlation coefficient (MCC) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 520bc99277683..a5ce523583b03 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -957,7 +957,7 @@ def test_multilabel_jaccard_similarity_score(): y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] - assert_almost_equal(jaccard_similarity_score(y1, y2), 7. / 18) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. 
/ 18) @ignore_warnings From fcba7f05122924a85f6f3303674fa19c4db9aef2 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 10 Nov 2017 14:17:37 +0530 Subject: [PATCH 03/88] score being a n_class size array and weight already taken care of --- sklearn/metrics/classification.py | 10 ++++++---- sklearn/metrics/tests/test_classification.py | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index cbb71c7abfc0f..382fa3c2c34d8 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -434,7 +434,7 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, >>> jaccard_similarity_score(y_true, y_pred) 0.5 >>> jaccard_similarity_score(y_true, y_pred, normalize=False) - 2 + 2.0 In the multilabel case with binary label indicators: @@ -460,13 +460,15 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1) score = pred_and_true / pred_or_true score[pred_or_true == 0.0] = 1.0 + return _weighted_sum(score, sample_weight, normalize) else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) den = C.sum(0) + C.sum(1) - C.diagonal() score = C.diagonal() / den - - return _weighted_sum(score, sample_weight, normalize) - + if normalize: + return np.average(score) + else: + return np.sum(score) def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a5ce523583b03..6d5d287d43cc5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -957,6 +957,8 @@ def test_multilabel_jaccard_similarity_score(): y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + normalize=False), 7. / 6) assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 18) From d49ccab3559c7d8a4258f7c2f5b3c39119706059 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 24 Nov 2017 17:26:52 +0530 Subject: [PATCH 04/88] add space to fix printing of doctest --- sklearn/metrics/classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 382fa3c2c34d8..eb8b174278697 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1021,6 +1021,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, It is possible to compute per-label precisions, recalls, F1-scores and supports instead of averaging: + >>> precision_recall_fscore_support(y_true, y_pred, average=None, ... labels=['pig', 'dog', 'cat']) ... 
# doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE From 615ac9ae335000bdb1ea8c1ae4980a91b9d891b3 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 24 Nov 2017 23:52:03 +0530 Subject: [PATCH 05/88] add support for 'average' of type 'macro', 'micro', 'weighted' --- sklearn/metrics/classification.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index eb8b174278697..22faa630d53b3 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -451,6 +451,9 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, """ # Compute accuracy for each possible representation + average_options = (None, 'micro', 'macro', 'weighted') + if average not in average_options: + raise ValueError("average has to be one of " + str(average_options)) y_type, y_true, y_pred = _check_targets(y_true, y_pred) check_consistent_length(y_true, y_pred, sample_weight) if y_type.startswith('multilabel'): @@ -464,11 +467,20 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal() / den - if normalize: + if average == 'macro': + den = C.sum(0) + C.sum(1) - C.diagonal() + score = C.diagonal() / den return np.average(score) - else: - return np.sum(score) + elif average == 'micro': + den = 2*np.sum(C) - np.sum(C.diagonal()) + score = np.sum(C.diagonal()) + return score / den + elif average == 'weighted': + den = C.sum(0) + C.sum(1) - C.diagonal() + score = C.diagonal()/den + if sample_weight == None: + sample_weight = C.sum(0)/C.sum() + return np.sum(sample_weight*score) def matthews_corrcoef(y_true, y_pred, sample_weight=None): From 78b2a846d9c79fbf56ebada7b3acfe07a9fde535 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 25 Nov 2017 12:01:23 +0530 Subject: [PATCH 06/88] add tests and make documentation changes --- doc/modules/model_evaluation.rst | 10 ++-- sklearn/metrics/classification.py | 52 +++++++++++--------- sklearn/metrics/tests/test_classification.py | 9 +++- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 4a19e27e9c11c..784b505dddbae 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -676,10 +676,14 @@ score is equal to the classification accuracy. >>> from sklearn.metrics import jaccard_similarity_score >>> y_pred = [0, 2, 1, 3] >>> y_true = [0, 1, 2, 3] - >>> jaccard_similarity_score(y_true, y_pred) + >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 - >>> jaccard_similarity_score(y_true, y_pred, normalize=False) - 2 + >>> jaccard_similarity_score(y_true, y_pred, average='micro') + 0.33... + >>> jaccard_similarity_score(y_true, y_pred, average='weighted') + 0.5 + >>> jaccard_similarity_score(y_true, y_pred) + array([ 1., 0., 0., 1. ]) In the multilabel case with binary label indicators: :: diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 22faa630d53b3..12e04c4dc9311 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -372,7 +372,7 @@ class labels [2]_. 
return 1 - k -def jaccard_similarity_score(y_true, y_pred, normalize=True, +def jaccard_similarity_score(y_true, y_pred, average=None, sample_weight=None): """Jaccard similarity coefficient score @@ -391,23 +391,28 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - normalize : bool, optional (default=True) - If ``False``, return the sum of the Jaccard similarity coefficient - over the sample set. Otherwise, return the average of Jaccard - similarity coefficient. - sample_weight : array-like of shape = [n_samples], optional Sample weights. + average : string, [None (default), 'micro', 'macro', 'weighted'] + If ``None``, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance. + Returns ------- - score : float - If ``normalize == True``, return the average Jaccard similarity - coefficient, else it returns the sum of the Jaccard similarity - coefficient over the sample set. - - The best performance is 1 with ``normalize == True`` and the number - of samples with ``normalize == False``. + score: float (if average is not None) or array of float, shape =\ + [n_unique_labels] See also -------- @@ -415,26 +420,24 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, Notes ----- - In binary and multiclass classification, this function is equivalent - to the ``accuracy_score``. It differs in the multilabel classification - problem. + In differs in implementation from ``accuracy_score`` from all three + classifications i.e. binary, mutliclass and multilabel. References ---------- .. [1] `Wikipedia entry for the Jaccard index `_ - Examples -------- >>> import numpy as np >>> from sklearn.metrics import jaccard_similarity_score >>> y_pred = [0, 2, 1, 3] >>> y_true = [0, 1, 2, 3] - >>> jaccard_similarity_score(y_true, y_pred) + >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 - >>> jaccard_similarity_score(y_true, y_pred, normalize=False) - 2.0 + >>> jaccard_similarity_score(y_true, y_pred, normalize='micro') + 0.33... 
In the multilabel case with binary label indicators: @@ -450,12 +453,14 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, 0.38888888888888884 """ - # Compute accuracy for each possible representation average_options = (None, 'micro', 'macro', 'weighted') if average not in average_options: raise ValueError("average has to be one of " + str(average_options)) + + # Compute accuracy for each possible representation y_type, y_true, y_pred = _check_targets(y_true, y_pred) check_consistent_length(y_true, y_pred, sample_weight) + if y_type.startswith('multilabel'): with np.errstate(divide='ignore', invalid='ignore'): # oddly, we may get an "invalid" rather than a "divide" error here @@ -466,7 +471,6 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, return _weighted_sum(score, sample_weight, normalize) else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) - den = C.sum(0) + C.sum(1) - C.diagonal() if average == 'macro': den = C.sum(0) + C.sum(1) - C.diagonal() score = C.diagonal() / den @@ -481,6 +485,10 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True, if sample_weight == None: sample_weight = C.sum(0)/C.sum() return np.sum(sample_weight*score) + else: + den = C.sum(0) + C.sum(1) - C.diagonal() + score = C.diagonal() / den + return score def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 6d5d287d43cc5..f6ebc5abfac76 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -958,8 +958,13 @@ def test_multilabel_jaccard_similarity_score(): y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - normalize=False), 7. / 6) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 18) + average='macro'), 7. / 18) + assert_equal(jaccard_similarity_score(y_true, y_pred, + average='micro'), 1. / 2) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='weighted'), 7. / 12) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred), + np.array([2. / 3, 0., 1. 
/ 2])) @ignore_warnings From 41f7e2ba2113ef0e59ed08a4945645a2d98b9b5f Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 29 Nov 2017 12:14:22 +0530 Subject: [PATCH 07/88] use 'average' for 'multilabel' classification --- sklearn/metrics/classification.py | 22 ++++++++++++++------ sklearn/metrics/tests/test_classification.py | 10 +++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 12e04c4dc9311..819d4a8960f35 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -463,12 +463,22 @@ def jaccard_similarity_score(y_true, y_pred, average=None, if y_type.startswith('multilabel'): with np.errstate(divide='ignore', invalid='ignore'): - # oddly, we may get an "invalid" rather than a "divide" error here - pred_or_true = count_nonzero(y_true + y_pred, axis=1) - pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1) - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] = 1.0 - return _weighted_sum(score, sample_weight, normalize) + pred_or_true = count_nonzero(y_true + y_pred, axis=0) + pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=0) + if average == 'macro': + score = pred_and_true / pred_or_true + n_features = y_true.shape[1] + return np.sum(score) / n_features + elif average == 'micro': + score = np.sum(pred_and_true) / np.sum(pred_or_true) + return score + elif average == 'weighted': + score = pred_and_true / pred_or_true + score = _weighted_sum(score, sample_weight, normalize=True) + return score + else: + score = pred_and_true / pred_or_true + return score else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if average == 'macro': diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f6ebc5abfac76..8909ada0f54bd 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -966,6 +966,16 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred), np.array([2. / 3, 0., 1. / 2])) + # multilabel testing + y_true = np.array([[0, 1, 1], [1, 0, 0]]) + y_pred = np.array([[1, 1, 1], [1, 0, 1]]) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro'), 2. / 3) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='mirco'), 3. / 5) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred), + np.array([1. / 2, 1., 1. / 2])) + @ignore_warnings def test_precision_recall_f1_score_multilabel_1(): From a7d01118f38b8295bf54521ce5a5dc2b1633cb1b Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 29 Nov 2017 18:43:57 +0530 Subject: [PATCH 08/88] introduce average='binary', average='samples' --- sklearn/metrics/classification.py | 44 ++++++++++++++++++-- sklearn/metrics/tests/test_classification.py | 10 ++++- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 819d4a8960f35..4312b73ec6bdd 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -372,7 +372,7 @@ class labels [2]_. 
return 1 - k -def jaccard_similarity_score(y_true, y_pred, average=None, +def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, sample_weight=None): """Jaccard similarity coefficient score @@ -394,10 +394,14 @@ def jaccard_similarity_score(y_true, y_pred, average=None, sample_weight : array-like of shape = [n_samples], optional Sample weights. - average : string, [None (default), 'micro', 'macro', 'weighted'] + average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ + 'weighted',] If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. @@ -408,6 +412,9 @@ def jaccard_similarity_score(y_true, y_pred, average=None, Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification). Returns ------- @@ -420,7 +427,7 @@ def jaccard_similarity_score(y_true, y_pred, average=None, Notes ----- - In differs in implementation from ``accuracy_score`` from all three + It differs in implementation from ``accuracy_score`` from all three classifications i.e. binary, mutliclass and multilabel. References @@ -453,18 +460,47 @@ def jaccard_similarity_score(y_true, y_pred, average=None, 0.38888888888888884 """ - average_options = (None, 'micro', 'macro', 'weighted') + average_options = (None, 'binary', 'micro', 'macro', 'weighted', 'samples') if average not in average_options: raise ValueError("average has to be one of " + str(average_options)) # Compute accuracy for each possible representation y_type, y_true, y_pred = _check_targets(y_true, y_pred) check_consistent_length(y_true, y_pred, sample_weight) + present_labels = unique_labels(y_true, y_pred) if y_type.startswith('multilabel'): + # default average in multilabel is 'samples' + if average == None: + average = 'samples' + with np.errstate(divide='ignore', invalid='ignore'): + + if average == 'samples': + pred_or_true = count_nonzero(y_true + y_pred, axis=1) + pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1) + score = pred_and_true / pred_or_true + score[pred_or_true == 0.0] = 1.0 + return _weighted_sum(score, sample_weight, normalize=True) + + if average == 'binary': + if pos_label not in present_labels: + if len(present_labels) < 2: + # only -ve labels + return 0. 
+ else: + raise ValueError("pos_label=%r is not a valid label" + ": %r" % (pos_label, present_labels)) + y_true_pos = y_true[:, pos_label - 1] + y_pred_pos = y_pred[:, pos_label - 1] + pred_or_true = count_nonzero(y_true_pos + y_pred_pos) + pred_and_true = count_nonzero(y_true_pos.multiply(y_pred_pos)) + score = pred_and_true / pred_or_true + return score + pred_or_true = count_nonzero(y_true + y_pred, axis=0) pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=0) + if average == 'macro': score = pred_and_true / pred_or_true n_features = y_true.shape[1] diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 8909ada0f54bd..5dda988af792f 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -969,12 +969,18 @@ def test_multilabel_jaccard_similarity_score(): # multilabel testing y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) + # average='macro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='macro'), 2. / 3) + # average='micro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='mirco'), 3. / 5) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred), - np.array([1. / 2, 1., 1. / 2])) + # average='samples' (default) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 12) + # average='binary' + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='binary', pos_label=1), 1. / 2) + # average='weighted' @ignore_warnings From 057815a168fa4afe99be3e08c5cc875533c5cf77 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 1 Dec 2017 08:21:34 +0530 Subject: [PATCH 09/88] show errors and warning before anything this deals with both multilabel and multiclass problems --- sklearn/metrics/classification.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4312b73ec6bdd..9d9b432341fcc 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -469,6 +469,25 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, check_consistent_length(y_true, y_pred, sample_weight) present_labels = unique_labels(y_true, y_pred) + if average == 'binary': + if y_type == 'binary': + if pos_label not in present_labels: + if len(present_labels) < 2: + # only -ve labels + return 0. + else: + raise ValueError("pos_label=%r is not a valid label: " + "%r" % (pos_label, present_labels)) + labels = [pos_label] + else: + raise ValueError("Target is %s but average='binary'. Please " + "choose another average setting." % y_type) + elif pos_label not in (None, 1): + warnings.warn("Note that pos_label (set to %r) is ignored when " + "average != 'binary' (got %r). You may use " + "labels=[pos_label] to specify a single positive class." + % (pos_label, average), UserWarning) + if y_type.startswith('multilabel'): # default average in multilabel is 'samples' if average == None: @@ -483,14 +502,6 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, score[pred_or_true == 0.0] = 1.0 return _weighted_sum(score, sample_weight, normalize=True) - if average == 'binary': - if pos_label not in present_labels: - if len(present_labels) < 2: - # only -ve labels - return 0. 
- else: - raise ValueError("pos_label=%r is not a valid label" - ": %r" % (pos_label, present_labels)) y_true_pos = y_true[:, pos_label - 1] y_pred_pos = y_pred[:, pos_label - 1] pred_or_true = count_nonzero(y_true_pos + y_pred_pos) From f1bd76fdcb3fcd829ed01bd8157752a20d12ebda Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 1 Dec 2017 10:50:14 +0530 Subject: [PATCH 10/88] write separate functions --- sklearn/metrics/classification.py | 1 - sklearn/metrics/tests/test_classification.py | 43 ++++++++++++++------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 9d9b432341fcc..aad42d3670977 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -473,7 +473,6 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, if y_type == 'binary': if pos_label not in present_labels: if len(present_labels) < 2: - # only -ve labels return 0. else: raise ValueError("pos_label=%r is not a valid label: " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 5dda988af792f..3fcfba73fbddb 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -939,6 +939,24 @@ def test_multilabel_hamming_loss(): assert_warns(DeprecationWarning, hamming_loss, y1, y2, classes=[0, 1]) +@ignore_warnings +def test_jaccard_similarity_score(): + y_true = np.array([0, 0]) + y_pred = np.array([0, 0]) + assert_equal(jaccard_similarity_score(y_true, y_pred, average='binary', + pos_label=-1), 0.) + + y_true = np.array([[0, 1, 1], [1, 0, 0]]) + y_pred = np.array([[1, 1, 1], [1, 0, 1]]) + assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, + average='binary', pos_label=-1) + + y_true = np.array([0, 1, 1, 0, 2]) + y_pred = np.array([1, 1, 1, 1, 0]) + assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, + average='binary') + + def test_multilabel_jaccard_similarity_score(): # Dense label indicator matrix format y1 = np.array([[0, 1, 1], [1, 0, 1]]) @@ -955,18 +973,6 @@ def test_multilabel_jaccard_similarity_score(): assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0) assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0) - y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] - y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro'), 7. / 18) - assert_equal(jaccard_similarity_score(y_true, y_pred, - average='micro'), 1. / 2) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='weighted'), 7. / 12) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred), - np.array([2. / 3, 0., 1. / 2])) - - # multilabel testing y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) # average='macro' @@ -983,6 +989,19 @@ def test_multilabel_jaccard_similarity_score(): # average='weighted' +def test_multiclass_jaccard_similarity_score(): + y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] + y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro'), 7. / 18) + assert_equal(jaccard_similarity_score(y_true, y_pred, + average='micro'), 1. / 2) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='weighted'), 7. / 12) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred), + np.array([2. / 3, 0., 1. 
/ 2])) + + @ignore_warnings def test_precision_recall_f1_score_multilabel_1(): # Test precision_recall_f1_score on a crafted multilabel example From 581d540e75c9e73a76edd5faf9c3a4c9ea02e840 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 2 Dec 2017 16:22:04 +0530 Subject: [PATCH 11/88] completely okay API and improved doctest --- sklearn/metrics/classification.py | 35 +++++++++++++++----- sklearn/metrics/tests/test_classification.py | 7 ++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index aad42d3670977..7c51c5acc6452 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -372,8 +372,8 @@ class labels [2]_. return 1 - k -def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, - sample_weight=None): +def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, + average=None, warn=True, sample_weight=None): """Jaccard similarity coefficient score The Jaccard index [1], or Jaccard similarity coefficient, defined as @@ -391,8 +391,20 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - sample_weight : array-like of shape = [n_samples], optional - Sample weights. + labels : list, optional + The set of labels to include when ``average != 'binary'``, and their + order if ``average is None``. Labels present in the data can be + excluded, for example to calculate a multiclass average ignoring a + majority negative class, while labels not present in the data will + result in 0 components in a macro average. For multilabel targets, + labels are column indices. By default, all labels in ``y_true`` and + ``y_pred`` are used in sorted order. + + pos_label : str or int, 1 by default + The class to report if ``average='binary'`` and the data is binary. + If the data are multiclass or multilabel, this will be ignored; + setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + scores for that label only. average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ 'weighted',] @@ -416,6 +428,12 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). + sample_weight : array-like of shape = [n_samples], optional + Sample weights. + + warn : bool, for internal use + This determines whether warning will be raised or not. + Returns ------- score: float (if average is not None) or array of float, shape =\ @@ -444,20 +462,21 @@ def jaccard_similarity_score(y_true, y_pred, pos_label=1, average=None, >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 >>> jaccard_similarity_score(y_true, y_pred, normalize='micro') + ... # doctest: +ELLIPSIS 0.33... In the multilabel case with binary label indicators: - >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]),\ - np.ones((2, 2))) + >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]), + ... np.ones((2, 2))) 0.75 In the multiclass case: >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] - >>> jaccard_similarity_score(y_true, y_pred) - 0.38888888888888884 + >>> jaccard_similarity_score(y_true, y_pred) # doctest: +ELLIPSIS + 0.388... 
""" average_options = (None, 'binary', 'micro', 'macro', 'weighted', 'samples') diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3fcfba73fbddb..361484f962557 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -956,6 +956,13 @@ def test_jaccard_similarity_score(): assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, average='binary') + assert_warns_message(UserWarning, + "Note that pos_label (set to 3) is ignored when" + "average != 'binary' (got None). You may use " + "labels=[pos_label] to specify a single positive" + "class.", jaccard_similarity_score, y_true, y_pred, + pos_label=3) + def test_multilabel_jaccard_similarity_score(): # Dense label indicator matrix format From aefe9216088eff084f3d5402b23f8bb98c5bdfb8 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 2 Dec 2017 18:05:02 +0530 Subject: [PATCH 12/88] fix lgtm error and better control flow --- sklearn/metrics/classification.py | 34 +++++++++----------- sklearn/metrics/tests/test_classification.py | 6 ++-- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 7c51c5acc6452..3e56533f524e2 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -506,43 +506,41 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning) + if labels is None: + labels = present_labels + n_labels = None + else: + n_labels = len(labels) + labels = np.hstack([labels, np.setdiff1d(present_labels, labels, + assume_unique=True)]) + if y_type.startswith('multilabel'): # default average in multilabel is 'samples' if average == None: average = 'samples' with np.errstate(divide='ignore', invalid='ignore'): + sum_axis = 1 if average == 'samples' else 0 + + pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=sum_axis) if average == 'samples': - pred_or_true = count_nonzero(y_true + y_pred, axis=1) - pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1) score = pred_and_true / pred_or_true score[pred_or_true == 0.0] = 1.0 return _weighted_sum(score, sample_weight, normalize=True) - - y_true_pos = y_true[:, pos_label - 1] - y_pred_pos = y_pred[:, pos_label - 1] - pred_or_true = count_nonzero(y_true_pos + y_pred_pos) - pred_and_true = count_nonzero(y_true_pos.multiply(y_pred_pos)) - score = pred_and_true / pred_or_true - return score - - pred_or_true = count_nonzero(y_true + y_pred, axis=0) - pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=0) - - if average == 'macro': + elif average == 'macro': score = pred_and_true / pred_or_true n_features = y_true.shape[1] return np.sum(score) / n_features elif average == 'micro': score = np.sum(pred_and_true) / np.sum(pred_or_true) return score - elif average == 'weighted': - score = pred_and_true / pred_or_true - score = _weighted_sum(score, sample_weight, normalize=True) - return score else: + # average='weighted' score = pred_and_true / pred_or_true + score = _weighted_sum(score, sample_weight, normalize=True) return score else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 361484f962557..8a6d6ef9589fa 100644 --- 
a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -987,10 +987,12 @@ def test_multilabel_jaccard_similarity_score(): average='macro'), 2. / 3) # average='micro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='mirco'), 3. / 5) + average='micro'), 3. / 5) # average='samples' (default) assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 12) - # average='binary' + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + sample_weight=np.array([0.1, 0.9])), 31. / 60) + # average='binary' (wrong example) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='binary', pos_label=1), 1. / 2) # average='weighted' From 83df9581aa9a05b6da121207603264bc1f83ebfa Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 2 Dec 2017 19:12:30 +0530 Subject: [PATCH 13/88] add normalize in API --- sklearn/metrics/classification.py | 32 +++++++++++++++----- sklearn/metrics/tests/test_classification.py | 8 ++--- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 3e56533f524e2..cc62df2ea3ba2 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,8 @@ class labels [2]_. def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average=None, warn=True, sample_weight=None): + average=None, warn=True, normalize=None, + sample_weight=None): """Jaccard similarity coefficient score The Jaccard index [1], or Jaccard similarity coefficient, defined as @@ -428,12 +429,17 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). + warn : bool, for internal use + This determines whether warning will be raised or not, + + normalize: True, False or None (default) + whether to normalize (default) the result or return an array + (specified with `normalize=False`). This is only to be specified + in case `average='samples'`. + sample_weight : array-like of shape = [n_samples], optional Sample weights. - warn : bool, for internal use - This determines whether warning will be raised or not. - Returns ------- score: float (if average is not None) or array of float, shape =\ @@ -461,7 +467,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> y_true = [0, 1, 2, 3] >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 - >>> jaccard_similarity_score(y_true, y_pred, normalize='micro') + >>> jaccard_similarity_score(y_true, y_pred, average='micro') ... # doctest: +ELLIPSIS 0.33... @@ -506,6 +512,17 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning) + if average == None: + average = 'samples' + + if average == 'samples': + if normalize is None: + normalize = True + elif normalize is not None: + warnings.warn("Note that normalize (set to %r) is ignored when " + "average != 'samples' (got %r)." 
+ % (normalize, average), UserWarning) + if labels is None: labels = present_labels n_labels = None @@ -516,8 +533,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if y_type.startswith('multilabel'): # default average in multilabel is 'samples' - if average == None: - average = 'samples' with np.errstate(divide='ignore', invalid='ignore'): sum_axis = 1 if average == 'samples' else 0 @@ -529,7 +544,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if average == 'samples': score = pred_and_true / pred_or_true score[pred_or_true == 0.0] = 1.0 - return _weighted_sum(score, sample_weight, normalize=True) + return _weighted_sum(score, sample_weight, normalize=normalize) elif average == 'macro': score = pred_and_true / pred_or_true n_features = y_true.shape[1] @@ -559,6 +574,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, sample_weight = C.sum(0)/C.sum() return np.sum(sample_weight*score) else: + # average='samples' den = C.sum(0) + C.sum(1) - C.diagonal() score = C.diagonal() / den return score diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 8a6d6ef9589fa..85cea82730c55 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -957,9 +957,9 @@ def test_jaccard_similarity_score(): average='binary') assert_warns_message(UserWarning, - "Note that pos_label (set to 3) is ignored when" + "Note that pos_label (set to 3) is ignored when " "average != 'binary' (got None). You may use " - "labels=[pos_label] to specify a single positive" + "labels=[pos_label] to specify a single positive " "class.", jaccard_similarity_score, y_true, y_pred, pos_label=3) @@ -993,8 +993,8 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred, sample_weight=np.array([0.1, 0.9])), 31. / 60) # average='binary' (wrong example) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='binary', pos_label=1), 1. / 2) +# assert_almost_equal(jaccard_similarity_score(y_true, y_pred, +# average='binary', pos_label=1), 1. / 2) # average='weighted' From 041c668afb913c09b647769f14dcccec0ce2bdf8 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 12 Dec 2017 16:18:47 +0530 Subject: [PATCH 14/88] raise ValueError for not-providing 'avergae' in multiclass --- sklearn/metrics/classification.py | 42 ++++++++------------ sklearn/metrics/tests/test_classification.py | 19 +++++---- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index cc62df2ea3ba2..e8d8130dd9496 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -408,7 +408,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, scores for that label only. average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ - 'weighted',] + 'weighted'] If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -429,11 +429,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). 
- warn : bool, for internal use + warn : bool, optional (default=True), for internal use This determines whether warning will be raised or not, normalize: True, False or None (default) - whether to normalize (default) the result or return an array + whether to normalize the result or return an array (specified with `normalize=False`). This is only to be specified in case `average='samples'`. @@ -449,11 +449,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, -------- accuracy_score, hamming_loss, zero_one_loss - Notes - ----- - It differs in implementation from ``accuracy_score`` from all three - classifications i.e. binary, mutliclass and multilabel. - References ---------- .. [1] `Wikipedia entry for the Jaccard index @@ -485,8 +480,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, 0.388... """ - average_options = (None, 'binary', 'micro', 'macro', 'weighted', 'samples') - if average not in average_options: + average_options = (None, 'micro', 'macro', 'weighted', 'samples') + if average not in average_options and average != 'binary': raise ValueError("average has to be one of " + str(average_options)) # Compute accuracy for each possible representation @@ -512,17 +507,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning) - if average == None: - average = 'samples' - - if average == 'samples': - if normalize is None: - normalize = True - elif normalize is not None: - warnings.warn("Note that normalize (set to %r) is ignored when " - "average != 'samples' (got %r)." - % (normalize, average), UserWarning) - if labels is None: labels = present_labels n_labels = None @@ -532,6 +516,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): + if average is None: + average = 'samples' + if average == 'samples' and normalize is None: + normalize = True # default average in multilabel is 'samples' with np.errstate(divide='ignore', invalid='ignore'): @@ -573,11 +561,15 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if sample_weight == None: sample_weight = C.sum(0)/C.sum() return np.sum(sample_weight*score) +# else: +# # average='samples' +# den = C.sum(0) + C.sum(1) - C.diagonal() +# score = C.diagonal() / den +# return score else: - # average='samples' - den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal() / den - return score + raise ValueError("In multiclass classification average must be " + "one of ('micro', 'macro', 'weighted'), got " + "average=%s." % average) def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 85cea82730c55..ae5c2552e2384 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -956,12 +956,13 @@ def test_jaccard_similarity_score(): assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, average='binary') - assert_warns_message(UserWarning, - "Note that pos_label (set to 3) is ignored when " - "average != 'binary' (got None). You may use " - "labels=[pos_label] to specify a single positive " - "class.", jaccard_similarity_score, y_true, y_pred, - pos_label=3) + # test 'pos_label' +# assert_warns_message(UserWarning, +# "Note that pos_label (set to 3) is ignored when " +# "average != 'binary' (got None). 
You may use " +# "labels=[pos_label] to specify a single positive " +# "class.", jaccard_similarity_score, y_true, y_pred, +# pos_label=3) def test_multilabel_jaccard_similarity_score(): @@ -1007,8 +1008,10 @@ def test_multiclass_jaccard_similarity_score(): average='micro'), 1. / 2) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 7. / 12) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred), - np.array([2. / 3, 0., 1. / 2])) + msg = ("In multiclass classification average must be one of " + "('micro', 'macro', 'weighted'), got average=None.") + assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, + y_pred) @ignore_warnings From 39b92b13ea9416646a14f7ded8ad50a0ee7d5879 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 12 Dec 2017 18:25:01 +0530 Subject: [PATCH 15/88] fixed errors with multiclass for different average values --- sklearn/metrics/classification.py | 20 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e8d8130dd9496..3786037229c1d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -476,8 +476,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] - >>> jaccard_similarity_score(y_true, y_pred) # doctest: +ELLIPSIS - 0.388... + >>> jaccard_similarity_score(y_true, y_pred, average='weighted') + ... # doctest: +ELLIPSIS + 0.4722... """ average_options = (None, 'micro', 'macro', 'weighted', 'samples') @@ -556,16 +557,15 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = np.sum(C.diagonal()) return score / den elif average == 'weighted': + # computation similar to average='macro', apart from computation + # of sample_weight below den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal()/den - if sample_weight == None: - sample_weight = C.sum(0)/C.sum() + score = C.diagonal() / den + if sample_weight is None: + _, y_true = np.unique(y_true, return_inverse=True) + num = np.bincount(y_true) + sample_weight = num / np.sum(num) return np.sum(sample_weight*score) -# else: -# # average='samples' -# den = C.sum(0) + C.sum(1) - C.diagonal() -# score = C.diagonal() / den -# return score else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ae5c2552e2384..b36c42bf523d7 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1007,7 +1007,7 @@ def test_multiclass_jaccard_similarity_score(): assert_equal(jaccard_similarity_score(y_true, y_pred, average='micro'), 1. / 2) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='weighted'), 7. / 12) + average='weighted'), 17. 
/ 36) msg = ("In multiclass classification average must be one of " "('micro', 'macro', 'weighted'), got average=None.") assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, From a0712b548285a8a6550829ebeda807c329a7b880 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 12 Dec 2017 18:48:35 +0530 Subject: [PATCH 16/88] fix tests, use assert_raise_message instead --- sklearn/metrics/tests/test_classification.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b36c42bf523d7..86d6cd26a23b9 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -948,21 +948,22 @@ def test_jaccard_similarity_score(): y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) - assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, - average='binary', pos_label=-1) + msg1 = ("Target is multilabel-indicator but average='binary'. " + "Please choose another average setting.") + assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, + y_pred, average='binary', pos_label=-1) y_true = np.array([0, 1, 1, 0, 2]) y_pred = np.array([1, 1, 1, 1, 0]) assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, average='binary') - # test 'pos_label' -# assert_warns_message(UserWarning, -# "Note that pos_label (set to 3) is ignored when " -# "average != 'binary' (got None). You may use " -# "labels=[pos_label] to specify a single positive " -# "class.", jaccard_similarity_score, y_true, y_pred, -# pos_label=3) + assert_warns_message(UserWarning, + "Note that pos_label (set to 3) is ignored when " + "average != 'binary' (got 'micro'). 
You may use " + "labels=[pos_label] to specify a single positive " + "class.", jaccard_similarity_score, y_true, y_pred, + average='micro', pos_label=3) def test_multilabel_jaccard_similarity_score(): From 113072a2de84bdde1f401772896f6bdacb18c253 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 15 Dec 2017 15:25:18 +0530 Subject: [PATCH 17/88] add common_test for jaccard_similarity_score --- sklearn/metrics/tests/test_common.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index e68f4024b24af..08fc4429533dc 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -127,24 +127,32 @@ "weighted_f2_score": partial(fbeta_score, average="weighted", beta=2), "weighted_precision_score": partial(precision_score, average="weighted"), "weighted_recall_score": partial(recall_score, average="weighted"), + "weighted_jaccard_similarity_score": + partial(jaccard_similarity_score, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), "micro_f2_score": partial(fbeta_score, average="micro", beta=2), "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), + "micro_jaccard_similarity_score": + partial(jaccard_similarity_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), "macro_f2_score": partial(fbeta_score, average="macro", beta=2), "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), + "macro_jaccard_similarity_score": + partial(jaccard_similarity_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), "samples_f2_score": partial(fbeta_score, average="samples", beta=2), "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), + "samples_jaccard_similarity_score": + partial(jaccard_similarity_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, } @@ -200,6 +208,7 @@ "samples_precision_score", "samples_recall_score", "coverage_error", + "samples_jaccard_similarity_score", "average_precision_score", "weighted_average_precision_score", @@ -222,6 +231,8 @@ "macro_roc_auc", "samples_roc_auc", + "samples_jaccard_similarity_score", + # with default average='binary', multiclass is prohibited "precision_score", "recall_score", @@ -236,7 +247,8 @@ # Metrics with an "average" argument METRICS_WITH_AVERAGING = [ - "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score" + "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", + "jaccard_similarity_score" ] # Threshold-based metrics with an "average" argument @@ -272,15 +284,19 @@ "hamming_loss", "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", + "jaccard_similarity_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", + "weighted_jaccard_similarity_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "micro_jaccard_similarity_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", 
"macro_precision_score", "macro_recall_score", + "macro_jaccard_similarity_score", "cohen_kappa_score", ] @@ -316,15 +332,19 @@ "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", + "weighted_jaccard_similarity_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", + "macro_jaccard_similarity_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "micro_jaccard_similarity_score", "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", + "samples_jaccard_similarity_score" ] # Regression metrics with "multioutput-continuous" format support @@ -341,6 +361,8 @@ "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "zero_one_loss", "unnormalized_zero_one_loss", + "micro_jaccard_similarity_score", "macro_jaccard_similarity_score", + "f1_score", "micro_f1_score", "macro_f1_score", "weighted_recall_score", # P = R = F = accuracy in multiclass case From c52d5774688c0edaa23c393385c2535ca3fb217c Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 17 Dec 2017 15:00:19 +0530 Subject: [PATCH 18/88] use `average='none-samples'` instead of 'normalize=False' --- sklearn/metrics/classification.py | 8 +++++++- sklearn/metrics/tests/test_classification.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 3786037229c1d..c16b233c6f6bd 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -428,6 +428,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). + ``'none-samples'``: warn : bool, optional (default=True), for internal use This determines whether warning will be raised or not, @@ -437,6 +438,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, (specified with `normalize=False`). This is only to be specified in case `average='samples'`. + .. versionchanged: 0.20 + 'normalize' is deprecated and will be removed in 0.22, instead use + `average='none-samples'` + sample_weight : array-like of shape = [n_samples], optional Sample weights. @@ -481,7 +486,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, 0.4722... """ - average_options = (None, 'micro', 'macro', 'weighted', 'samples') + average_options = (None, 'micro', 'macro', 'weighted', 'samples', + 'none-samples') if average not in average_options and average != 'binary': raise ValueError("average has to be one of " + str(average_options)) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 86d6cd26a23b9..a95382a9b1d96 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -955,8 +955,10 @@ def test_jaccard_similarity_score(): y_true = np.array([0, 1, 1, 0, 2]) y_pred = np.array([1, 1, 1, 1, 0]) - assert_raises(ValueError, jaccard_similarity_score, y_true, y_pred, - average='binary') + msg2 = ("Target is multiclass but average='binary'. 
Please choose " + "another average setting.") + assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, + y_pred, average='binary') assert_warns_message(UserWarning, "Note that pos_label (set to 3) is ignored when " From 2e2d762f6809c131e0c5fe528d6e30856796b981 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 17 Dec 2017 18:45:43 +0530 Subject: [PATCH 19/88] average='micro' in multiclass case is equivalent to accuracy_score --- sklearn/metrics/classification.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index c16b233c6f6bd..5d4a70e237418 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -559,9 +559,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = C.diagonal() / den return np.average(score) elif average == 'micro': - den = 2*np.sum(C) - np.sum(C.diagonal()) - score = np.sum(C.diagonal()) - return score / den + # micro-average on all labels is not useful in the + # multiclass case. It is identical to accuracy. + score = y_true == y_pred + return _weighted_sum(score, sample_weight, normalize) elif average == 'weighted': # computation similar to average='macro', apart from computation # of sample_weight below From 5504a00c1ac7f087a2fa653464bc49c95eab74d7 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 17 Dec 2017 19:33:18 +0530 Subject: [PATCH 20/88] fixes to multilabel case --- sklearn/metrics/classification.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 5d4a70e237418..5264d5c679c8e 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,7 @@ class labels [2]_. def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average=None, warn=True, normalize=None, + average=None, warn=True, normalize=True, sample_weight=None): """Jaccard similarity coefficient score @@ -433,10 +433,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, warn : bool, optional (default=True), for internal use This determines whether warning will be raised or not, - normalize: True, False or None (default) - whether to normalize the result or return an array - (specified with `normalize=False`). This is only to be specified - in case `average='samples'`. + normalize: bool, optional (defaul=True) + If ``False``, return the sum of the Jaccard similarity coefficient + over the sample set. Otherwise, return the average of Jaccard + similarity coefficient. This is only to be specified in case + `average='samples'`. .. versionchanged: 0.20 'normalize' is deprecated and will be removed in 0.22, instead use @@ -463,13 +464,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, -------- >>> import numpy as np >>> from sklearn.metrics import jaccard_similarity_score - >>> y_pred = [0, 2, 1, 3] - >>> y_true = [0, 1, 2, 3] - >>> jaccard_similarity_score(y_true, y_pred, average='macro') - 0.5 - >>> jaccard_similarity_score(y_true, y_pred, average='micro') - ... # doctest: +ELLIPSIS - 0.33... 
In the multilabel case with binary label indicators: @@ -479,6 +473,13 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, In the multiclass case: + >>> y_pred = [0, 2, 1, 3] + >>> y_true = [0, 1, 2, 3] + >>> jaccard_similarity_score(y_true, y_pred, average='macro') + 0.5 + >>> jaccard_similarity_score(y_true, y_pred, average='micro') + ... # doctest: +ELLIPSIS + >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] >>> jaccard_similarity_score(y_true, y_pred, average='weighted') From b30ba53c69cc410975ed9bac2182a91aeb903ca8 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 17 Dec 2017 21:06:11 +0530 Subject: [PATCH 21/88] add error message for `average='samples'` for non-multilable case --- sklearn/metrics/classification.py | 4 ++++ sklearn/metrics/tests/test_classification.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 5264d5c679c8e..dd1368cf6d9f8 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -553,6 +553,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = pred_and_true / pred_or_true score = _weighted_sum(score, sample_weight, normalize=True) return score + elif average == 'samples': + raise ValueError("Sample-based jaccard similarity score is " + "not meaningful outside multilabel " + "classification. See the accuracy_score instead.") else: C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if average == 'macro': diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a95382a9b1d96..c4e1556f36625 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -959,6 +959,10 @@ def test_jaccard_similarity_score(): "another average setting.") assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, y_pred, average='binary') + msg3 = ("Sample-based jaccard similarity score is not meaningful outside " + "multilabel classification. See the accuracy_score instead.") + assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, + y_pred, average='samples') assert_warns_message(UserWarning, "Note that pos_label (set to 3) is ignored when " From 8d0ca206a3664978b0ba5b71d0f9e15aed9b7c97 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 20 Dec 2017 23:08:50 +0530 Subject: [PATCH 22/88] add none-samples in common test --- sklearn/metrics/classification.py | 2 +- sklearn/metrics/tests/test_common.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index dd1368cf6d9f8..73c6203e9d947 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,7 @@ class labels [2]_. 
def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average=None, warn=True, normalize=True, + average=None, warn=True, normalize=None, sample_weight=None): """Jaccard similarity coefficient score diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 08fc4429533dc..83cffc2e4fca0 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -155,6 +155,9 @@ partial(jaccard_similarity_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, + + "none-samples_jaccard_similarity_score": + partial(jaccard_similarity_score, average='none-samples') } THRESHOLDED_METRICS = { @@ -209,6 +212,7 @@ "samples_recall_score", "coverage_error", "samples_jaccard_similarity_score", + "none-samples_jaccard_similarity_score", "average_precision_score", "weighted_average_precision_score", @@ -232,6 +236,7 @@ "samples_roc_auc", "samples_jaccard_similarity_score", + "none-samples_jaccard_similarity_score", # with default average='binary', multiclass is prohibited "precision_score", @@ -305,6 +310,7 @@ METRICS_WITH_NORMALIZE_OPTION = [ "accuracy_score", "jaccard_similarity_score", + "samples_jaccard_similarity_score", "zero_one_loss", ] @@ -344,7 +350,8 @@ "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", - "samples_jaccard_similarity_score" + "samples_jaccard_similarity_score", + "none-samples_jaccard_similarity_score" ] # Regression metrics with "multioutput-continuous" format support From ce89b5f23a91d02e352b5bbb39bbf6db1bd06d6d Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 28 Dec 2017 18:01:45 +0530 Subject: [PATCH 23/88] add support for `labels` in multilabel classification --- sklearn/metrics/classification.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 73c6203e9d947..62696561ac366 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -530,9 +530,24 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, normalize = True # default average in multilabel is 'samples' + if not np.all(labels == present_labels): + if np.max(labels) > np.max(present_labels): + raise ValueError('All labels must be in [0, n, labels). ' + 'Got %d > %d' % + (np.max(labels), np.max(present_labels))) + if np.min(labels) < 0: + raise ValueError('All labels must be in [0, n, labels). 
' + 'Got %d < 0' % np.min(labels)) + + # wait for response on 'prf-bug' PR, since I'm less than 90% sure + if n_labels is not None: + y_true = y_true[:, labels[:n_labels]] + y_pred = y_pred[:, labels[:n_labels]] + with np.errstate(divide='ignore', invalid='ignore'): sum_axis = 1 if average == 'samples' else 0 + pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis) pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=sum_axis) From 192bb2dee47536341a0e56aec5d3b2eec90001b3 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 30 Dec 2017 13:44:36 +0530 Subject: [PATCH 24/88] fix multilablel classification labels, sample_weight seems to be working fine, though haven't fully testing them again, will do in next commit --- sklearn/metrics/classification.py | 71 +++++++++++++++----- sklearn/metrics/tests/test_classification.py | 22 +++++- 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 62696561ac366..8739f5a02b9e4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -524,11 +524,21 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): - if average is None: - average = 'samples' - if average == 'samples' and normalize is None: - normalize = True - # default average in multilabel is 'samples' + if average in (None, 'samples'): + if normalize is None: + # default is average='samples' + average = 'samples' + else: + if normalize: + average = 'samples' + else: + average = 'none-samples' + warn_message = ("'normalize' was removed in version 0.20 and " + "will be removed in 0.22, instead use " + "`average='%s'`." % normalize) + warnings.warn(warn_message, DeprecationWarning) + # else: + # otherwise what should we do? raise warning or ValueError? if not np.all(labels == present_labels): if np.max(labels) > np.max(present_labels): @@ -545,28 +555,53 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, y_pred = y_pred[:, labels[:n_labels]] with np.errstate(divide='ignore', invalid='ignore'): - sum_axis = 1 if average == 'samples' else 0 - - - pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=sum_axis) if average == 'samples': + pred_or_true = count_nonzero(y_true + y_pred, axis=1) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=1) + score = pred_and_true / pred_or_true + score[pred_or_true == 0.0] == 1.0 + return _weighted_sum(score, sample_weight, normalize=True) + elif average == 'none-samples': + pred_or_true = count_nonzero(y_true + y_pred, axis=1) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=1) score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] = 1.0 - return _weighted_sum(score, sample_weight, normalize=normalize) + score[pred_or_true == 0.0] == 1.0 + return _weighted_sum(score, sample_weight, normalize=False) + elif average == 'micro': + pred_or_true = count_nonzero(y_true + y_pred, axis=1, + sample_weight=sample_weight) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=1, + sample_weight=sample_weight) + if np.sum(pred_or_true): + score = np.sum(pred_and_true) / np.sum(pred_or_true) + else: + score = 1. 
+ return score elif average == 'macro': + pred_or_true = count_nonzero(y_true + y_pred, axis=0, + sample_weight=sample_weight) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=0, + sample_weight=sample_weight) score = pred_and_true / pred_or_true + score[pred_or_true == 0.0] == 1.0 n_features = y_true.shape[1] return np.sum(score) / n_features - elif average == 'micro': - score = np.sum(pred_and_true) / np.sum(pred_or_true) - return score else: - # average='weighted' + pred_or_true = count_nonzero(y_true + y_pred, axis=0, + sample_weight=sample_weight) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=0, + sample_weight=sample_weight) score = pred_and_true / pred_or_true - score = _weighted_sum(score, sample_weight, normalize=True) + score[pred_or_true == 0.0] == 1.0 + weights = y_true.toarray().sum(axis=0) + score = _weighted_sum(score, sample_weight=weights, + normalize=True) return score elif average == 'samples': raise ValueError("Sample-based jaccard similarity score is " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index c4e1556f36625..df78094fc1148 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1000,10 +1000,30 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 12) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, sample_weight=np.array([0.1, 0.9])), 31. / 60) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='samples', + labels=[0, 2]), 1. / 2) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='samples', + labels=[1, 2]), 1. / 2) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='samples', + sample_weight=[1, 2]), 5. / 9) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + sample_weight=[1, 2]), 4. / 7) + y_true = np.array([[0, 1, 1], [1, 0, 1]]) + y_pred = np.array([[1, 1, 1], [1, 0, 1]]) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro'), 5. / 6) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro', + sample_weight=[1, 2]), 8. / 9) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='weighted'), 7. / 8) # average='binary' (wrong example) # assert_almost_equal(jaccard_similarity_score(y_true, y_pred, # average='binary', pos_label=1), 1. 
/ 2) - # average='weighted' def test_multiclass_jaccard_similarity_score(): From 149af2aeed74b91520dfd7954fb05719df806a22 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 31 Dec 2017 15:47:11 +0530 Subject: [PATCH 25/88] fix for multiclass --- sklearn/metrics/classification.py | 58 ++++++++++++++------ sklearn/metrics/tests/test_classification.py | 4 +- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 8739f5a02b9e4..9035f2f79c37f 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -592,6 +592,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, n_features = y_true.shape[1] return np.sum(score) / n_features else: + # average = 'weighted' pred_or_true = count_nonzero(y_true + y_pred, axis=0, sample_weight=sample_weight) pred_and_true = count_nonzero(y_true.multiply(y_pred), @@ -608,26 +609,47 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "not meaningful outside multilabel " "classification. See the accuracy_score instead.") else: - C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) + le = LabelEncoder() + le.fit(labels) + y_true = le.transform(y_true) + y_pred = le.transform(y_pred) + sorted_labels = le.classes_ + + tp = y_true == y_pred + tp_bins = y_true[tp] + if sample_weight is not None: + tp_bins_weights = np.asarray(sample_weight)[tp] + else: + tp_bins_weights = None + + if len(tp_bins): + tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, + minlength=len(labels)) + else: + true_sum = pred_sum = tp_sum = np.zeros(len(labels)) + if len(y_pred): + pred_sum = np.bincount(y_pred, weights=sample_weight, + minlength=len(labels)) + if len(y_true): + true_sum = np.bincount(y_true, weights=sample_weight, + minlength=len(labels)) + + indices = np.searchsorted(sorted_labels, labels[:n_labels]) + tp_sum = tp_sum[indices] + true_sum = true_sum[indices] + pred_sum = pred_sum[indices] + den = true_sum + pred_sum - tp_sum + if average == 'macro': - den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal() / den + score = tp_sum / den return np.average(score) - elif average == 'micro': - # micro-average on all labels is not useful in the - # multiclass case. It is identical to accuracy. 
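# ---------------------------------------------------------------------------
# Editorial sketch (not a line of this patch series; numpy only): the bincount
# bookkeeping introduced in this hunk, written out standalone. It reproduces
# the ant/bird/cat example used elsewhere in the series, with labels encoded
# as ant=0, bird=1, cat=2: per-class Jaccard is tp / (true + pred - tp), the
# macro mean is 7/18 and the support-weighted mean is 0.4722....
import numpy as np

y_true = np.array([2, 0, 2, 2, 0, 1])   # ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
y_pred = np.array([0, 0, 2, 2, 0, 2])   # ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
n_labels = 3

tp_sum = np.bincount(y_true[y_true == y_pred], minlength=n_labels)  # per-class true positives
true_sum = np.bincount(y_true, minlength=n_labels)                  # per-class support in y_true
pred_sum = np.bincount(y_pred, minlength=n_labels)                  # per-class support in y_pred

per_class = tp_sum / (true_sum + pred_sum - tp_sum)
print(per_class)                                   # approximately [0.666..., 0.0, 0.5]
print(per_class.mean())                            # macro average: 7/18 = 0.388...
print(np.average(per_class, weights=true_sum))     # weighted average: 0.4722...
# ---------------------------------------------------------------------------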
- score = y_true == y_pred - return _weighted_sum(score, sample_weight, normalize) - elif average == 'weighted': - # computation similar to average='macro', apart from computation - # of sample_weight below - den = C.sum(0) + C.sum(1) - C.diagonal() - score = C.diagonal() / den - if sample_weight is None: - _, y_true = np.unique(y_true, return_inverse=True) - num = np.bincount(y_true) - sample_weight = num / np.sum(num) - return np.sum(sample_weight*score) + if average == 'weighted': + pass + + if average == 'micro': + tp_sum = tp_sum.sum() + score = tp_sum / den + return score else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index df78094fc1148..4cf9abbf66ed4 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1027,8 +1027,8 @@ def test_multilabel_jaccard_similarity_score(): def test_multiclass_jaccard_similarity_score(): - y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] - y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] + y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] + y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='macro'), 7. / 18) assert_equal(jaccard_similarity_score(y_true, y_pred, From 40fca72ceaad020e92e5cd9cda3a695e8fd7ce30 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 1 Jan 2018 19:00:49 +0530 Subject: [PATCH 26/88] corrected 'macro', 'weighted' for multiclass only 'micro' remains --- sklearn/metrics/classification.py | 14 ++++++++----- sklearn/metrics/tests/test_classification.py | 22 ++++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 9035f2f79c37f..42131331a7000 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -433,7 +433,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, warn : bool, optional (default=True), for internal use This determines whether warning will be raised or not, - normalize: bool, optional (defaul=True) + normalize : bool, optional (defaul=True) If ``False``, return the sum of the Jaccard similarity coefficient over the sample set. Otherwise, return the average of Jaccard similarity coefficient. 
This is only to be specified in case @@ -615,6 +615,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, y_pred = le.transform(y_pred) sorted_labels = le.classes_ + # labels are now from 0 to len(labels) - 1 tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: @@ -635,18 +636,21 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, minlength=len(labels)) indices = np.searchsorted(sorted_labels, labels[:n_labels]) - tp_sum = tp_sum[indices] true_sum = true_sum[indices] pred_sum = pred_sum[indices] - den = true_sum + pred_sum - tp_sum + tp_sum = tp_sum[indices] if average == 'macro': + den = true_sum + pred_sum - tp_sum score = tp_sum / den return np.average(score) if average == 'weighted': - pass - + den = true_sum + pred_sum - tp_sum + score = tp_sum / den + return _weighted_sum(score, sample_weight=true_sum, normalize=True) + # wrong logic for 'micro' if average == 'micro': + den = (true_sum + pred_sum - tp_sum).sum() tp_sum = tp_sum.sum() score = tp_sum / den return score diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 4cf9abbf66ed4..7f8874317f083 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1027,14 +1027,24 @@ def test_multilabel_jaccard_similarity_score(): def test_multiclass_jaccard_similarity_score(): - y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] - y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] + y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat', 'bird', 'bird'] + y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird', 'bird', 'cat'] assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro'), 7. / 18) - assert_equal(jaccard_similarity_score(y_true, y_pred, - average='micro'), 1. / 2) + average='macro'), 7. / 15) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='weighted'), 17. / 36) + average='macro', + labels=['ant', 'bird']), + 1. / 2) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro', + labels=['ant', 'cat']), + 8. / 15) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='macro', + labels=['cat', 'bird']), + 11. / 30) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='weighted'), 29. 
/ 60) msg = ("In multiclass classification average must be one of " "('micro', 'macro', 'weighted'), got average=None.") assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, From 4b504472d3fb49849766f1b4c333d250c2c3dcf1 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 2 Jan 2018 12:47:09 +0530 Subject: [PATCH 27/88] fix completely logic of average='micro', now only 'binary' remains --- sklearn/metrics/classification.py | 23 +++++++++++------ sklearn/metrics/tests/test_classification.py | 27 +++++++++++++++++--- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 42131331a7000..0e13536867fe3 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -636,24 +636,31 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, minlength=len(labels)) indices = np.searchsorted(sorted_labels, labels[:n_labels]) + tp_sum = tp_sum[indices] + + if average == 'micro': + tp_sum = tp_sum.sum() + labels = le.transform(labels[:n_labels]) + union_indices = np.where(np.isin(y_true, labels) + + np.isin(y_pred, labels) == True)[0] + if sample_weight is not None: + den = sample_weight[union_indices].sum() + else: + den = len(union_indices) + score = tp_sum / den + return score + true_sum = true_sum[indices] pred_sum = pred_sum[indices] - tp_sum = tp_sum[indices] if average == 'macro': den = true_sum + pred_sum - tp_sum score = tp_sum / den return np.average(score) - if average == 'weighted': + elif average == 'weighted': den = true_sum + pred_sum - tp_sum score = tp_sum / den return _weighted_sum(score, sample_weight=true_sum, normalize=True) - # wrong logic for 'micro' - if average == 'micro': - den = (true_sum + pred_sum - tp_sum).sum() - tp_sum = tp_sum.sum() - score = tp_sum / den - return score else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 7f8874317f083..172c8b4954a41 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1021,9 +1021,6 @@ def test_multilabel_jaccard_similarity_score(): sample_weight=[1, 2]), 8. / 9) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 7. / 8) - # average='binary' (wrong example) -# assert_almost_equal(jaccard_similarity_score(y_true, y_pred, -# average='binary', pos_label=1), 1. / 2) def test_multiclass_jaccard_similarity_score(): @@ -1045,6 +1042,30 @@ def test_multiclass_jaccard_similarity_score(): 11. / 30) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 29. / 60) + + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['ant', 'cat']), + 4. / 7) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['cat']), 2. / 5) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['ant']), 2. / 3) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['bird']), 1. / 3) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['ant', 'bird']), + 1. / 2) + weight = np.array([1, 2, 1, 1, 2, 1, 2, 3]) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='micro', + labels=['ant', 'bird'], + sample_weight=weight), + 6. 
/ 11) msg = ("In multiclass classification average must be one of " "('micro', 'macro', 'weighted'), got average=None.") assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, From fd099e5f6f7594761eadcce70d3defdad35a3378 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 2 Jan 2018 12:48:23 +0530 Subject: [PATCH 28/88] remove 'warn' from API, after discussion on PR with jnothman --- sklearn/metrics/classification.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 0e13536867fe3..4e09d2d76e2cb 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,8 +373,7 @@ class labels [2]_. def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average=None, warn=True, normalize=None, - sample_weight=None): + average=None, normalize=None, sample_weight=None): """Jaccard similarity coefficient score The Jaccard index [1], or Jaccard similarity coefficient, defined as @@ -430,9 +429,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, meaningful for multilabel classification). ``'none-samples'``: - warn : bool, optional (default=True), for internal use - This determines whether warning will be raised or not, - normalize : bool, optional (defaul=True) If ``False``, return the sum of the Jaccard similarity coefficient over the sample set. Otherwise, return the average of Jaccard From 8c9c6145e15f82787ff706f7256e70580ecd47a8 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 2 Jan 2018 15:35:01 +0530 Subject: [PATCH 29/88] fix average='binary' --- sklearn/metrics/classification.py | 2 +- sklearn/metrics/tests/test_classification.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4e09d2d76e2cb..84f95673d71d7 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -634,7 +634,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, indices = np.searchsorted(sorted_labels, labels[:n_labels]) tp_sum = tp_sum[indices] - if average == 'micro': + if average == 'micro' or average == 'binary': tp_sum = tp_sum.sum() labels = le.transform(labels[:n_labels]) union_indices = np.where(np.isin(y_true, labels) + diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 172c8b4954a41..843164079c054 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1070,6 +1070,13 @@ def test_multiclass_jaccard_similarity_score(): "('micro', 'macro', 'weighted'), got average=None.") assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, y_pred) + y_true = np.array([1, 0, 1, 1, 0]) + y_pred = np.array([1, 0, 1, 1, 1]) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='binary'), 3. / 4) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='binary', + pos_label=0), 1. 
/ 2) @ignore_warnings From a7d3b4057ffc46491c128c1155e0fec7a68ee713 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 2 Jan 2018 15:45:38 +0530 Subject: [PATCH 30/88] fix doctest, now test_common and lgtm remain to be fixed --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 84f95673d71d7..bdac1b23d98a8 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -474,7 +474,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 >>> jaccard_similarity_score(y_true, y_pred, average='micro') - ... # doctest: +ELLIPSIS + 0.5 >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] From 8a7e67389d48fdf657c7e66fddcb0f4fa96da970 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 3 Jan 2018 00:41:09 +0530 Subject: [PATCH 31/88] this fixes lgtm? --- sklearn/metrics/classification.py | 47 ++++++++++++++----------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index bdac1b23d98a8..bcc67d8c2a397 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -609,12 +609,17 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, le.fit(labels) y_true = le.transform(y_true) y_pred = le.transform(y_pred) - sorted_labels = le.classes_ - # labels are now from 0 to len(labels) - 1 + labels = le.transform(labels)[:n_labels] + indices = np.where(np.isin(y_true, labels) + + np.isin(y_pred, labels) == True)[0] + + y_true = y_true[indices] + y_pred = y_pred[indices] tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: + sample_weight = sample_weight[indices] tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None @@ -623,7 +628,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, minlength=len(labels)) else: + # pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) + if len(y_pred): pred_sum = np.bincount(y_pred, weights=sample_weight, minlength=len(labels)) @@ -631,36 +638,24 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, true_sum = np.bincount(y_true, weights=sample_weight, minlength=len(labels)) - indices = np.searchsorted(sorted_labels, labels[:n_labels]) - tp_sum = tp_sum[indices] - if average == 'micro' or average == 'binary': - tp_sum = tp_sum.sum() - labels = le.transform(labels[:n_labels]) - union_indices = np.where(np.isin(y_true, labels) + - np.isin(y_pred, labels) == True)[0] - if sample_weight is not None: - den = sample_weight[union_indices].sum() - else: - den = len(union_indices) - score = tp_sum / den - return score - - true_sum = true_sum[indices] - pred_sum = pred_sum[indices] - - if average == 'macro': - den = true_sum + pred_sum - tp_sum - score = tp_sum / den - return np.average(score) + num = np.array([tp_sum.sum()]) + den = np.array([true_sum.sum()]) + weights = None + elif average == 'macro': + num = tp_sum[labels] + den = true_sum[labels] + pred_sum[labels] - tp_sum[labels] + weights = None elif average == 'weighted': - den = true_sum + pred_sum - tp_sum - score = tp_sum / den - return _weighted_sum(score, sample_weight=true_sum, normalize=True) + num = tp_sum[labels] + den = 
true_sum[labels] + pred_sum[labels] - tp_sum[labels] + weights = true_sum[labels] else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " "average=%s." % average) + score = num / den + return np.average(score, weights=weights) def matthews_corrcoef(y_true, y_pred, sample_weight=None): From 6e75c5a277bb68cbdb45ee289176b8325f592c6f Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 7 Jan 2018 20:40:57 +0530 Subject: [PATCH 32/88] fix average='micro' for multiclass jaccard --- sklearn/metrics/classification.py | 21 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index bcc67d8c2a397..48217e10ce766 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -474,7 +474,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 >>> jaccard_similarity_score(y_true, y_pred, average='micro') - 0.5 + ... # doctest: +ELLIPSIS + 0.333... >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] @@ -626,30 +627,30 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if len(tp_bins): tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, - minlength=len(labels)) + minlength=len(labels))[labels] else: # pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) if len(y_pred): pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(labels)) + minlength=len(labels))[labels] if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, - minlength=len(labels)) + minlength=len(labels))[labels] if average == 'micro' or average == 'binary': num = np.array([tp_sum.sum()]) - den = np.array([true_sum.sum()]) + den = np.array([true_sum.sum() + pred_sum.sum() - tp_sum.sum()]) weights = None elif average == 'macro': - num = tp_sum[labels] - den = true_sum[labels] + pred_sum[labels] - tp_sum[labels] + num = tp_sum + den = true_sum + pred_sum - tp_sum weights = None elif average == 'weighted': - num = tp_sum[labels] - den = true_sum[labels] + pred_sum[labels] - tp_sum[labels] - weights = true_sum[labels] + num = tp_sum + den = true_sum + pred_sum - tp_sum + weights = true_sum else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 843164079c054..90a216c2df9d2 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1046,7 +1046,7 @@ def test_multiclass_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='micro', labels=['ant', 'cat']), - 4. / 7) + 1. / 2) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='micro', labels=['cat']), 2. 
/ 5) From c80059839fe2ee83898640f5ea974dd70d1d3728 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sun, 7 Jan 2018 21:36:12 +0530 Subject: [PATCH 33/88] add smart tests --- sklearn/metrics/tests/test_classification.py | 54 +++++++------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 90a216c2df9d2..9c5e719e7d3e5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -10,7 +10,7 @@ from sklearn import svm from sklearn.datasets import make_multilabel_classification -from sklearn.preprocessing import label_binarize +from sklearn.preprocessing import label_binarize, LabelBinarizer from sklearn.utils.validation import check_random_state from sklearn.utils.testing import assert_raises, clean_warning_registry @@ -1026,40 +1026,24 @@ def test_multilabel_jaccard_similarity_score(): def test_multiclass_jaccard_similarity_score(): y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat', 'bird', 'bird'] y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird', 'bird', 'cat'] - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro'), 7. / 15) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro', - labels=['ant', 'bird']), - 1. / 2) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro', - labels=['ant', 'cat']), - 8. / 15) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro', - labels=['cat', 'bird']), - 11. / 30) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='weighted'), 29. / 60) - - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['ant', 'cat']), - 1. / 2) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['cat']), 2. / 5) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['ant']), 2. / 3) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['bird']), 1. / 3) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['ant', 'bird']), - 1. 
/ 2) + labels = ['ant', 'bird', 'cat'] + lb = LabelBinarizer() + lb.fit(labels) + y_true_bin = lb.transform(y_true) + y_pred_bin = lb.transform(y_pred) + multi_jaccard_similarity_score = partial(jaccard_similarity_score, y_true, + y_pred) + bin_jaccard_similarity_score = partial(jaccard_similarity_score, y_true_bin + , y_pred_bin) + multi_labels_list = [['ant', 'bird'], ['ant', 'cat'], ['cat', 'bird'], + ['ant'], ['bird'], ['cat'], None] + bin_labels_list = [[0, 1], [0, 2], [2, 1], [0], [1], [2], None] + for average in ('macro', 'weighted', 'micro'): + for m_label, b_label in zip(multi_labels_list, bin_labels_list): + assert_almost_equal(multi_jaccard_similarity_score(average=average, + labels=m_label), + bin_jaccard_similarity_score(average=average, + labels=b_label)) weight = np.array([1, 2, 1, 1, 2, 1, 2, 3]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='micro', From c3fa41d195d96a80d9bfc196409250a2c214e21e Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 8 Jan 2018 16:16:12 +0530 Subject: [PATCH 34/88] first fix for test_common --- sklearn/metrics/tests/test_common.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 83cffc2e4fca0..ff03ac4ce0cbd 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -224,6 +224,11 @@ "label_ranking_average_precision_score", ] +# Those metrics don't support average=None for multiclass input +NONE_AVERAGE_UNDEFINED_MULTICLASS = [ + "jaccard_similarity_score" +] + # Those metrics don't support multiclass inputs METRIC_UNDEFINED_MULTICLASS = [ "brier_score_loss", @@ -1142,9 +1147,14 @@ def test_no_averaging_labels(): [y_true_multilabel, y_pred_multilabel]]: if name not in MULTILABELS_METRICS and y_pred.ndim > 1: continue + if name in NONE_AVERAGE_UNDEFINED_MULTICLASS and y_pred.ndim < 2: + continue metric = ALL_METRICS[name] score_labels = metric(y_true, y_pred, labels=labels, average=None) score = metric(y_true, y_pred, average=None) - assert_array_equal(score_labels, score[inverse_labels]) + if isinstance(score, np.ndarray): + assert_array_equal(score_labels, score[inverse_labels]) + else: + assert_almost_equal(score_labels, score) From 27ffebf1e3f8113efc9a3bad9a4005e0c2839afd Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 8 Jan 2018 17:31:58 +0530 Subject: [PATCH 35/88] fixes LGTM errors? --- sklearn/metrics/classification.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 48217e10ce766..4663e9e705b92 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -640,22 +640,19 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, minlength=len(labels))[labels] if average == 'micro' or average == 'binary': - num = np.array([tp_sum.sum()]) - den = np.array([true_sum.sum() + pred_sum.sum() - tp_sum.sum()]) + tp_sum = np.array([tp_sum.sum()]) + true_sum = np.array([true_sum.sum()]) + pred_sum = np.array([pred_sum.sum()]) weights = None elif average == 'macro': - num = tp_sum - den = true_sum + pred_sum - tp_sum weights = None elif average == 'weighted': - num = tp_sum - den = true_sum + pred_sum - tp_sum weights = true_sum else: raise ValueError("In multiclass classification average must be " "one of ('micro', 'macro', 'weighted'), got " "average=%s." 
% average) - score = num / den + score = tp_sum / (true_sum + pred_sum - tp_sum) return np.average(score, weights=weights) From e017ccfe2e104f27e310ac3446f381130014b213 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 9 Jan 2018 11:44:53 +0530 Subject: [PATCH 36/88] remove warning from tests/test_classification.py --- sklearn/metrics/tests/test_classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 9c5e719e7d3e5..845818b4e52a7 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -939,7 +939,6 @@ def test_multilabel_hamming_loss(): assert_warns(DeprecationWarning, hamming_loss, y1, y2, classes=[0, 1]) -@ignore_warnings def test_jaccard_similarity_score(): y_true = np.array([0, 0]) y_pred = np.array([0, 0]) From 9ee4c11831334d705868a97e5932a8840ae12727 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 10 Jan 2018 18:51:10 +0530 Subject: [PATCH 37/88] simplify code for multilabel jaccard --- sklearn/metrics/classification.py | 94 +++++++++----------- sklearn/metrics/tests/test_classification.py | 6 +- sklearn/metrics/tests/test_common.py | 12 +-- 3 files changed, 45 insertions(+), 67 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4663e9e705b92..2e9f5391a4569 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -482,6 +482,13 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average='weighted') ... # doctest: +ELLIPSIS 0.4722... + >>> jaccard_similarity_score(y_true, y_pred) + ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE + array([ 0.66..., 0. , 0.5 ]) + >>> jaccard_similarity_score(y_true, y_pred, + ... labels=['ant', 'cat', 'bird']) + ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE + array([ 0.66..., 0.5 , 0. ]) """ average_options = (None, 'micro', 'macro', 'weighted', 'samples', @@ -546,61 +553,44 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, raise ValueError('All labels must be in [0, n, labels). ' 'Got %d < 0' % np.min(labels)) - # wait for response on 'prf-bug' PR, since I'm less than 90% sure if n_labels is not None: y_true = y_true[:, labels[:n_labels]] y_pred = y_pred[:, labels[:n_labels]] with np.errstate(divide='ignore', invalid='ignore'): - if average == 'samples': - pred_or_true = count_nonzero(y_true + y_pred, axis=1) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=1) - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] == 1.0 - return _weighted_sum(score, sample_weight, normalize=True) - elif average == 'none-samples': - pred_or_true = count_nonzero(y_true + y_pred, axis=1) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=1) - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] == 1.0 - return _weighted_sum(score, sample_weight, normalize=False) + if average == 'samples' or average == 'none-samples': + sum_axis = 1 + class_weight = sample_weight + weights = None elif average == 'micro': - pred_or_true = count_nonzero(y_true + y_pred, axis=1, - sample_weight=sample_weight) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=1, - sample_weight=sample_weight) - if np.sum(pred_or_true): - score = np.sum(pred_and_true) / np.sum(pred_or_true) - else: - score = 1. 
- return score + sum_axis = 1 + class_weight = None + weights = sample_weight elif average == 'macro': - pred_or_true = count_nonzero(y_true + y_pred, axis=0, - sample_weight=sample_weight) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=0, - sample_weight=sample_weight) - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] == 1.0 - n_features = y_true.shape[1] - return np.sum(score) / n_features - else: - # average = 'weighted' - pred_or_true = count_nonzero(y_true + y_pred, axis=0, - sample_weight=sample_weight) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=0, - sample_weight=sample_weight) - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] == 1.0 - weights = y_true.toarray().sum(axis=0) - score = _weighted_sum(score, sample_weight=weights, - normalize=True) - return score + sum_axis = 0 + class_weight = None + weights = sample_weight + elif average == 'weighted': + sum_axis = 0 + class_weight = y_true.toarray().sum(axis=0) + weights = sample_weight + + pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis, + sample_weight=weights) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=sum_axis, + sample_weight=weights) + if average == 'micro': + pred_or_true = np.array([pred_or_true.sum()]) + pred_and_true = np.array([pred_and_true.sum()]) + + score = pred_and_true / pred_or_true + score[pred_or_true == 0.0] == 1.0 + + if average != 'none-samples': + score = np.average(score, weights=class_weight) + return score elif average == 'samples': raise ValueError("Sample-based jaccard similarity score is " "not meaningful outside multilabel " @@ -648,12 +638,12 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, weights = None elif average == 'weighted': weights = true_sum - else: - raise ValueError("In multiclass classification average must be " - "one of ('micro', 'macro', 'weighted'), got " - "average=%s." % average) + score = tp_sum / (true_sum + pred_sum - tp_sum) - return np.average(score, weights=weights) + + if average is not None: + score = np.average(score, weights=weights) + return score def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 845818b4e52a7..3ecfaa3941533 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1049,10 +1049,8 @@ def test_multiclass_jaccard_similarity_score(): labels=['ant', 'bird'], sample_weight=weight), 6. / 11) - msg = ("In multiclass classification average must be one of " - "('micro', 'macro', 'weighted'), got average=None.") - assert_raise_message(ValueError, msg, jaccard_similarity_score, y_true, - y_pred) + assert_array_equal(jaccard_similarity_score(y_true, y_pred), + np.array([2. / 3, 1. / 3, 2. 
/ 5])) y_true = np.array([1, 0, 1, 1, 0]) y_pred = np.array([1, 0, 1, 1, 1]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index ff03ac4ce0cbd..83cffc2e4fca0 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -224,11 +224,6 @@ "label_ranking_average_precision_score", ] -# Those metrics don't support average=None for multiclass input -NONE_AVERAGE_UNDEFINED_MULTICLASS = [ - "jaccard_similarity_score" -] - # Those metrics don't support multiclass inputs METRIC_UNDEFINED_MULTICLASS = [ "brier_score_loss", @@ -1147,14 +1142,9 @@ def test_no_averaging_labels(): [y_true_multilabel, y_pred_multilabel]]: if name not in MULTILABELS_METRICS and y_pred.ndim > 1: continue - if name in NONE_AVERAGE_UNDEFINED_MULTICLASS and y_pred.ndim < 2: - continue metric = ALL_METRICS[name] score_labels = metric(y_true, y_pred, labels=labels, average=None) score = metric(y_true, y_pred, average=None) - if isinstance(score, np.ndarray): - assert_array_equal(score_labels, score[inverse_labels]) - else: - assert_almost_equal(score_labels, score) + assert_array_equal(score_labels, score[inverse_labels]) From d1311c7ad6e9d8143705234afc6e9f258f7b46f9 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 11 Jan 2018 15:35:14 +0530 Subject: [PATCH 38/88] address Joel's comments --- sklearn/metrics/classification.py | 52 +++++++++++--------- sklearn/metrics/tests/test_classification.py | 17 ++++++- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 2e9f5391a4569..d74d6ac9dc5fc 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,8 @@ class labels [2]_. def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average=None, normalize=None, sample_weight=None): + average='samples', normalize=None, + sample_weight=None): """Jaccard similarity coefficient score The Jaccard index [1], or Jaccard similarity coefficient, defined as @@ -406,7 +407,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ + average : string, ['samples' (default), 'binary', 'micro', 'macro', None, \ 'weighted'] If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -428,16 +429,20 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). ``'none-samples'``: + Calculate metrics for each instance, (only meaningful for + multilabel classification). This differs from 'samples' to return + array of class-wise jaccard index, instead of normalzing the array. - normalize : bool, optional (defaul=True) + normalize : None, bool, optional (defaul=True) If ``False``, return the sum of the Jaccard similarity coefficient over the sample set. Otherwise, return the average of Jaccard similarity coefficient. This is only to be specified in case `average='samples'`. .. 
versionchanged: 0.20 - 'normalize' is deprecated and will be removed in 0.22, instead use - `average='none-samples'` + 'normalize' is deprecated and will be removed in 0.22, instead of + `normalize=True` use instead just `average='samples'` and for + `normalize=False` use instead `average='none-samples'`. sample_weight : array-like of shape = [n_samples], optional Sample weights. @@ -528,21 +533,18 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): - if average in (None, 'samples'): - if normalize is None: - # default is average='samples' - average = 'samples' - else: - if normalize: - average = 'samples' - else: + if normalize is not None: + if average == 'samples': + if not normalize: average = 'none-samples' - warn_message = ("'normalize' was removed in version 0.20 and " - "will be removed in 0.22, instead use " - "`average='%s'`." % normalize) - warnings.warn(warn_message, DeprecationWarning) - # else: - # otherwise what should we do? raise warning or ValueError? + else: + raise ValueError("normalize != None' is only meaningful with " + "`average='samples'`, got `average='%s'`." + % average) + warn_message = ("'normalize' was deprecated in version 0.20 and " + "will be removed in 0.22, instead use " + "`average='%s'`." % average) + warnings.warn(warn_message, DeprecationWarning) if not np.all(labels == present_labels): if np.max(labels) > np.max(present_labels): @@ -575,6 +577,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, sum_axis = 0 class_weight = y_true.toarray().sum(axis=0) weights = sample_weight + else: + sum_axis = 0 + weights = sample_weight pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis, sample_weight=weights) @@ -588,10 +593,13 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = pred_and_true / pred_or_true score[pred_or_true == 0.0] == 1.0 - if average != 'none-samples': - score = np.average(score, weights=class_weight) + if average is not None: + if average == 'none-samples': + score = np.sum(score) + else: + score = np.average(score, weights=class_weight) return score - elif average == 'samples': + elif average == 'samples' or average == 'none-samples': raise ValueError("Sample-based jaccard similarity score is " "not meaningful outside multilabel " "classification. See the accuracy_score instead.") diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3ecfaa3941533..fe389f3da81e6 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -951,6 +951,11 @@ def test_jaccard_similarity_score(): "Please choose another average setting.") assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, y_pred, average='binary', pos_label=-1) + assert_warns_message(DeprecationWarning, + "'normalize' was deprecated in version 0.20 and will " + "be removed in 0.22, instead use `average='samples'`." + , jaccard_similarity_score, y_true, y_pred, + average='samples', normalize=True) y_true = np.array([0, 1, 1, 0, 2]) y_pred = np.array([1, 1, 1, 1, 0]) @@ -962,6 +967,8 @@ def test_jaccard_similarity_score(): "multilabel classification. 
See the accuracy_score instead.") assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, average='samples') + assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, + y_pred, average='none-samples') assert_warns_message(UserWarning, "Note that pos_label (set to 3) is ignored when " @@ -1008,6 +1015,11 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='samples', sample_weight=[1, 2]), 5. / 9) + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='none-samples'), + 35. / 30) + assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), + np.array([1. / 2, 1., 1. / 2])) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='micro', sample_weight=[1, 2]), 4. / 7) @@ -1049,8 +1061,11 @@ def test_multiclass_jaccard_similarity_score(): labels=['ant', 'bird'], sample_weight=weight), 6. / 11) - assert_array_equal(jaccard_similarity_score(y_true, y_pred), + assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), np.array([2. / 3, 1. / 3, 2. / 5])) + + +def test_average_binary_jaccard_similarity_score(): y_true = np.array([1, 0, 1, 1, 0]) y_pred = np.array([1, 0, 1, 1, 1]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, From d3f76d5136bc53fa2278efd111cdaeabb2fab02d Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 11 Jan 2018 16:15:08 +0530 Subject: [PATCH 39/88] fix doc and add average=None test for multiclass now, need to fix common test --- doc/modules/model_evaluation.rst | 4 ++-- sklearn/metrics/tests/test_classification.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 784b505dddbae..c076c0081c6e6 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -682,8 +682,8 @@ score is equal to the classification accuracy. 0.33... >>> jaccard_similarity_score(y_true, y_pred, average='weighted') 0.5 - >>> jaccard_similarity_score(y_true, y_pred) - array([ 1., 0., 0., 1. 
]) + >>> jaccard_similarity_score(y_true, y_pred, average=None) + array([ 1., 0., 0., 1.]) In the multilabel case with binary label indicators: :: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index fe389f3da81e6..f4a3e2a1037d1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1049,7 +1049,9 @@ def test_multiclass_jaccard_similarity_score(): multi_labels_list = [['ant', 'bird'], ['ant', 'cat'], ['cat', 'bird'], ['ant'], ['bird'], ['cat'], None] bin_labels_list = [[0, 1], [0, 2], [2, 1], [0], [1], [2], None] - for average in ('macro', 'weighted', 'micro'): + + # other than average='samples'/'none-samples', test everything else here + for average in ('macro', 'weighted', 'micro', None): for m_label, b_label in zip(multi_labels_list, bin_labels_list): assert_almost_equal(multi_jaccard_similarity_score(average=average, labels=m_label), From 319b5d3bcda45c96e33e5b72354fe045a6757c3b Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Fri, 12 Jan 2018 11:56:57 +0530 Subject: [PATCH 40/88] fix none-samples jaccard_similarity score to return array of scores --- sklearn/metrics/classification.py | 18 ++++++++++-------- sklearn/metrics/tests/test_classification.py | 4 ++-- sklearn/metrics/tests/test_common.py | 10 ++++++---- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index d74d6ac9dc5fc..543ec8561f6c1 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -431,13 +431,14 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, ``'none-samples'``: Calculate metrics for each instance, (only meaningful for multilabel classification). This differs from 'samples' to return - array of class-wise jaccard index, instead of normalzing the array. + an array of sample-wise jaccard index, instead of normalizing the + array. - normalize : None, bool, optional (defaul=True) - If ``False``, return the sum of the Jaccard similarity coefficient - over the sample set. Otherwise, return the average of Jaccard - similarity coefficient. This is only to be specified in case - `average='samples'`. + normalize : bool, optional (default=True) + If ``False``, return an array of Jaccard similarity coefficient for + each samples over the sample set. Otherwise, return the average of + Jaccard similarity coefficient. 'normalize' is only to be specified in + case `average='samples'`. .. versionchanged: 0.20 'normalize' is deprecated and will be removed in 0.22, instead of @@ -591,11 +592,12 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, pred_and_true = np.array([pred_and_true.sum()]) score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] == 1.0 + score[pred_or_true == 0.0] = 1.0 if average is not None: if average == 'none-samples': - score = np.sum(score) + if class_weight is not None: + score = score * class_weight else: score = np.average(score, weights=class_weight) return score diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f4a3e2a1037d1..a28bb9cb7be04 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1015,9 +1015,9 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='samples', sample_weight=[1, 2]), 5. 
/ 9) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + assert_array_equal(jaccard_similarity_score(y_true, y_pred, average='none-samples'), - 35. / 30) + np.array([2. / 3, 1. / 2])) assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), np.array([1. / 2, 1., 1. / 2])) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 83cffc2e4fca0..6c829dbdaa678 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -475,10 +475,12 @@ def test_sample_order_invariance_multilabel_and_multioutput(): for name in MULTILABELS_METRICS: metric = ALL_METRICS[name] - assert_almost_equal(metric(y_true, y_pred), - metric(y_true_shuffle, y_pred_shuffle), - err_msg="%s is not sample order invariant" - % name) + if name != 'unnormalized_jaccard_similarity_score' and \ + name != 'none-samples_jaccard_similarity_score': + assert_almost_equal(metric(y_true, y_pred), + metric(y_true_shuffle, y_pred_shuffle), + err_msg="%s is not sample order invariant" + % name) for name in THRESHOLDED_MULTILABEL_METRICS: metric = ALL_METRICS[name] From 07a05e6e575edd9fe20cd3bc3bb61c35ce8cad61 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 13 Jan 2018 13:34:39 +0530 Subject: [PATCH 41/88] take of care of zero weights for average='weighted' --- sklearn/metrics/classification.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 543ec8561f6c1..91fb857f13598 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -578,6 +578,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, sum_axis = 0 class_weight = y_true.toarray().sum(axis=0) weights = sample_weight + if class_weight.sum() == 0: + return 0 else: sum_axis = 0 weights = sample_weight @@ -620,7 +622,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: - sample_weight = sample_weight[indices] + sample_weight = np.array(sample_weight)[indices] tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None @@ -648,6 +650,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, weights = None elif average == 'weighted': weights = true_sum + if weights.sum() == 0: + return 0 score = tp_sum / (true_sum + pred_sum - tp_sum) From 3a312a3f8705ab84880b6414040547cdb91d6b89 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 13 Jan 2018 23:47:15 +0530 Subject: [PATCH 42/88] fix test_common --- sklearn/metrics/classification.py | 2 +- sklearn/metrics/tests/test_common.py | 83 +++++++++++++++++++--------- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 91fb857f13598..33c373389bee7 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -511,7 +511,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if y_type == 'binary': if pos_label not in present_labels: if len(present_labels) < 2: - return 0. + return 1. 
else: raise ValueError("pos_label=%r is not a valid label: " "%r" % (pos_label, present_labels)) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6c829dbdaa678..a157e7c198265 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -151,13 +151,14 @@ "samples_f2_score": partial(fbeta_score, average="samples", beta=2), "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), - "samples_jaccard_similarity_score": - partial(jaccard_similarity_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, "none-samples_jaccard_similarity_score": - partial(jaccard_similarity_score, average='none-samples') + partial(jaccard_similarity_score, average='none-samples'), + + "binary_jaccard_similarity_score": + partial(jaccard_similarity_score, average="binary") } THRESHOLDED_METRICS = { @@ -211,8 +212,9 @@ "samples_precision_score", "samples_recall_score", "coverage_error", - "samples_jaccard_similarity_score", + "jaccard_similarity_score", "none-samples_jaccard_similarity_score", + "unnormalized_jaccard_similarity_score", "average_precision_score", "weighted_average_precision_score", @@ -235,8 +237,10 @@ "macro_roc_auc", "samples_roc_auc", - "samples_jaccard_similarity_score", + "jaccard_similarity_score", "none-samples_jaccard_similarity_score", + "unnormalized_jaccard_similarity_score", + "binary_jaccard_similarity_score", # with default average='binary', multiclass is prohibited "precision_score", @@ -253,7 +257,7 @@ # Metrics with an "average" argument METRICS_WITH_AVERAGING = [ "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", - "jaccard_similarity_score" + "binary_jaccard_similarity_score" ] # Threshold-based metrics with an "average" argument @@ -303,6 +307,11 @@ "macro_precision_score", "macro_recall_score", "macro_jaccard_similarity_score", + "none-samples_jaccard_similarity_score", + "unnormalized_jaccard_similarity_score", + + "binary_jaccard_similarity_score", + "cohen_kappa_score", ] @@ -310,7 +319,6 @@ METRICS_WITH_NORMALIZE_OPTION = [ "accuracy_score", "jaccard_similarity_score", - "samples_jaccard_similarity_score", "zero_one_loss", ] @@ -333,7 +341,8 @@ MULTILABELS_METRICS = [ "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", - "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", + "jaccard_similarity_score", "none-samples_jaccard_similarity_score", + "unnormalized_jaccard_similarity_score", "zero_one_loss", "unnormalized_zero_one_loss", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", @@ -350,8 +359,6 @@ "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", - "samples_jaccard_similarity_score", - "none-samples_jaccard_similarity_score" ] # Regression metrics with "multioutput-continuous" format support @@ -365,7 +372,8 @@ SYMMETRIC_METRICS = [ "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", - "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", + "jaccard_similarity_score", "none-samples_jaccard_similarity_score", + "unnormalized_jaccard_similarity_score", "zero_one_loss", "unnormalized_zero_one_loss", "micro_jaccard_similarity_score", "macro_jaccard_similarity_score", @@ -376,6 +384,8 @@ "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "binary_jaccard_similarity_score", + "matthews_corrcoef_score", 
"mean_absolute_error", "mean_squared_error", "median_absolute_error", @@ -393,7 +403,7 @@ "precision_score", "recall_score", "f2_score", "f0.5_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", - "weighted_precision_score", + "weighted_precision_score", "weighted_jaccard_similarity_score", "macro_f0.5_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "log_loss", "hinge_loss" @@ -418,6 +428,9 @@ def test_symmetry(): y_true = random_state.randint(0, 2, size=(20, )) y_pred = random_state.randint(0, 2, size=(20, )) + y_true_bin = random_state.randint(0, 2, size=(20, 25)) + y_pred_bin = random_state.randint(0, 2, size=(20, 25)) + # We shouldn't forget any metrics assert_equal(set(SYMMETRIC_METRICS).union( NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS, @@ -431,9 +444,15 @@ def test_symmetry(): # Symmetric metric for name in SYMMETRIC_METRICS: metric = ALL_METRICS[name] - assert_almost_equal(metric(y_true, y_pred), - metric(y_pred, y_true), - err_msg="%s is not symmetric" % name) + if (name in METRIC_UNDEFINED_BINARY and + name in METRIC_UNDEFINED_BINARY): + assert_almost_equal(metric(y_true_bin, y_pred_bin), + metric(y_pred_bin, y_true_bin), + err_msg="%s is not symmetric" % name) + else: + assert_almost_equal(metric(y_true, y_pred), + metric(y_pred, y_true), + err_msg="%s is not symmetric" % name) # Not symmetric metrics for name in NOT_SYMMETRIC_METRICS: @@ -799,6 +818,8 @@ def test_normalize_option_binary_classification(n_samples=20): y_pred = random_state.randint(0, 2, size=(n_samples, )) for name in METRICS_WITH_NORMALIZE_OPTION: + if name in METRIC_UNDEFINED_BINARY: + continue metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, @@ -815,6 +836,8 @@ def test_normalize_option_multiclass_classification(): n_samples = y_true.shape[0] for name in METRICS_WITH_NORMALIZE_OPTION: + if name in METRIC_UNDEFINED_MULTICLASS: + continue metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, @@ -850,8 +873,10 @@ def test_normalize_option_multilabel_classification(): measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, msg="We failed to test correctly the normalize option") - assert_almost_equal(metrics(y_true, y_pred, normalize=False) - / n_samples, measure, + unnormalize_measure = metrics(y_true, y_pred, normalize=False) + if isinstance(unnormalize_measure, np.ndarray): + unnormalize_measure = np.sum(unnormalize_measure) + assert_almost_equal(unnormalize_measure / n_samples, measure, err_msg="Failed with %s" % name) @@ -991,19 +1016,25 @@ def check_sample_weight_invariance(name, metric, y1, y2): # check that the weighted and unweighted scores are unequal weighted_score = metric(y1, y2, sample_weight=sample_weight) - assert_not_equal( - unweighted_score, weighted_score, - msg="Unweighted and weighted scores are unexpectedly " - "equal (%f) for %s" % (weighted_score, name)) + if isinstance(weighted_score, np.ndarray): + assert(not np.allclose(weighted_score, unweighted_score)) + else: + assert_not_equal( + unweighted_score, weighted_score, + msg="Unweighted and weighted scores are unexpectedly " + "equal (%r) for %s" % (weighted_score, name)) # check that sample_weight can be a list weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist()) - assert_almost_equal( - weighted_score, weighted_score_list, - err_msg=("Weighted scores for array and list " - "sample_weight input are not equal (%f != %f) for %s") % ( - 
weighted_score, weighted_score_list, name)) + if isinstance(weighted_score, np.ndarray): + assert(np.allclose(weighted_score, weighted_score_list)) + else: + assert_almost_equal( + weighted_score, weighted_score_list, + err_msg=("Weighted scores for array and list " + "sample_weight input are not equal (%f != %f) for %s") % ( + weighted_score, weighted_score_list, name)) # check that integer weights is the same as repeated samples repeat_weighted_score = metric( From 9251d29710a4df71a98f351d06f72f37b76fe303 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Sat, 13 Jan 2018 23:56:48 +0530 Subject: [PATCH 43/88] don't bother testing 'sample_weight' and fix test --- sklearn/metrics/tests/test_classification.py | 25 ++++---------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a28bb9cb7be04..0ac1ba3cb6558 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -943,7 +943,7 @@ def test_jaccard_similarity_score(): y_true = np.array([0, 0]) y_pred = np.array([0, 0]) assert_equal(jaccard_similarity_score(y_true, y_pred, average='binary', - pos_label=-1), 0.) + pos_label=-1), 1.) y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) @@ -1004,32 +1004,25 @@ def test_multilabel_jaccard_similarity_score(): average='micro'), 3. / 5) # average='samples' (default) assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 12) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - sample_weight=np.array([0.1, 0.9])), 31. / 60) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='samples', labels=[0, 2]), 1. / 2) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='samples', labels=[1, 2]), 1. / 2) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='samples', - sample_weight=[1, 2]), 5. / 9) + # average='none-samples' assert_array_equal(jaccard_similarity_score(y_true, y_pred, average='none-samples'), np.array([2. / 3, 1. / 2])) + # average=None assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), np.array([1. / 2, 1., 1. / 2])) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - sample_weight=[1, 2]), 4. / 7) + y_true = np.array([[0, 1, 1], [1, 0, 1]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='macro'), 5. / 6) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro', - sample_weight=[1, 2]), 8. / 9) + # average='weighted' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 7. / 8) @@ -1057,14 +1050,6 @@ def test_multiclass_jaccard_similarity_score(): labels=m_label), bin_jaccard_similarity_score(average=average, labels=b_label)) - weight = np.array([1, 2, 1, 1, 2, 1, 2, 3]) - assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro', - labels=['ant', 'bird'], - sample_weight=weight), - 6. / 11) - assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), - np.array([2. / 3, 1. / 3, 2. 
/ 5])) def test_average_binary_jaccard_similarity_score(): From 225c0f28c2978e911b37e6bdf1326097e4fed4b9 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 16 Jan 2018 14:12:20 +0530 Subject: [PATCH 44/88] remove average='none-samples' as a possibility --- sklearn/metrics/classification.py | 45 +++++++------------- sklearn/metrics/tests/test_classification.py | 18 +++----- sklearn/metrics/tests/test_common.py | 26 +++-------- 3 files changed, 29 insertions(+), 60 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 33c373389bee7..4ea184980676a 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -428,11 +428,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification). - ``'none-samples'``: - Calculate metrics for each instance, (only meaningful for - multilabel classification). This differs from 'samples' to return - an array of sample-wise jaccard index, instead of normalizing the - array. normalize : bool, optional (default=True) If ``False``, return an array of Jaccard similarity coefficient for @@ -440,11 +435,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Jaccard similarity coefficient. 'normalize' is only to be specified in case `average='samples'`. - .. versionchanged: 0.20 - 'normalize' is deprecated and will be removed in 0.22, instead of - `normalize=True` use instead just `average='samples'` and for - `normalize=False` use instead `average='none-samples'`. - sample_weight : array-like of shape = [n_samples], optional Sample weights. @@ -497,8 +487,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, array([ 0.66..., 0.5 , 0. ]) """ - average_options = (None, 'micro', 'macro', 'weighted', 'samples', - 'none-samples') + average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError("average has to be one of " + str(average_options)) @@ -534,18 +523,13 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): - if normalize is not None: - if average == 'samples': - if not normalize: - average = 'none-samples' - else: - raise ValueError("normalize != None' is only meaningful with " - "`average='samples'`, got `average='%s'`." - % average) - warn_message = ("'normalize' was deprecated in version 0.20 and " - "will be removed in 0.22, instead use " - "`average='%s'`." % average) - warnings.warn(warn_message, DeprecationWarning) + if average == 'samples': + if normalize is None: + normalize = True + elif normalize is not None: + raise ValueError("'normalize' is only meaningful with " + "`average='samples'`, got `average='%s'`." 
+ % average) if not np.all(labels == present_labels): if np.max(labels) > np.max(present_labels): @@ -562,7 +546,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, with np.errstate(divide='ignore', invalid='ignore'): - if average == 'samples' or average == 'none-samples': + if average == 'samples': sum_axis = 1 class_weight = sample_weight weights = None @@ -597,13 +581,14 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score[pred_or_true == 0.0] = 1.0 if average is not None: - if average == 'none-samples': + if normalize == False: if class_weight is not None: - score = score * class_weight - else: - score = np.average(score, weights=class_weight) + score = np.dot(score, class_weight) + else: + score = score.sum() + score = np.average(score, weights=class_weight) return score - elif average == 'samples' or average == 'none-samples': + elif average == 'samples': raise ValueError("Sample-based jaccard similarity score is " "not meaningful outside multilabel " "classification. See the accuracy_score instead.") diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0ac1ba3cb6558..003379e1df9a7 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -951,11 +951,6 @@ def test_jaccard_similarity_score(): "Please choose another average setting.") assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, y_pred, average='binary', pos_label=-1) - assert_warns_message(DeprecationWarning, - "'normalize' was deprecated in version 0.20 and will " - "be removed in 0.22, instead use `average='samples'`." - , jaccard_similarity_score, y_true, y_pred, - average='samples', normalize=True) y_true = np.array([0, 1, 1, 0, 2]) y_pred = np.array([1, 1, 1, 1, 0]) @@ -968,7 +963,7 @@ def test_jaccard_similarity_score(): assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, average='samples') assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, - y_pred, average='none-samples') + y_pred, average='samples', normalize=False) assert_warns_message(UserWarning, "Note that pos_label (set to 3) is ignored when " @@ -998,7 +993,7 @@ def test_multilabel_jaccard_similarity_score(): y_pred = np.array([[1, 1, 1], [1, 0, 1]]) # average='macro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='macro'), 2. / 3) + average='macro'), 2. / 3) # average='micro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='micro'), 3. / 5) @@ -1010,10 +1005,11 @@ def test_multilabel_jaccard_similarity_score(): assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='samples', labels=[1, 2]), 1. / 2) - # average='none-samples' - assert_array_equal(jaccard_similarity_score(y_true, y_pred, - average='none-samples'), - np.array([2. / 3, 1. / 2])) + # average='samples', normalize=False + assert_almost_equal(jaccard_similarity_score(y_true, y_pred, + average='samples', + normalize=False), + 7. / 6) # average=None assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), np.array([1. / 2, 1., 1. 
/ 2])) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a157e7c198265..19113172b7559 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -154,9 +154,6 @@ "cohen_kappa_score": cohen_kappa_score, - "none-samples_jaccard_similarity_score": - partial(jaccard_similarity_score, average='none-samples'), - "binary_jaccard_similarity_score": partial(jaccard_similarity_score, average="binary") } @@ -213,7 +210,6 @@ "samples_recall_score", "coverage_error", "jaccard_similarity_score", - "none-samples_jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "average_precision_score", @@ -238,7 +234,6 @@ "samples_roc_auc", "jaccard_similarity_score", - "none-samples_jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "binary_jaccard_similarity_score", @@ -307,7 +302,6 @@ "macro_precision_score", "macro_recall_score", "macro_jaccard_similarity_score", - "none-samples_jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "binary_jaccard_similarity_score", @@ -341,8 +335,7 @@ MULTILABELS_METRICS = [ "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", - "jaccard_similarity_score", "none-samples_jaccard_similarity_score", - "unnormalized_jaccard_similarity_score", + "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "zero_one_loss", "unnormalized_zero_one_loss", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", @@ -372,8 +365,7 @@ SYMMETRIC_METRICS = [ "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", - "jaccard_similarity_score", "none-samples_jaccard_similarity_score", - "unnormalized_jaccard_similarity_score", + "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", "zero_one_loss", "unnormalized_zero_one_loss", "micro_jaccard_similarity_score", "macro_jaccard_similarity_score", @@ -494,11 +486,9 @@ def test_sample_order_invariance_multilabel_and_multioutput(): for name in MULTILABELS_METRICS: metric = ALL_METRICS[name] - if name != 'unnormalized_jaccard_similarity_score' and \ - name != 'none-samples_jaccard_similarity_score': - assert_almost_equal(metric(y_true, y_pred), - metric(y_true_shuffle, y_pred_shuffle), - err_msg="%s is not sample order invariant" + assert_almost_equal(metric(y_true, y_pred), + metric(y_true_shuffle, y_pred_shuffle), + err_msg="%s is not sample order invariant" % name) for name in THRESHOLDED_MULTILABEL_METRICS: @@ -873,10 +863,8 @@ def test_normalize_option_multilabel_classification(): measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, msg="We failed to test correctly the normalize option") - unnormalize_measure = metrics(y_true, y_pred, normalize=False) - if isinstance(unnormalize_measure, np.ndarray): - unnormalize_measure = np.sum(unnormalize_measure) - assert_almost_equal(unnormalize_measure / n_samples, measure, + assert_almost_equal(metrics(y_true, y_pred, normalize=False) + / n_samples, measure, err_msg="Failed with %s" % name) From d7fe5caf0f9d8c82e6c9252ba3b707bba53e3f7c Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 16 Jan 2018 18:53:27 +0530 Subject: [PATCH 45/88] fix average='weighted' --- sklearn/metrics/classification.py | 12 ++++++++---- sklearn/metrics/tests/test_common.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4ea184980676a..7455d0e2eab41 100644 --- a/sklearn/metrics/classification.py +++ 
b/sklearn/metrics/classification.py @@ -478,10 +478,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average='weighted') ... # doctest: +ELLIPSIS 0.4722... - >>> jaccard_similarity_score(y_true, y_pred) + >>> jaccard_similarity_score(y_true, y_pred, average=None) ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE array([ 0.66..., 0. , 0.5 ]) - >>> jaccard_similarity_score(y_true, y_pred, + >>> jaccard_similarity_score(y_true, y_pred, average=None, ... labels=['ant', 'cat', 'bird']) ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE array([ 0.66..., 0.5 , 0. ]) @@ -560,7 +560,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, weights = sample_weight elif average == 'weighted': sum_axis = 0 - class_weight = y_true.toarray().sum(axis=0) + if sample_weight is None: + class_weight = y_true.toarray().sum(axis=0) + else: + class_weight = (y_true.toarray().T).dot(sample_weight) weights = sample_weight if class_weight.sum() == 0: return 0 @@ -586,7 +589,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = np.dot(score, class_weight) else: score = score.sum() - score = np.average(score, weights=class_weight) + else: + score = np.average(score, weights=class_weight) return score elif average == 'samples': raise ValueError("Sample-based jaccard similarity score is " diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 19113172b7559..148b7bab77a59 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -489,7 +489,7 @@ def test_sample_order_invariance_multilabel_and_multioutput(): assert_almost_equal(metric(y_true, y_pred), metric(y_true_shuffle, y_pred_shuffle), err_msg="%s is not sample order invariant" - % name) + % name) for name in THRESHOLDED_MULTILABEL_METRICS: metric = ALL_METRICS[name] From 414ae8badb14323721fed224bbc90222aab1e241 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 17 Jan 2018 04:00:41 +0530 Subject: [PATCH 46/88] use np.in1d instead of np.isin (unavailable in version < 1.13.0) --- sklearn/metrics/classification.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 7455d0e2eab41..1c56930ab1976 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -603,8 +603,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, y_pred = le.transform(y_pred) labels = le.transform(labels)[:n_labels] - indices = np.where(np.isin(y_true, labels) + - np.isin(y_pred, labels) == True)[0] + # use 'np.in1d' instead of 'np.isin' (unavailable in version < 1.13.0) + indices = np.where(np.in1d(y_true, labels, assume_unique=False, + invert=False) + + np.in1d(y_pred, labels, assume_unique=False, + invert=False) == True)[0] y_true = y_true[indices] y_pred = y_pred[indices] From 3ac79bdcdbd216f29cd1b9abb62064282edd96e2 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 17 Jan 2018 15:36:38 +0530 Subject: [PATCH 47/88] address Joel's comments --- sklearn/metrics/classification.py | 35 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 1c56930ab1976..b65bc0ad7e58a 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -545,6 +545,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, 
pos_label=1, y_pred = y_pred[:, labels[:n_labels]] with np.errstate(divide='ignore', invalid='ignore'): + class_weight = None if average == 'samples': sum_axis = 1 @@ -560,11 +561,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, weights = sample_weight elif average == 'weighted': sum_axis = 0 + weights = sample_weight if sample_weight is None: class_weight = y_true.toarray().sum(axis=0) else: class_weight = (y_true.toarray().T).dot(sample_weight) - weights = sample_weight if class_weight.sum() == 0: return 0 else: @@ -622,33 +623,39 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if len(tp_bins): tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, minlength=len(labels))[labels] - else: - # pathological case - true_sum = pred_sum = tp_sum = np.zeros(len(labels)) - - if len(y_pred): - pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(labels))[labels] - if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, minlength=len(labels))[labels] + pred_sum = np.bincount(y_pred, weights=sample_weight, + minlength=len(labels))[labels] + else: + tp_sum = np.zeros(len(labels)) + if len(y_true): + true_sum = np.bincount(y_true, weights=sample_weight, + minlength=len(labels))[labels] + else: + true_sum = np.zeros(len(labels)) + if len(y_pred): + pred_sum = np.bincount(y_pred, weights=sample_weight, + minlength=len(labels))[labels] + else: + pred_sum = np.zeros(len(labels)) if average == 'micro' or average == 'binary': tp_sum = np.array([tp_sum.sum()]) true_sum = np.array([true_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) - weights = None + class_weight = None elif average == 'macro': - weights = None + class_weight = None elif average == 'weighted': - weights = true_sum - if weights.sum() == 0: + class_weight = true_sum + if class_weight.sum() == 0: return 0 score = tp_sum / (true_sum + pred_sum - tp_sum) if average is not None: - score = np.average(score, weights=weights) + score = np.average(score, weights=class_weight) return score From 3673407a464f7c23fe59955e7ffb3e6ef2ef3afb Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 17 Jan 2018 17:59:56 +0530 Subject: [PATCH 48/88] fix lgtm error --- sklearn/metrics/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index b65bc0ad7e58a..7d34c24711759 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -647,7 +647,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, class_weight = None elif average == 'macro': class_weight = None - elif average == 'weighted': + else: + # average='weighted' class_weight = true_sum if class_weight.sum() == 0: return 0 From d3d7ca931bd4514648b1dd8dceda970e783fb822 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 17 Jan 2018 18:02:43 +0530 Subject: [PATCH 49/88] fix lgtm --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 7d34c24711759..74d342208bee0 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -640,6 +640,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, else: pred_sum = np.zeros(len(labels)) + class_weight = None if average == 'micro' or average == 'binary': tp_sum = np.array([tp_sum.sum()]) true_sum = np.array([true_sum.sum()]) @@ -647,8 +648,7 @@ def 
jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, class_weight = None elif average == 'macro': class_weight = None - else: - # average='weighted' + elif average == 'weighted': class_weight = true_sum if class_weight.sum() == 0: return 0 From 04768c50e13b2d54d3c1b502fd4bf988450a46fe Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 18 Jan 2018 10:31:22 +0530 Subject: [PATCH 50/88] Fix flake8 errors --- sklearn/metrics/classification.py | 14 ++++++------- sklearn/metrics/tests/test_classification.py | 22 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 74d342208bee0..587da76a4529b 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -503,11 +503,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, return 1. else: raise ValueError("pos_label=%r is not a valid label: " - "%r" % (pos_label, present_labels)) + "%r" % (pos_label, present_labels)) labels = [pos_label] else: raise ValueError("Target is %s but average='binary'. Please " - "choose another average setting." % y_type) + "choose another average setting." % y_type) elif pos_label not in (None, 1): warnings.warn("Note that pos_label (set to %r) is ignored when " "average != 'binary' (got %r). You may use " @@ -585,7 +585,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score[pred_or_true == 0.0] = 1.0 if average is not None: - if normalize == False: + if normalize is False: if class_weight is not None: score = np.dot(score, class_weight) else: @@ -608,7 +608,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, indices = np.where(np.in1d(y_true, labels, assume_unique=False, invert=False) + np.in1d(y_pred, labels, assume_unique=False, - invert=False) == True)[0] + invert=False))[0] y_true = y_true[indices] y_pred = y_pred[indices] @@ -1307,16 +1307,16 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if len(tp_bins): tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, - minlength=len(labels)) + minlength=len(labels)) else: # Pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) if len(y_pred): pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(labels)) + minlength=len(labels)) if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, - minlength=len(labels)) + minlength=len(labels)) # Retain only selected labels indices = np.searchsorted(sorted_labels, labels[:n_labels]) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 003379e1df9a7..e677d28def36c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -966,11 +966,11 @@ def test_jaccard_similarity_score(): y_pred, average='samples', normalize=False) assert_warns_message(UserWarning, - "Note that pos_label (set to 3) is ignored when " - "average != 'binary' (got 'micro'). You may use " - "labels=[pos_label] to specify a single positive " - "class.", jaccard_similarity_score, y_true, y_pred, - average='micro', pos_label=3) + "Note that pos_label (set to 3) is ignored when " + "average != 'binary' (got 'micro'). 
You may use " + "labels=[pos_label] to specify a single positive " + "class.", jaccard_similarity_score, y_true, y_pred, + average='micro', pos_label=3) def test_multilabel_jaccard_similarity_score(): @@ -996,7 +996,7 @@ def test_multilabel_jaccard_similarity_score(): average='macro'), 2. / 3) # average='micro' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='micro'), 3. / 5) + average='micro'), 3. / 5) # average='samples' (default) assert_almost_equal(jaccard_similarity_score(y_true, y_pred), 7. / 12) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, @@ -1007,9 +1007,9 @@ def test_multilabel_jaccard_similarity_score(): labels=[1, 2]), 1. / 2) # average='samples', normalize=False assert_almost_equal(jaccard_similarity_score(y_true, y_pred, - average='samples', - normalize=False), - 7. / 6) + average='samples', + normalize=False), + 7. / 6) # average=None assert_array_equal(jaccard_similarity_score(y_true, y_pred, average=None), np.array([1. / 2, 1., 1. / 2])) @@ -1033,8 +1033,8 @@ def test_multiclass_jaccard_similarity_score(): y_pred_bin = lb.transform(y_pred) multi_jaccard_similarity_score = partial(jaccard_similarity_score, y_true, y_pred) - bin_jaccard_similarity_score = partial(jaccard_similarity_score, y_true_bin - , y_pred_bin) + bin_jaccard_similarity_score = partial(jaccard_similarity_score, + y_true_bin, y_pred_bin) multi_labels_list = [['ant', 'bird'], ['ant', 'cat'], ['cat', 'bird'], ['ant'], ['bird'], ['cat'], None] bin_labels_list = [[0, 1], [0, 2], [2, 1], [0], [1], [2], None] From 54fe344e3ed8f2ff1ab54db057ce7b07608884d3 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 18 Jan 2018 12:19:15 +0530 Subject: [PATCH 51/88] code coverage --- sklearn/metrics/classification.py | 41 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 25 ++++++++++-- sklearn/metrics/tests/test_common.py | 24 +++++------- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 587da76a4529b..28cdb8ad58843 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -430,10 +430,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, meaningful for multilabel classification). normalize : bool, optional (default=True) - If ``False``, return an array of Jaccard similarity coefficient for - each samples over the sample set. Otherwise, return the average of - Jaccard similarity coefficient. 'normalize' is only to be specified in - case `average='samples'`. + If ``False``, return the sum of the Jaccard similarity coefficient + over the sample set. Otherwise, return the average of Jaccard + similarity coefficient. ``normalize`` is only meaningful when + ``average='samples'``. sample_weight : array-like of shape = [n_samples], optional Sample weights. 
@@ -640,24 +640,25 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, else: pred_sum = np.zeros(len(labels)) - class_weight = None - if average == 'micro' or average == 'binary': - tp_sum = np.array([tp_sum.sum()]) - true_sum = np.array([true_sum.sum()]) - pred_sum = np.array([pred_sum.sum()]) - class_weight = None - elif average == 'macro': + with np.errstate(divide='ignore', invalid='ignore'): class_weight = None - elif average == 'weighted': - class_weight = true_sum - if class_weight.sum() == 0: - return 0 + if average == 'micro' or average == 'binary': + tp_sum = np.array([tp_sum.sum()]) + true_sum = np.array([true_sum.sum()]) + pred_sum = np.array([pred_sum.sum()]) + class_weight = None + elif average == 'macro': + class_weight = None + elif average == 'weighted': + class_weight = true_sum + if class_weight.sum() == 0: + return 0 - score = tp_sum / (true_sum + pred_sum - tp_sum) + score = tp_sum / (true_sum + pred_sum - tp_sum) - if average is not None: - score = np.average(score, weights=class_weight) - return score + if average is not None: + score = np.average(score, weights=class_weight) + return score def matthews_corrcoef(y_true, y_pred, sample_weight=None): @@ -1267,7 +1268,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Select labels: if not np.all(labels == present_labels): if np.max(labels) > np.max(present_labels): - raise ValueError('All labels must be in [0, n labels). ' + raise ValueError('All labels must be in [0, n, labels). ' 'Got %d > %d' % (np.max(labels), np.max(present_labels))) if np.min(labels) < 0: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e677d28def36c..0ae50e99c1410 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -940,10 +940,13 @@ def test_multilabel_hamming_loss(): def test_jaccard_similarity_score(): - y_true = np.array([0, 0]) - y_pred = np.array([0, 0]) + y_true = np.array([0, 1, 0, 1, 1]) + y_pred = np.array([0, 1, 0, 1, 1]) assert_equal(jaccard_similarity_score(y_true, y_pred, average='binary', - pos_label=-1), 1.) + pos_label=0), 1.) + assert_raise_message(ValueError, "pos_label=2 is not a valid label: " + "array([0, 1])", jaccard_similarity_score, y_true, + y_pred, average='binary', pos_label=2) y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) @@ -1021,6 +1024,17 @@ def test_multilabel_jaccard_similarity_score(): # average='weighted' assert_almost_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 7. / 8) + # normalize error + msg1 = ("'normalize' is only meaningful with `average='samples'`, got " + "`average='macro'`.") + assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, + y_pred, average='macro', normalize=False) + msg2 = 'All labels must be in [0, n, labels). Got 4 > 2' + assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, + y_pred, labels=[4]) + msg3 = 'All labels must be in [0, n, labels). Got -1 < 0' + assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, + y_pred, labels=[-1]) def test_multiclass_jaccard_similarity_score(): @@ -1047,6 +1061,11 @@ def test_multiclass_jaccard_similarity_score(): bin_jaccard_similarity_score(average=average, labels=b_label)) + y_true = np.array([]) + y_pred = np.array([]) + assert_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), + 0.) 
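# [Illustrative aside, not part of the patch series.] The loop above that zips
# multi_labels_list with bin_labels_list checks that scoring multiclass labels
# directly agrees with binarizing them and scoring each class column as a
# multilabel problem. A NumPy sketch of that per-class computation, on
# illustrative string labels (not necessarily the test fixture):
import numpy as np
from sklearn.preprocessing import LabelBinarizer

y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat', 'bird', 'bird']
y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird', 'bird', 'cat']

lb = LabelBinarizer().fit(y_true)
T, P = lb.transform(y_true), lb.transform(y_pred)

# Per-class Jaccard on the binarized columns: |T & P| / |T | P| for each label.
inter = np.logical_and(T, P).sum(axis=0)
union = np.logical_or(T, P).sum(axis=0)
print(dict(zip(lb.classes_, inter / union)))   # ant: 2/3, bird: 1/3, cat: 2/5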
+ def test_average_binary_jaccard_similarity_score(): y_true = np.array([1, 0, 1, 1, 0]) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 148b7bab77a59..018a7ea8d9959 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1004,25 +1004,19 @@ def check_sample_weight_invariance(name, metric, y1, y2): # check that the weighted and unweighted scores are unequal weighted_score = metric(y1, y2, sample_weight=sample_weight) - if isinstance(weighted_score, np.ndarray): - assert(not np.allclose(weighted_score, unweighted_score)) - else: - assert_not_equal( - unweighted_score, weighted_score, - msg="Unweighted and weighted scores are unexpectedly " - "equal (%r) for %s" % (weighted_score, name)) + assert_not_equal( + unweighted_score, weighted_score, + msg="Unweighted and weighted scores are unexpectedly " + "equal (%f) for %s" % (weighted_score, name)) # check that sample_weight can be a list weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist()) - if isinstance(weighted_score, np.ndarray): - assert(np.allclose(weighted_score, weighted_score_list)) - else: - assert_almost_equal( - weighted_score, weighted_score_list, - err_msg=("Weighted scores for array and list " - "sample_weight input are not equal (%f != %f) for %s") % ( - weighted_score, weighted_score_list, name)) + assert_almost_equal( + weighted_score, weighted_score_list, + err_msg=("Weighted scores for array and list " + "sample_weight input are not equal (%f != %f) for %s") % ( + weighted_score, weighted_score_list, name)) # check that integer weights is the same as repeated samples repeat_weighted_score = metric( From ee548532f4bd1c6b57ee7d078353ae8dcd27703d Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 18 Jan 2018 13:04:38 +0530 Subject: [PATCH 52/88] fix flake8 --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0ae50e99c1410..846781fb4b618 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1026,7 +1026,7 @@ def test_multilabel_jaccard_similarity_score(): average='weighted'), 7. / 8) # normalize error msg1 = ("'normalize' is only meaningful with `average='samples'`, got " - "`average='macro'`.") + "`average='macro'`.") assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, y_pred, average='macro', normalize=False) msg2 = 'All labels must be in [0, n, labels). Got 4 > 2' From 551804d9c2a251b54dc7888ab5d068e2533a24ce Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 18 Jan 2018 16:54:48 +0530 Subject: [PATCH 53/88] improve doc --- doc/modules/model_evaluation.rst | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index c076c0081c6e6..829b674861d77 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -70,6 +70,7 @@ Scoring Function 'neg_log_loss' :func:`metrics.log_loss` requires ``predict_proba`` support 'precision' etc. :func:`metrics.precision_score` suffixes apply as with 'f1' 'recall' etc. :func:`metrics.recall_score` suffixes apply as with 'f1' +'jaccard' etc. 
:func:`metric.jaccard_similarity_score` suffixes apply as with 'f1' 'roc_auc' :func:`metrics.roc_auc_score` **Clustering** @@ -667,29 +668,33 @@ with a ground truth label set :math:`y_i` and predicted label set J(y_i, \hat{y}_i) = \frac{|y_i \cap \hat{y}_i|}{|y_i \cup \hat{y}_i|}. -In binary and multiclass classification, the Jaccard similarity coefficient -score is equal to the classification accuracy. +:func:`jaccard_similarity_score` works like :func:`precision_recall_fscore_support` +as a naively set-wise measure applying only to binary and multilabel targets. -:: +In the multilabel case with binary label indicators: :: >>> import numpy as np >>> from sklearn.metrics import jaccard_similarity_score + >>> y_true = np.array([[0, 1], [1, 1]]) + >>> y_pred = np.ones((2, 2)) + >>> jaccard_similarity_score(y_true, y_pred) + 0.75 + >>> jaccard_similarity_score(y_true, y_pred, normalize=False) + 1.5 + +For which multiclass problems are binarized. + +:: + >>> y_pred = [0, 2, 1, 3] >>> y_true = [0, 1, 2, 3] >>> jaccard_similarity_score(y_true, y_pred, average='macro') 0.5 >>> jaccard_similarity_score(y_true, y_pred, average='micro') 0.33... - >>> jaccard_similarity_score(y_true, y_pred, average='weighted') - 0.5 >>> jaccard_similarity_score(y_true, y_pred, average=None) array([ 1., 0., 0., 1.]) -In the multilabel case with binary label indicators: :: - - >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) - 0.75 - .. _precision_recall_f_measure_metrics: Precision, recall and F-measures From 0d45a444e039a3f0e00d60a68e4f8e672ed1316c Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 22 Jan 2018 21:27:35 +0530 Subject: [PATCH 54/88] add what's new entry and address Joel's comments --- doc/modules/model_evaluation.rst | 4 +-- doc/whats_new/v0.20.rst | 11 ++++++ sklearn/metrics/classification.py | 51 +++++++++++----------------- sklearn/metrics/tests/test_common.py | 2 +- sklearn/svm/base.py | 2 +- 5 files changed, 34 insertions(+), 36 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index b9e77651568cf..cebd1b45dac31 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -683,9 +683,7 @@ In the multilabel case with binary label indicators: :: >>> jaccard_similarity_score(y_true, y_pred, normalize=False) 1.5 -For which multiclass problems are binarized. - -:: +Multiclass problems are binarized: :: >>> y_pred = [0, 2, 1, 3] >>> y_true = [0, 1, 2, 3] diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 7e7d39dbf1759..65ceaa24c1614 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -149,6 +149,12 @@ Metrics - :func:`metrics.roc_auc_score` now supports binary ``y_true`` other than ``{0, 1}`` or ``{-1, 1}``. :issue:`9828` by :user:`Hanmin Qin `. +- :func:`metrics.jaccard_similarity_score` now accepts ``average`` argument + like :func:`metrics.precision_recall_fscore_support` as a naively set-wise + measure applying only to binary, multilabel targets and binarizing + multiclass input. + :issue:`10083` by :user:`Gaurav Dhingra `. + Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the @@ -266,6 +272,11 @@ Decomposition, manifold learning and clustering Metrics +- Fixed a bug in :func:`metrics.jaccard_similarity_score`, to disallow + sample-wise averaging for 1d input, since it is redundantly equal to + :func:`metrics.accuracy_score`. + :issue:`10083` by :user:`Gaurav Dhingra `. 
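(Illustrative aside, not part of the patch series.) The "binarizing multiclass
input" behaviour this entry describes can be reproduced with plain NumPy; the
sketch below matches the ``average='macro'``, ``'micro'`` and ``None`` values
shown in the documentation example above::

    import numpy as np
    from sklearn.preprocessing import label_binarize

    classes = [0, 1, 2, 3]
    T = label_binarize([0, 1, 2, 3], classes=classes)   # y_true, binarized
    P = label_binarize([0, 2, 1, 3], classes=classes)   # y_pred, binarized

    per_class = (np.logical_and(T, P).sum(axis=0)
                 / np.logical_or(T, P).sum(axis=0))     # average=None -> [1, 0, 0, 1]
    macro = per_class.mean()                            # average='macro' -> 0.5
    micro = (np.logical_and(T, P).sum()
             / np.logical_or(T, P).sum())               # average='micro' -> 1/3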
+ - Fixed a bug in :func:`metrics.precision_precision_recall_fscore_support` when truncated `range(n_labels)` is passed as value for `labels`. :issue:`10377` by :user:`Gaurav Dhingra `. diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 290c5be87720e..34696fbca4c60 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,7 @@ class labels [2]_. def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average='samples', normalize=None, + average='samples', normalize=True, sample_weight=None): """Jaccard similarity coefficient score @@ -440,7 +440,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Returns ------- - score: float (if average is not None) or array of float, shape =\ + score: float (if average is not None) or array of floats, shape =\ [n_unique_labels] See also @@ -459,32 +459,24 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, In the multilabel case with binary label indicators: - >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]), - ... np.ones((2, 2))) - 0.75 - - In the multiclass case: - - >>> y_pred = [0, 2, 1, 3] - >>> y_true = [0, 1, 2, 3] - >>> jaccard_similarity_score(y_true, y_pred, average='macro') - 0.5 + >>> y_true = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]]) + >>> y_pred = np.array([[0, 1, 1], [1, 1, 1], [0, 0, 1]]) + >>> jaccard_similarity_score(y_true, y_pred) + ... # doctest: +ELLIPSIS + 0.33... >>> jaccard_similarity_score(y_true, y_pred, average='micro') ... # doctest: +ELLIPSIS - 0.333... - - >>> y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat'] - >>> y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird'] + 0.33... >>> jaccard_similarity_score(y_true, y_pred, average='weighted') - ... # doctest: +ELLIPSIS - 0.4722... + 0.5 >>> jaccard_similarity_score(y_true, y_pred, average=None) - ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE - array([ 0.66..., 0. , 0.5 ]) - >>> jaccard_similarity_score(y_true, y_pred, average=None, - ... labels=['ant', 'cat', 'bird']) - ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE - array([ 0.66..., 0.5 , 0. ]) + array([ 0., 0., 1.]) + + In the multiclass case: + + >>> jaccard_similarity_score(np.array([0, 2, 1, 3]), + ... np.array([0, 1, 2, 3]), average='macro') + 0.5 """ average_options = (None, 'micro', 'macro', 'weighted', 'samples') @@ -523,10 +515,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): - if average == 'samples': - if normalize is None: - normalize = True - elif normalize is not None: + if average != 'samples' and not normalize: raise ValueError("'normalize' is only meaningful with " "`average='samples'`, got `average='%s'`." 
% average) @@ -585,7 +574,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score[pred_or_true == 0.0] = 1.0 if average is not None: - if normalize is False: + if not normalize: if class_weight is not None: score = np.dot(score, class_weight) else: @@ -640,7 +629,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, else: pred_sum = np.zeros(len(labels)) - with np.errstate(divide='ignore', invalid='ignore'): + with np.errstate(divide='ignore'): class_weight = None if average == 'micro' or average == 'binary': tp_sum = np.array([tp_sum.sum()]) @@ -1268,7 +1257,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # Select labels: if not np.all(labels == present_labels): if np.max(labels) > np.max(present_labels): - raise ValueError('All labels must be in [0, n, labels). ' + raise ValueError('All labels must be in [0, n labels). ' 'Got %d > %d' % (np.max(labels), np.max(present_labels))) if np.min(labels) < 0: diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 1570479150d97..11951076d92db 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1015,7 +1015,7 @@ def check_sample_weight_invariance(name, metric, y1, y2): weighted_score, weighted_score_list, err_msg=("Weighted scores for array and list " "sample_weight input are not equal (%f != %f) for %s") % ( - weighted_score, weighted_score_list, name)) + weighted_score, weighted_score_list, name)) # check that integer weights is the same as repeated samples repeat_weighted_score = metric( diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 60442d6bbeee1..eb5bb01508953 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -543,7 +543,7 @@ def predict(self, X): # estimators. def _check_proba(self): if not self.probability: - raise AttributeError("predict_proba is not available when" + raise AttributeError("predict_proba is not available when " " probability=False") if self._impl not in ('c_svc', 'nu_svc'): raise AttributeError("predict_proba only implemented for SVC" From 785bb36c06c26a9446f9b2c10b547ad445ab05bf Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Mon, 22 Jan 2018 21:48:36 +0530 Subject: [PATCH 55/88] improve doc's entry --- doc/whats_new/v0.20.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 65ceaa24c1614..b487b4473d4be 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -273,7 +273,7 @@ Decomposition, manifold learning and clustering Metrics - Fixed a bug in :func:`metrics.jaccard_similarity_score`, to disallow - sample-wise averaging for 1d input, since it is redundantly equal to + sample-wise averaging of multiclass input, since it is redundantly equal to :func:`metrics.accuracy_score`. :issue:`10083` by :user:`Gaurav Dhingra `. From 7c1314a90f285e2773c952282096fafc7ab23a8a Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 23 Jan 2018 11:31:26 +0530 Subject: [PATCH 56/88] use normalize='true-if-samples' for internal use --- sklearn/metrics/classification.py | 4 ++-- sklearn/metrics/tests/test_classification.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 34696fbca4c60..a9bc1a2ccd63f 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -373,7 +373,7 @@ class labels [2]_. 
def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, - average='samples', normalize=True, + average='samples', normalize='true-if-samples', sample_weight=None): """Jaccard similarity coefficient score @@ -515,7 +515,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, assume_unique=True)]) if y_type.startswith('multilabel'): - if average != 'samples' and not normalize: + if average != 'samples' and normalize != 'true-if-samples': raise ValueError("'normalize' is only meaningful with " "`average='samples'`, got `average='%s'`." % average) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ce28406d214a1..d12f17edbe7b5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1051,6 +1051,8 @@ def test_multilabel_jaccard_similarity_score(): "`average='macro'`.") assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, y_pred, average='macro', normalize=False) + assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, + y_pred, average='macro', normalize=True) msg2 = 'All labels must be in [0, n, labels). Got 4 > 2' assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, y_pred, labels=[4]) From 90e0c5c5e5709f3249db01c367ad7e3030adfbd7 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Thu, 25 Jan 2018 05:13:16 +0530 Subject: [PATCH 57/88] address Joel's comments all, but one --- doc/modules/model_evaluation.rst | 5 +- doc/whats_new/v0.20.rst | 4 +- sklearn/metrics/classification.py | 97 ++++++++++++++-------------- sklearn/metrics/tests/test_common.py | 3 +- 4 files changed, 55 insertions(+), 54 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index cebd1b45dac31..4bd65bd87176e 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -70,7 +70,7 @@ Scoring Function 'neg_log_loss' :func:`metrics.log_loss` requires ``predict_proba`` support 'precision' etc. :func:`metrics.precision_score` suffixes apply as with 'f1' 'recall' etc. :func:`metrics.recall_score` suffixes apply as with 'f1' -'jaccard' etc. :func:`metric.jaccard_similarity_score` suffixes apply as with 'f1' +'jaccard' etc. :func:`metrics.jaccard_similarity_score` suffixes apply as with 'f1' 'roc_auc' :func:`metrics.roc_auc_score` **Clustering** @@ -683,7 +683,8 @@ In the multilabel case with binary label indicators: :: >>> jaccard_similarity_score(y_true, y_pred, normalize=False) 1.5 -Multiclass problems are binarized: :: +Multiclass problems are binarized and treated like the corresponding +multilabel problem: :: >>> y_pred = [0, 2, 1, 3] >>> y_true = [0, 1, 2, 3] diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index b487b4473d4be..2eab043ecf3c5 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -151,8 +151,8 @@ Metrics - :func:`metrics.jaccard_similarity_score` now accepts ``average`` argument like :func:`metrics.precision_recall_fscore_support` as a naively set-wise - measure applying only to binary, multilabel targets and binarizing - multiclass input. + measure applying only to binary, multilabel targets and it binarizes + multiclass input and treats them like the corresponding multilabel problem. :issue:`10083` by :user:`Gaurav Dhingra `. 
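(Illustrative aside, not part of the patch series.) The ``normalize='true-if-samples'``
default introduced in the previous commit is a sentinel value: it lets the function
distinguish "``normalize`` was never passed" from an explicit ``normalize=True`` or
``False``, so that any explicit value can be rejected when ``average != 'samples'``
(which is what the ``normalize=True`` test added in that commit asserts). A generic
sketch of that pattern, using hypothetical names::

    _NOT_GIVEN = 'true-if-samples'   # sentinel default meaning "not passed"

    def score(average='samples', normalize=_NOT_GIVEN):
        # Reject an explicit True *or* False outside average='samples'.
        if average != 'samples' and normalize != _NOT_GIVEN:
            raise ValueError("'normalize' is only meaningful with "
                             "average='samples', got average=%r" % average)
        if normalize == _NOT_GIVEN:
            normalize = True         # fall back to the old default behaviour
        return average, normalize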
Linear, kernelized and related models diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index a9bc1a2ccd63f..26a247475f9be 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -533,43 +533,44 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, y_true = y_true[:, labels[:n_labels]] y_pred = y_pred[:, labels[:n_labels]] - with np.errstate(divide='ignore', invalid='ignore'): + class_weight = None + + if average == 'samples': + sum_axis = 1 + class_weight = sample_weight + weights = None + elif average == 'micro': + sum_axis = 1 class_weight = None - - if average == 'samples': - sum_axis = 1 - class_weight = sample_weight - weights = None - elif average == 'micro': - sum_axis = 1 - class_weight = None - weights = sample_weight - elif average == 'macro': - sum_axis = 0 - class_weight = None - weights = sample_weight - elif average == 'weighted': - sum_axis = 0 - weights = sample_weight - if sample_weight is None: - class_weight = y_true.toarray().sum(axis=0) - else: - class_weight = (y_true.toarray().T).dot(sample_weight) - if class_weight.sum() == 0: - return 0 + weights = sample_weight + elif average == 'macro': + sum_axis = 0 + class_weight = None + weights = sample_weight + elif average == 'weighted': + sum_axis = 0 + weights = sample_weight + if sample_weight is None: + class_weight = y_true.toarray().sum(axis=0) else: - sum_axis = 0 - weights = sample_weight - - pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis, - sample_weight=weights) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=sum_axis, - sample_weight=weights) - if average == 'micro': - pred_or_true = np.array([pred_or_true.sum()]) - pred_and_true = np.array([pred_and_true.sum()]) + class_weight = (y_true.toarray().T).dot(sample_weight) + if class_weight.sum() == 0: + return 0 + else: + # average=None + sum_axis = 0 + weights = sample_weight + + pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis, + sample_weight=weights) + pred_and_true = count_nonzero(y_true.multiply(y_pred), + axis=sum_axis, + sample_weight=weights) + if average == 'micro': + pred_or_true = np.array([pred_or_true.sum()]) + pred_and_true = np.array([pred_and_true.sum()]) + with np.errstate(divide='ignore', invalid='ignore'): score = pred_and_true / pred_or_true score[pred_or_true == 0.0] = 1.0 @@ -581,7 +582,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, score = score.sum() else: score = np.average(score, weights=class_weight) - return score + return score elif average == 'samples': raise ValueError("Sample-based jaccard similarity score is " "not meaningful outside multilabel " @@ -629,25 +630,25 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, else: pred_sum = np.zeros(len(labels)) - with np.errstate(divide='ignore'): + class_weight = None + if average == 'micro' or average == 'binary': + tp_sum = np.array([tp_sum.sum()]) + true_sum = np.array([true_sum.sum()]) + pred_sum = np.array([pred_sum.sum()]) class_weight = None - if average == 'micro' or average == 'binary': - tp_sum = np.array([tp_sum.sum()]) - true_sum = np.array([true_sum.sum()]) - pred_sum = np.array([pred_sum.sum()]) - class_weight = None - elif average == 'macro': - class_weight = None - elif average == 'weighted': - class_weight = true_sum - if class_weight.sum() == 0: - return 0 + elif average == 'macro': + class_weight = None + elif average == 'weighted': + class_weight = true_sum + if class_weight.sum() == 0: + 
return 0 + with np.errstate(divide='ignore', invalid='ignore'): score = tp_sum / (true_sum + pred_sum - tp_sum) if average is not None: score = np.average(score, weights=class_weight) - return score + return score def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 11951076d92db..b18cc76c7078d 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -435,8 +435,7 @@ def test_symmetry(): # Symmetric metric for name in SYMMETRIC_METRICS: metric = ALL_METRICS[name] - if (name in METRIC_UNDEFINED_BINARY and - name in METRIC_UNDEFINED_BINARY): + if name in METRIC_UNDEFINED_BINARY: assert_almost_equal(metric(y_true_bin, y_pred_bin), metric(y_pred_bin, y_true_bin), err_msg="%s is not symmetric" % name) From 8ff62bc76d5f80b1ab35c710d2e0389193efa123 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 04:54:40 +0530 Subject: [PATCH 58/88] add jaccard similarity score to scorers --- sklearn/metrics/scorer.py | 8 ++++++-- sklearn/metrics/tests/test_score_objects.py | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 05231826a8998..67df2b5482808 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -27,7 +27,8 @@ mean_squared_error, mean_squared_log_error, accuracy_score, f1_score, roc_auc_score, average_precision_score, precision_score, recall_score, log_loss, balanced_accuracy_score, - explained_variance_score, brier_score_loss) + explained_variance_score, brier_score_loss, + jaccard_similarity_score) from .cluster import adjusted_rand_score from .cluster import homogeneity_score @@ -41,6 +42,7 @@ from ..utils.multiclass import type_of_target from ..externals import six from ..base import is_regressor +from functools import partial class _BaseScorer(six.with_metaclass(ABCMeta, object)): @@ -501,6 +503,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, accuracy_scorer = make_scorer(accuracy_score) f1_scorer = make_scorer(f1_score) balanced_accuracy_scorer = make_scorer(balanced_accuracy_score) +jaccard_similarity_scorer = make_scorer(jaccard_similarity_score) # Score functions that need decision values roc_auc_scorer = make_scorer(roc_auc_score, greater_is_better=True, @@ -561,7 +564,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, for name, metric in [('precision', precision_score), - ('recall', recall_score), ('f1', f1_score)]: + ('recall', recall_score), ('f1', f1_score), + ('jaccard_similarity', partial(jaccard_similarity_score, average='binary'))]: SCORERS[name] = make_scorer(metric) for average in ['macro', 'micro', 'samples', 'weighted']: qualified_name = '{0}_{1}'.format(name, average) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 6af6418635d59..6e2de82a8b503 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -19,7 +19,8 @@ from sklearn.base import BaseEstimator from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, - log_loss, precision_score, recall_score) + log_loss, precision_score, recall_score, + jaccard_similarity_score) from sklearn.metrics import cluster as cluster_module from sklearn.metrics.scorer import (check_scoring, _PredictScorer, _passthrough_scorer) @@ -39,6 +40,10 @@ from sklearn.model_selection import GridSearchCV 
from sklearn.multiclass import OneVsRestClassifier from sklearn.externals import joblib +from functools import partial + +jaccard_similarity_score = partial(jaccard_similarity_score, average='binary') + REGRESSION_SCORERS = ['explained_variance', 'r2', @@ -52,7 +57,9 @@ 'roc_auc', 'average_precision', 'precision', 'precision_weighted', 'precision_macro', 'precision_micro', 'recall', 'recall_weighted', 'recall_macro', 'recall_micro', - 'neg_log_loss', 'log_loss', 'brier_score_loss'] + 'neg_log_loss', 'log_loss', 'brier_score_loss', 'jaccard_similarity', + 'jaccard_similarity_weighted', 'jaccard_similarity_macro', + 'jaccard_similarity_micro'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", @@ -64,7 +71,8 @@ "normalized_mutual_info_score", "fowlkes_mallows_score"] -MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples'] +MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples', + 'jaccard_similarity_samples'] def _make_estimators(X_train, y_train, y_ml_train): @@ -283,7 +291,8 @@ def test_classification_scores(): clf.fit(X_train, y_train) for prefix, metric in [('f1', f1_score), ('precision', precision_score), - ('recall', recall_score)]: + ('recall', recall_score), + ('jaccard_similarity', jaccard_similarity_score)]: score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, From f5d03d0696cb749095373b816fd5f11774daba6f Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 11:11:36 +0530 Subject: [PATCH 59/88] use make_scorer with average='binary' --- sklearn/metrics/scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 67df2b5482808..2796302f911fd 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -565,8 +565,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, for name, metric in [('precision', precision_score), ('recall', recall_score), ('f1', f1_score), - ('jaccard_similarity', partial(jaccard_similarity_score, average='binary'))]: - SCORERS[name] = make_scorer(metric) + ('jaccard_similarity', jaccard_similarity_score)]: + SCORERS[name] = make_scorer(metric, average='binary') for average in ['macro', 'micro', 'samples', 'weighted']: qualified_name = '{0}_{1}'.format(name, average) SCORERS[qualified_name] = make_scorer(metric, pos_label=None, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 6e2de82a8b503..f7725213d4549 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -42,8 +42,6 @@ from sklearn.externals import joblib from functools import partial -jaccard_similarity_score = partial(jaccard_similarity_score, average='binary') - REGRESSION_SCORERS = ['explained_variance', 'r2', @@ -292,7 +290,9 @@ def test_classification_scores(): for prefix, metric in [('f1', f1_score), ('precision', precision_score), ('recall', recall_score), - ('jaccard_similarity', jaccard_similarity_score)]: + ('jaccard_similarity', + partial(jaccard_similarity_score, + average='binary'))]: score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, From c3279ff63afe2919b1956b02d60a46e3cd03d64d Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 
2018 11:13:09 +0530 Subject: [PATCH 60/88] fix import and pep8 --- sklearn/metrics/scorer.py | 1 - sklearn/metrics/tests/test_score_objects.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 2796302f911fd..32ad695027030 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -42,7 +42,6 @@ from ..utils.multiclass import type_of_target from ..externals import six from ..base import is_regressor -from functools import partial class _BaseScorer(six.with_metaclass(ABCMeta, object)): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index f7725213d4549..03f7233aa09af 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -55,9 +55,9 @@ 'roc_auc', 'average_precision', 'precision', 'precision_weighted', 'precision_macro', 'precision_micro', 'recall', 'recall_weighted', 'recall_macro', 'recall_micro', - 'neg_log_loss', 'log_loss', 'brier_score_loss', 'jaccard_similarity', - 'jaccard_similarity_weighted', 'jaccard_similarity_macro', - 'jaccard_similarity_micro'] + 'neg_log_loss', 'log_loss', 'brier_score_loss', + 'jaccard_similarity', 'jaccard_similarity_weighted', + 'jaccard_similarity_macro', 'jaccard_similarity_micro'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", From a6736831b08791db2e9270c184872a1eab0505ba Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 11:36:00 +0530 Subject: [PATCH 61/88] fix doc --- doc/modules/model_evaluation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 4bd65bd87176e..21aa6c2a99da1 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -105,7 +105,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. 
Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'jaccard_similarity', 'jaccard_similarity_macro', 'jaccard_similarity_micro', 'jaccard_similarity_samples', 'jaccard_similarity_weighted', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. note:: From 0b507aa2756132c63670acf6539f5dd7370c6f94 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 13:41:31 +0530 Subject: [PATCH 62/88] use 'jaccard' instead of 'jaccard_similarity' --- doc/modules/model_evaluation.rst | 2 +- sklearn/metrics/scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 11 +++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 21aa6c2a99da1..55d442c601baa 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -105,7 +105,7 @@ Usage examples: >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'jaccard_similarity', 'jaccard_similarity_macro', 'jaccard_similarity_micro', 'jaccard_similarity_samples', 'jaccard_similarity_weighted', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] + ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score'] .. 
note:: diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 32ad695027030..c29ecb58f0edd 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -564,7 +564,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, for name, metric in [('precision', precision_score), ('recall', recall_score), ('f1', f1_score), - ('jaccard_similarity', jaccard_similarity_score)]: + ('jaccard', jaccard_similarity_score)]: SCORERS[name] = make_scorer(metric, average='binary') for average in ['macro', 'micro', 'samples', 'weighted']: qualified_name = '{0}_{1}'.format(name, average) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 03f7233aa09af..5b31d3e60edce 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -56,8 +56,8 @@ 'precision_weighted', 'precision_macro', 'precision_micro', 'recall', 'recall_weighted', 'recall_macro', 'recall_micro', 'neg_log_loss', 'log_loss', 'brier_score_loss', - 'jaccard_similarity', 'jaccard_similarity_weighted', - 'jaccard_similarity_macro', 'jaccard_similarity_micro'] + 'jaccard', 'jaccard_weighted', 'jaccard_macro', + 'jaccard_micro'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", @@ -70,7 +70,7 @@ "fowlkes_mallows_score"] MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples', - 'jaccard_similarity_samples'] + 'jaccard_samples'] def _make_estimators(X_train, y_train, y_ml_train): @@ -290,9 +290,8 @@ def test_classification_scores(): for prefix, metric in [('f1', f1_score), ('precision', precision_score), ('recall', recall_score), - ('jaccard_similarity', - partial(jaccard_similarity_score, - average='binary'))]: + ('jaccard', partial(jaccard_similarity_score, + average='binary'))]: score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, From 5bb690d0e2ef7ac5cd80339fbf183b46d2ca3a08 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 14:47:48 +0530 Subject: [PATCH 63/88] collect common validation code between prfs and jaccard --- sklearn/metrics/classification.py | 37 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 26a247475f9be..7ad81a1222f53 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -478,15 +478,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, ... 
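# Usage sketch for the shortened scorer names introduced here ('jaccard',
# 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted');
# it assumes a build where these strings are registered, e.g. this branch.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = make_classification(random_state=0)
clf = LogisticRegression(solver='liblinear')
print(cross_val_score(clf, X, y, scoring='jaccard_weighted', cv=3))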
np.array([0, 1, 2, 3]), average='macro') 0.5 """ - - average_options = (None, 'micro', 'macro', 'weighted', 'samples') - if average not in average_options and average != 'binary': - raise ValueError("average has to be one of " + str(average_options)) - - # Compute accuracy for each possible representation - y_type, y_true, y_pred = _check_targets(y_true, y_pred) - check_consistent_length(y_true, y_pred, sample_weight) - present_labels = unique_labels(y_true, y_pred) + validate_average(average) + y_type, y_true, y_pred, present_labels = validate_input(y_true, y_pred, + sample_weight) if average == 'binary': if y_type == 'binary': @@ -1068,6 +1062,20 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result +def validate_average(average): + average_options = (None, 'micro', 'macro', 'weighted', 'samples') + if average not in average_options and average != 'binary': + raise ValueError('average has to be one of ' + + str(average_options)) + + +def validate_input(y_true, y_pred, sample_weight): + y_type, y_true, y_pred = _check_targets(y_true, y_pred) + check_consistent_length(y_true, y_pred, sample_weight) + present_labels = unique_labels(y_true, y_pred) + return y_type, y_true, y_pred, present_labels + + def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', @@ -1211,16 +1219,13 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ - average_options = (None, 'micro', 'macro', 'weighted', 'samples') - if average not in average_options and average != 'binary': - raise ValueError('average has to be one of ' + - str(average_options)) + validate_average(average) + if beta <= 0: raise ValueError("beta should be >0 in the F-beta score") - y_type, y_true, y_pred = _check_targets(y_true, y_pred) - check_consistent_length(y_true, y_pred, sample_weight) - present_labels = unique_labels(y_true, y_pred) + y_type, y_true, y_pred, present_labels = validate_input(y_true, y_pred, + sample_weight) if average == 'binary': if y_type == 'binary': From f1e1b698a38b83aa7aa6d7c652f97a1a70594b07 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 17:00:59 +0530 Subject: [PATCH 64/88] update docstring and name --- sklearn/metrics/classification.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 7ad81a1222f53..f4b1c315381c4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -478,9 +478,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, ... np.array([0, 1, 2, 3]), average='macro') 0.5 """ - validate_average(average) - y_type, y_true, y_pred, present_labels = validate_input(y_true, y_pred, - sample_weight) + _validate_prfsj_average(average) + y_type, y_true, y_pred, present_labels = _validate_prfsj_input(y_true, + y_pred, sample_weight) if average == 'binary': if y_type == 'binary': @@ -1062,14 +1062,22 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result -def validate_average(average): +def _validate_prfsj_average(average): + """Validate ``average`` as a valid average option for + functions :func:`metrics.precision_recall_fscore_support` and + :func:`metrics.jaccard_similarity_score`. 
+ """ average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError('average has to be one of ' + str(average_options)) -def validate_input(y_true, y_pred, sample_weight): +def _validate_prfsj_input(y_true, y_pred, sample_weight): + """Validate input for consistent length and type for functions + :func:`metrics.precision_recall_fscore_support` and + :func:`metrics.jaccard_similarity_score`. + """ y_type, y_true, y_pred = _check_targets(y_true, y_pred) check_consistent_length(y_true, y_pred, sample_weight) present_labels = unique_labels(y_true, y_pred) @@ -1219,13 +1227,13 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ - validate_average(average) + _validate_prfsj_average(average) if beta <= 0: raise ValueError("beta should be >0 in the F-beta score") - y_type, y_true, y_pred, present_labels = validate_input(y_true, y_pred, - sample_weight) + y_type, y_true, y_pred, present_labels = _validate_prfsj_input(y_true, + y_pred, sample_weight) if average == 'binary': if y_type == 'binary': From 9606d52443595f1a518e31f745b262b7994a5646 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 30 Jan 2018 17:31:04 +0530 Subject: [PATCH 65/88] fix pep8 --- sklearn/metrics/classification.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index f4b1c315381c4..27bb2a56541ef 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -479,9 +479,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, 0.5 """ _validate_prfsj_average(average) - y_type, y_true, y_pred, present_labels = _validate_prfsj_input(y_true, - y_pred, sample_weight) - + y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, + sample_weight) if average == 'binary': if y_type == 'binary': if pos_label not in present_labels: @@ -1073,7 +1072,7 @@ def _validate_prfsj_average(average): str(average_options)) -def _validate_prfsj_input(y_true, y_pred, sample_weight): +def _validate_input(y_true, y_pred, sample_weight): """Validate input for consistent length and type for functions :func:`metrics.precision_recall_fscore_support` and :func:`metrics.jaccard_similarity_score`. @@ -1228,13 +1227,10 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, """ _validate_prfsj_average(average) - if beta <= 0: raise ValueError("beta should be >0 in the F-beta score") - - y_type, y_true, y_pred, present_labels = _validate_prfsj_input(y_true, - y_pred, sample_weight) - + y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, + sample_weight) if average == 'binary': if y_type == 'binary': if pos_label not in present_labels: From e1d7e288e9b1b94007d822557abeee2393c6893f Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 31 Jan 2018 10:36:24 +0530 Subject: [PATCH 66/88] a little more refactoring --- sklearn/metrics/classification.py | 32 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 27bb2a56541ef..117fb410ff26b 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -513,14 +513,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "`average='samples'`, got `average='%s'`." 
% average) - if not np.all(labels == present_labels): - if np.max(labels) > np.max(present_labels): - raise ValueError('All labels must be in [0, n, labels). ' - 'Got %d > %d' % - (np.max(labels), np.max(present_labels))) - if np.min(labels) < 0: - raise ValueError('All labels must be in [0, n, labels). ' - 'Got %d < 0' % np.min(labels)) + _validate_multilabels(labels, present_labels) if n_labels is not None: y_true = y_true[:, labels[:n_labels]] @@ -1083,6 +1076,18 @@ def _validate_input(y_true, y_pred, sample_weight): return y_type, y_true, y_pred, present_labels +def _validate_multilabels(labels, present_labels): + """All labels are index integers for multilabel.""" + if not np.all(labels == present_labels): + if np.max(labels) > np.max(present_labels): + raise ValueError('All labels must be in [0, n, labels). ' + 'Got %d > %d' % + (np.max(labels), np.max(present_labels))) + if np.min(labels) < 0: + raise ValueError('All labels must be in [0, n, labels). ' + 'Got %d < 0' % np.min(labels)) + + def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', @@ -1263,16 +1268,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, if y_type.startswith('multilabel'): sum_axis = 1 if average == 'samples' else 0 - # All labels are index integers for multilabel. - # Select labels: - if not np.all(labels == present_labels): - if np.max(labels) > np.max(present_labels): - raise ValueError('All labels must be in [0, n labels). ' - 'Got %d > %d' % - (np.max(labels), np.max(present_labels))) - if np.min(labels) < 0: - raise ValueError('All labels must be in [0, n labels). ' - 'Got %d < 0' % np.min(labels)) + _validate_multilabels(labels, present_labels) if n_labels is not None: y_true = y_true[:, labels[:n_labels]] From a2a09da81b435ce02d6917633f9ca067d2268e31 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 31 Jan 2018 18:17:01 +0530 Subject: [PATCH 67/88] change answer for zeroed multiclass and binary averaging --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 117fb410ff26b..ad949ab42143d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -485,7 +485,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if y_type == 'binary': if pos_label not in present_labels: if len(present_labels) < 2: - return 1. + return 0. 
else: raise ValueError("pos_label=%r is not a valid label: " "%r" % (pos_label, present_labels)) @@ -558,7 +558,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, with np.errstate(divide='ignore', invalid='ignore'): score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] = 1.0 + score[pred_or_true == 0.0] = 0.0 if average is not None: if not normalize: From c873dcecb49b49aff233a58b84b800fe7f7dcfde Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Tue, 6 Feb 2018 16:15:23 +0530 Subject: [PATCH 68/88] fix for edge cases --- sklearn/metrics/classification.py | 5 +++-- sklearn/metrics/tests/test_classification.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index ad949ab42143d..56e45291c061c 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -579,6 +579,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, y_true = le.transform(y_true) y_pred = le.transform(y_pred) + Labels = labels labels = le.transform(labels)[:n_labels] # use 'np.in1d' instead of 'np.isin' (unavailable in version < 1.13.0) indices = np.where(np.in1d(y_true, labels, assume_unique=False, @@ -607,12 +608,12 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, tp_sum = np.zeros(len(labels)) if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, - minlength=len(labels))[labels] + minlength=len(Labels))[labels] else: true_sum = np.zeros(len(labels)) if len(y_pred): pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(labels))[labels] + minlength=len(Labels))[labels] else: pred_sum = np.zeros(len(labels)) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d12f17edbe7b5..0bd93ece7fa43 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1092,6 +1092,19 @@ def test_multiclass_jaccard_similarity_score(): def test_average_binary_jaccard_similarity_score(): + # tp=0, fp=0, fn=1, tn=0 + y_true = np.array([1]) + y_pred = np.array([0]) + assert_equal(jaccard_similarity_score(y_true, y_pred, + average='binary'), 0.) + # tp=0, fp=0, fn=0, tn=1 + y_true = np.array([0]) + y_pred = np.array([0]) + assert_equal(jaccard_similarity_score(y_true, y_pred, + average='binary'), 0.) + # tp=1, fp=0, fn=0, tn=0 (pos_label=0) + assert_equal(jaccard_similarity_score(y_true, y_pred, pos_label=0, + average='binary'), 1.) y_true = np.array([1, 0, 1, 1, 0]) y_pred = np.array([1, 0, 1, 1, 1]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, From 4978dfd19163330e0338eb2f13b6c1e8554a6478 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 7 Feb 2018 09:57:07 +0530 Subject: [PATCH 69/88] update (refactoring) function name and add doc example --- sklearn/metrics/classification.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 56e45291c061c..c19e723221b7b 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -447,6 +447,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, -------- accuracy_score, hamming_loss, zero_one_loss + Notes + ----- + :func:`jaccard_similarity_score` may be a poor metric if there are no + positives for some samples or classes. + References ---------- .. 
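# Standalone hand check (not library code) of the edge cases the new tests
# cover: with average='binary' the score is tp / (tp + fp + fn) for the
# chosen pos_label, and an empty union scores 0 after PATCH 67.
def binary_jaccard(y_true, y_pred, pos_label=1):
    pairs = list(zip(y_true, y_pred))
    tp = sum(t == pos_label and p == pos_label for t, p in pairs)
    fp = sum(t != pos_label and p == pos_label for t, p in pairs)
    fn = sum(t == pos_label and p != pos_label for t, p in pairs)
    union = tp + fp + fn
    return tp / union if union else 0.0

assert binary_jaccard([1], [0]) == 0.0               # tp=0, fn=1
assert binary_jaccard([0], [0]) == 0.0               # no positives at all
assert binary_jaccard([0], [0], pos_label=0) == 1.0  # tp=1
assert binary_jaccard([1, 0, 1, 1, 0], [1, 0, 1, 1, 1]) == 3 / 4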
[1] `Wikipedia entry for the Jaccard index @@ -472,13 +477,21 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average=None) array([ 0., 0., 1.]) + It may be a poor indicator if there are no positives for some samples + or classes: + + >>> jaccard_similarity_score(np.array([0]), np.array([0]), + ... average='binary') + 0.0 + In the multiclass case: >>> jaccard_similarity_score(np.array([0, 2, 1, 3]), ... np.array([0, 1, 2, 3]), average='macro') 0.5 + """ - _validate_prfsj_average(average) + _validate_set_wise_average(average) y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, sample_weight) if average == 'binary': @@ -1055,7 +1068,7 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result -def _validate_prfsj_average(average): +def _validate_set_wise_average(average): """Validate ``average`` as a valid average option for functions :func:`metrics.precision_recall_fscore_support` and :func:`metrics.jaccard_similarity_score`. @@ -1232,7 +1245,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ - _validate_prfsj_average(average) + _validate_set_wise_average(average) if beta <= 0: raise ValueError("beta should be >0 in the F-beta score") y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, From 4fe8a1f94e1edaca0886cf2f1e34b04435f9e9a7 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 7 Nov 2018 00:22:36 +1100 Subject: [PATCH 70/88] Fix merge error --- sklearn/metrics/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index fa9441c6e2bc0..11946573654ce 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -905,7 +905,7 @@ def test_normalize_option_multiclass_classification(name): metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_array_less(-1.0 * measure, 0, - msg="We failed to test correctly the normalize option") + err_msg="We failed to test correctly the normalize option") assert_allclose(metrics(y_true, y_pred, normalize=False) / n_samples, measure) From 2c9b3562bdb165552694857883290fd787779600 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 13 Nov 2018 12:59:35 +1100 Subject: [PATCH 71/88] WIP --- sklearn/metrics/classification.py | 165 +++++---------------------- sklearn/metrics/tests/test_common.py | 3 + 2 files changed, 33 insertions(+), 135 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 2a5dc763ca696..197db3af81db6 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -717,144 +717,39 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, "average != 'binary' (got %r). You may use " "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning) + if average != 'samples' and normalize != 'true-if-samples': + raise ValueError("'normalize' is only meaningful with " + "`average='samples'`, got `average='%s'`." 
+ % average) - if labels is None: - labels = present_labels - n_labels = None - else: - n_labels = len(labels) - labels = np.hstack([labels, np.setdiff1d(present_labels, labels, - assume_unique=True)]) - - if y_type.startswith('multilabel'): - if average != 'samples' and normalize != 'true-if-samples': - raise ValueError("'normalize' is only meaningful with " - "`average='samples'`, got `average='%s'`." - % average) - - _validate_multilabels(labels, present_labels) - - if n_labels is not None: - y_true = y_true[:, labels[:n_labels]] - y_pred = y_pred[:, labels[:n_labels]] + samplewise = average == 'samples' + MCM = multilabel_confusion_matrix(y_true, y_pred, + sample_weight=sample_weight, + labels=labels, samplewise=samplewise) + numerator = MCM[:, 1, 1] + denominator = MCM[:, 1, 1] + MCM[:, 0, 1] + MCM[:, 1, 0] - class_weight = None - - if average == 'samples': - sum_axis = 1 - class_weight = sample_weight - weights = None - elif average == 'micro': - sum_axis = 1 - class_weight = None - weights = sample_weight - elif average == 'macro': - sum_axis = 0 - class_weight = None - weights = sample_weight - elif average == 'weighted': - sum_axis = 0 - weights = sample_weight - if sample_weight is None: - class_weight = y_true.toarray().sum(axis=0) - else: - class_weight = (y_true.toarray().T).dot(sample_weight) - if class_weight.sum() == 0: - return 0 - else: - # average=None - sum_axis = 0 - weights = sample_weight - - pred_or_true = count_nonzero(y_true + y_pred, axis=sum_axis, - sample_weight=weights) - pred_and_true = count_nonzero(y_true.multiply(y_pred), - axis=sum_axis, - sample_weight=weights) - if average == 'micro': - pred_or_true = np.array([pred_or_true.sum()]) - pred_and_true = np.array([pred_and_true.sum()]) - - with np.errstate(divide='ignore', invalid='ignore'): - score = pred_and_true / pred_or_true - score[pred_or_true == 0.0] = 0.0 - - if average is not None: - if not normalize: - if class_weight is not None: - score = np.dot(score, class_weight) - else: - score = score.sum() - else: - score = np.average(score, weights=class_weight) - return score - elif average == 'samples': - raise ValueError("Sample-based jaccard similarity score is " - "not meaningful outside multilabel " - "classification. 
See the accuracy_score instead.") + if average == 'micro': + numerator = np.array([numerator.sum()]) + denominator = np.array([denominator.sum()]) + + if not np.all(denominator): + # TODO: warn that 0 will be returned + denominator[denominator == 0] == 1 + + jaccard = numerator / denominator + if average is None: + return jaccard + if not normalize: + return (jaccard * (1 if sample_weight is None + else sample_weight)).sum() + if average == 'weighted': + weights = MCM[:, 1, 0] + MCM[:, 1, 1] + elif average == 'samples' and sample_weight is not None: + weights = sample_weight else: - le = LabelEncoder() - le.fit(labels) - y_true = le.transform(y_true) - y_pred = le.transform(y_pred) - - Labels = labels - labels = le.transform(labels)[:n_labels] - # use 'np.in1d' instead of 'np.isin' (unavailable in version < 1.13.0) - indices = np.where(np.in1d(y_true, labels, assume_unique=False, - invert=False) - + np.in1d(y_pred, labels, assume_unique=False, - invert=False))[0] - - y_true = y_true[indices] - y_pred = y_pred[indices] - tp = y_true == y_pred - tp_bins = y_true[tp] - if sample_weight is not None: - sample_weight = np.array(sample_weight)[indices] - tp_bins_weights = np.asarray(sample_weight)[tp] - else: - tp_bins_weights = None - - if len(tp_bins): - tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, - minlength=len(labels))[labels] - true_sum = np.bincount(y_true, weights=sample_weight, - minlength=len(labels))[labels] - pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(labels))[labels] - else: - tp_sum = np.zeros(len(labels)) - if len(y_true): - true_sum = np.bincount(y_true, weights=sample_weight, - minlength=len(Labels))[labels] - else: - true_sum = np.zeros(len(labels)) - if len(y_pred): - pred_sum = np.bincount(y_pred, weights=sample_weight, - minlength=len(Labels))[labels] - else: - pred_sum = np.zeros(len(labels)) - - class_weight = None - if average == 'micro' or average == 'binary': - tp_sum = np.array([tp_sum.sum()]) - true_sum = np.array([true_sum.sum()]) - pred_sum = np.array([pred_sum.sum()]) - class_weight = None - elif average == 'macro': - class_weight = None - elif average == 'weighted': - class_weight = true_sum - if class_weight.sum() == 0: - return 0 - - with np.errstate(divide='ignore', invalid='ignore'): - score = tp_sum / (true_sum + pred_sum - tp_sum) - - if average is not None: - score = np.average(score, weights=class_weight) - return score + weights = None + return np.average(jaccard, weights=weights) def matthews_corrcoef(y_true, y_pred, sample_weight=None): diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 11946573654ce..8895d05a679f5 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -165,6 +165,8 @@ "samples_f2_score": partial(fbeta_score, average="samples", beta=2), "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), + "samples_jaccard_similarity_score": + partial(jaccard_similarity_score, average="macro"), "cohen_kappa_score": cohen_kappa_score, @@ -413,6 +415,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", + "samples_jaccard_similarity_score", } # Regression metrics with "multioutput-continuous" format support From 0e9e12d5d84665eb78ecd2bbe862210299579917 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 18:00:18 
+1100 Subject: [PATCH 72/88] Make tests pass --- sklearn/metrics/classification.py | 8 ++++++-- sklearn/metrics/tests/test_classification.py | 12 ++++++------ sklearn/metrics/tests/test_common.py | 7 +++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 197db3af81db6..36d10182ba9a4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -638,7 +638,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, normalize : bool, optional (default=True) If ``False``, return the sum of the Jaccard similarity coefficient over the sample set. Otherwise, return the average of Jaccard - similarity coefficient. ``normalize`` is only meaningful when + similarity coefficient. ``normalize`` is only applicable when ``average='samples'``. sample_weight : array-like of shape = [n_samples], optional @@ -735,9 +735,10 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if not np.all(denominator): # TODO: warn that 0 will be returned - denominator[denominator == 0] == 1 + denominator[denominator == 0] = 1 jaccard = numerator / denominator + print(numerator, denominator) if average is None: return jaccard if not normalize: @@ -745,6 +746,9 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, else sample_weight)).sum() if average == 'weighted': weights = MCM[:, 1, 0] + MCM[:, 1, 1] + if not np.any(weights): + # numerator is 0, and warning should have already been issued + weights = None elif average == 'samples' and sample_weight is not None: weights = sample_weight else: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 2a7051c21398c..bbeed9f08e877 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1151,8 +1151,8 @@ def test_jaccard_similarity_score(): "another average setting.") assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, y_pred, average='binary') - msg3 = ("Sample-based jaccard similarity score is not meaningful outside " - "multilabel classification. See the accuracy_score instead.") + msg3 = ("Samplewise metrics are not available outside of multilabel " + "classification.") assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, average='samples') assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, @@ -1221,10 +1221,10 @@ def test_multilabel_jaccard_similarity_score(): y_pred, average='macro', normalize=False) assert_raise_message(ValueError, msg1, jaccard_similarity_score, y_true, y_pred, average='macro', normalize=True) - msg2 = 'All labels must be in [0, n, labels). Got 4 > 2' + msg2 = 'Got 4 > 2' assert_raise_message(ValueError, msg2, jaccard_similarity_score, y_true, y_pred, labels=[4]) - msg3 = 'All labels must be in [0, n, labels). Got -1 < 0' + msg3 = 'Got -1 < 0' assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, labels=[-1]) @@ -1253,8 +1253,8 @@ def test_multiclass_jaccard_similarity_score(): bin_jaccard_similarity_score(average=average, labels=b_label)) - y_true = np.array([]) - y_pred = np.array([]) + y_true = np.array([[0, 0], [0, 0], [0, 0]]) + y_pred = np.array([[0, 0], [0, 0], [0, 0]]) assert_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), 0.) 
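# Hand check of the multilabel_confusion_matrix formulation adopted in
# PATCH 71/72, using the multilabel example from the docstring; the results
# agree with the documented outputs (per-class [0, 0, 1], weighted 0.5,
# micro and samples 1/3). Assumes multilabel_confusion_matrix is available,
# as it is on this branch.
import numpy as np
from sklearn.metrics import multilabel_confusion_matrix

y_true = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]])
y_pred = np.array([[0, 1, 1], [1, 1, 1], [0, 0, 1]])

MCM = multilabel_confusion_matrix(y_true, y_pred)
tp, fp, fn = MCM[:, 1, 1], MCM[:, 0, 1], MCM[:, 1, 0]
per_class = tp / (tp + fp + fn)                # array([0., 0., 1.])
print(per_class.mean())                        # macro average: 0.333...
print(np.average(per_class, weights=tp + fn))  # weighted by support: 0.5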
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 8895d05a679f5..44249cb7c0581 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -433,6 +433,8 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "zero_one_loss", "unnormalized_zero_one_loss", "micro_jaccard_similarity_score", "macro_jaccard_similarity_score", + "binary_jaccard_similarity_score", + "samples_jaccard_similarity_score", "f1_score", "micro_f1_score", "macro_f1_score", "weighted_recall_score", @@ -440,8 +442,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", - "binary_jaccard_similarity_score", - "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error", "median_absolute_error", "max_error", @@ -888,8 +888,7 @@ def test_normalize_option_binary_classification(name): continue metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) - assert_greater(measure, 0, - msg="We failed to test correctly the normalize option") + assert measure > 0, "We failed to test correctly the normalize option" assert_allclose(metrics(y_true, y_pred, normalize=False) / n_samples, measure) From 95dfadab6550bc379dc721cdfe0f544233e03d1b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 20:25:17 +1100 Subject: [PATCH 73/88] Credit in what's new --- doc/whats_new/v0.21.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 08d2c904e7a5a..2162ebe030850 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -79,9 +79,10 @@ Support for Python 3.4 and below has been officially dropped. - |Feature| |Fix| :func:`metrics.jaccard_similarity_score` now accepts ``average`` argument like :func:`metrics.precision_recall_fscore_support` as - a naively set-wise measure applying only to binary, multilabel targets and it - binarizes multiclass input and treats them like the corresponding multilabel - problem. :issue:`10083` by :user:`Gaurav Dhingra `. + a naively set-wise measure applying only to binary, multilabel targets. It + now binarizes multiclass input and treats them like the corresponding + multilabel problem. + :issue:`10083` by :user:`Gaurav Dhingra ` and `Joel Nothman`_. :mod:`sklearn.neighbors` ........................ From 99fdd5cdfa9b46d6bb9148d1a3146c9a248f9f5b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 20:29:22 +1100 Subject: [PATCH 74/88] Clean merge error in what's new --- doc/whats_new/v0.20.rst | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 348f1a1b7f137..934b3d63f45e0 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -1060,20 +1060,8 @@ Linear, kernelized and related models - |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer to drop features. :issue:`11144` by :user:`Thomas Fan `. - -<<<<<<< HEAD -- Fixed a bug in :func:`metrics.jaccard_similarity_score`, to disallow - sample-wise averaging of multiclass input, since it is redundantly equal to - :func:`metrics.accuracy_score`. - :issue:`10083` by :user:`Gaurav Dhingra `. - -- Fixed a bug in :func:`metrics.precision_precision_recall_fscore_support` - when truncated `range(n_labels)` is passed as value for `labels`. - :issue:`10377` by :user:`Gaurav Dhingra `. 
-======= :mod:`sklearn.preprocessing` ............................ ->>>>>>> master - |MajorFeature| Expanded :class:`preprocessing.OneHotEncoder` to allow to encode categorical string features as a numeric array using a one-hot (or From 80520e9beb1181ee0a5e51181b0bf6ba973fde37 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 20:30:12 +1100 Subject: [PATCH 75/88] Remove debug print --- sklearn/metrics/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 36d10182ba9a4..3f4e465a24399 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -738,7 +738,6 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, denominator[denominator == 0] = 1 jaccard = numerator / denominator - print(numerator, denominator) if average is None: return jaccard if not normalize: From 4ba98bcf71deff3617775c51739bbfa7b845c627 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 20:30:53 +1100 Subject: [PATCH 76/88] PEP8 --- sklearn/metrics/tests/test_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 44249cb7c0581..cdb2d2caa8e0d 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -907,7 +907,8 @@ def test_normalize_option_multiclass_classification(name): metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_array_less(-1.0 * measure, 0, - err_msg="We failed to test correctly the normalize option") + err_msg="We failed to test correctly the normalize " + "option") assert_allclose(metrics(y_true, y_pred, normalize=False) / n_samples, measure) From 5b5f04c49c834e30eca37ebb8e5c00e084db6fac Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 18 Nov 2018 22:14:38 +1100 Subject: [PATCH 77/88] new array printing format --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 3f4e465a24399..527a973103843 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -681,7 +681,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average='weighted') 0.5 >>> jaccard_similarity_score(y_true, y_pred, average=None) - array([ 0., 0., 1.]) + array([0., 0., 1.]) It may be a poor indicator if there are no positives for some samples or classes: From dfe58f4330314ccbe7d80289324bc224f31616dc Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 19 Nov 2018 08:49:55 +1100 Subject: [PATCH 78/88] new array printing format #2 --- doc/modules/model_evaluation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 158ee695fdb0f..efbd11e026558 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -704,7 +704,7 @@ multilabel problem: :: >>> jaccard_similarity_score(y_true, y_pred, average='micro') 0.33... >>> jaccard_similarity_score(y_true, y_pred, average=None) - array([ 1., 0., 0., 1.]) + array([1., 0., 0., 1.]) .. 
_precision_recall_f_measure_metrics: From 7422982f9b18f3a10389702552d541b5e6cf1c63 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 19 Nov 2018 10:39:20 +1100 Subject: [PATCH 79/88] Revert changes to v0.20.rst --- doc/whats_new/v0.20.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 934b3d63f45e0..165f200d0c848 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -65,6 +65,9 @@ Changelog location in :func:`datasets.fetch_olivetti_faces`. :issue:`12441` by :user:`Jérémie du Boisberranger ` +- |Fix| :func:`datasets.fetch_openml` to retry downloading when reading + from local cache fails. :issue:`12517` by :user:`Thomas Fan `. + :mod:`sklearn.decomposition` ............................ @@ -948,8 +951,6 @@ Support for Python 3.3 has been officially dropped. of parameters in the parameter grid. ``n_iter`` now acts as an upper bound on iterations. :issue:`10982` by :user:`Juliet Lawton ` -Linear, kernelized and related models -======= - |API| Invalid input for :class:`model_selection.ParameterGrid` now raises TypeError. :issue:`10928` by :user:`Solutus Immensus ` @@ -1060,6 +1061,7 @@ Linear, kernelized and related models - |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer to drop features. :issue:`11144` by :user:`Thomas Fan `. + :mod:`sklearn.preprocessing` ............................ From afa7759832b326c646523ad13767bd35ab2e4616 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 24 Nov 2018 23:48:37 +1100 Subject: [PATCH 80/88] Remove changes due to bad merge --- sklearn/metrics/tests/test_common.py | 45 +++++++++++++++------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index cdb2d2caa8e0d..2f7fa9d1f47e2 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -502,9 +502,12 @@ def test_symmetry(): for name in SYMMETRIC_METRICS: metric = ALL_METRICS[name] if name in METRIC_UNDEFINED_BINARY: - assert_allclose(metric(y_true_bin, y_pred_bin), - metric(y_pred_bin, y_true_bin), - err_msg="%s is not symmetric" % name) + if name in MULTILABELS_METRICS: + assert_allclose(metric(y_true_bin, y_pred_bin), + metric(y_pred_bin, y_true_bin), + err_msg="%s is not symmetric" % name) + else: + assert False, "This case is currently unhandled" else: assert_allclose(metric(y_true, y_pred), metric(y_pred, y_true), @@ -877,40 +880,40 @@ def test_raise_value_error_multilabel_sequences(name): @pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION) def test_normalize_option_binary_classification(name): + if name in METRIC_UNDEFINED_BINARY: + return # Test in the binary case n_samples = 20 random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) - for name in METRICS_WITH_NORMALIZE_OPTION: - if name in METRIC_UNDEFINED_BINARY: - continue - metrics = ALL_METRICS[name] - measure = metrics(y_true, y_pred, normalize=True) - assert measure > 0, "We failed to test correctly the normalize option" - assert_allclose(metrics(y_true, y_pred, normalize=False) - / n_samples, measure) + metrics = ALL_METRICS[name] + measure = metrics(y_true, y_pred, normalize=True) + assert_array_less(-1.0 * measure, 0, + err_msg="We failed to test correctly the normalize " + "option") + assert_allclose(metrics(y_true, y_pred, normalize=False) / n_samples, + measure) 
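# The invariant these normalize-option tests assert, shown standalone with
# accuracy_score (any metric exposing ``normalize`` behaves the same way):
# the unnormalized score is the normalized score times n_samples.
from sklearn.metrics import accuracy_score

y_true, y_pred = [0, 1, 1, 0], [0, 1, 0, 0]
assert accuracy_score(y_true, y_pred) == 0.75
assert accuracy_score(y_true, y_pred, normalize=False) == 0.75 * len(y_true)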
@pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION) def test_normalize_option_multiclass_classification(name): + if name in METRIC_UNDEFINED_MULTICLASS: + return # Test in the multiclass case random_state = check_random_state(0) y_true = random_state.randint(0, 4, size=(20, )) y_pred = random_state.randint(0, 4, size=(20, )) n_samples = y_true.shape[0] - for name in METRICS_WITH_NORMALIZE_OPTION: - if name in METRIC_UNDEFINED_MULTICLASS: - continue - metrics = ALL_METRICS[name] - measure = metrics(y_true, y_pred, normalize=True) - assert_array_less(-1.0 * measure, 0, - err_msg="We failed to test correctly the normalize " - "option") - assert_allclose(metrics(y_true, y_pred, normalize=False) - / n_samples, measure) + metrics = ALL_METRICS[name] + measure = metrics(y_true, y_pred, normalize=True) + assert_array_less(-1.0 * measure, 0, + err_msg="We failed to test correctly the normalize " + "option") + assert_allclose(metrics(y_true, y_pred, normalize=False) / n_samples, + measure) def test_normalize_option_multilabel_classification(): From 55b1e8333d7a3ff370734937f704386db4d4dfe6 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 24 Nov 2018 23:57:24 +1100 Subject: [PATCH 81/88] Avoid assert_equal --- sklearn/metrics/tests/test_classification.py | 31 ++++++++------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index bbeed9f08e877..f36b6abbc7dde 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1129,11 +1129,9 @@ def test_multilabel_hamming_loss(): assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0])) -def test_jaccard_similarity_score(): +def test_jaccard_similarity_score_validation(): y_true = np.array([0, 1, 0, 1, 1]) y_pred = np.array([0, 1, 0, 1, 1]) - assert_equal(jaccard_similarity_score(y_true, y_pred, average='binary', - pos_label=0), 1.) assert_raise_message(ValueError, "pos_label=2 is not a valid label: " "array([0, 1])", jaccard_similarity_score, y_true, y_pred, average='binary', pos_label=2) @@ -1174,13 +1172,13 @@ def test_multilabel_jaccard_similarity_score(): # size(y1 \inter y2) = [1, 2] # size(y1 \union y2) = [2, 2] - assert_equal(jaccard_similarity_score(y1, y2), 0.75) - assert_equal(jaccard_similarity_score(y1, y1), 1) - assert_equal(jaccard_similarity_score(y2, y2), 1) - assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0) - assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0) - assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0) - assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0) + assert jaccard_similarity_score(y1, y2) == 0.75 + assert jaccard_similarity_score(y1, y1) == 1 + assert jaccard_similarity_score(y2, y2) == 1 + assert jaccard_similarity_score(y2, np.logical_not(y2)) == 0 + assert jaccard_similarity_score(y1, np.logical_not(y1)) == 0 + assert jaccard_similarity_score(y1, np.zeros(y1.shape)) == 0 + assert jaccard_similarity_score(y2, np.zeros(y1.shape)) == 0 y_true = np.array([[0, 1, 1], [1, 0, 0]]) y_pred = np.array([[1, 1, 1], [1, 0, 1]]) @@ -1255,24 +1253,21 @@ def test_multiclass_jaccard_similarity_score(): y_true = np.array([[0, 0], [0, 0], [0, 0]]) y_pred = np.array([[0, 0], [0, 0], [0, 0]]) - assert_equal(jaccard_similarity_score(y_true, y_pred, average='weighted'), - 0.) 
+ assert jaccard_similarity_score(y_true, y_pred, average='weighted') == 0 def test_average_binary_jaccard_similarity_score(): # tp=0, fp=0, fn=1, tn=0 y_true = np.array([1]) y_pred = np.array([0]) - assert_equal(jaccard_similarity_score(y_true, y_pred, - average='binary'), 0.) + assert jaccard_similarity_score(y_true, y_pred, average='binary') == 0. # tp=0, fp=0, fn=0, tn=1 y_true = np.array([0]) y_pred = np.array([0]) - assert_equal(jaccard_similarity_score(y_true, y_pred, - average='binary'), 0.) + assert jaccard_similarity_score(y_true, y_pred, average='binary') == 0. # tp=1, fp=0, fn=0, tn=0 (pos_label=0) - assert_equal(jaccard_similarity_score(y_true, y_pred, pos_label=0, - average='binary'), 1.) + assert jaccard_similarity_score(y_true, y_pred, pos_label=0, + average='binary') == 1. y_true = np.array([1, 0, 1, 1, 0]) y_pred = np.array([1, 0, 1, 1, 1]) assert_almost_equal(jaccard_similarity_score(y_true, y_pred, From 6b71c18aeebe513eced3467229ff758042947397 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 25 Nov 2018 00:08:06 +1100 Subject: [PATCH 82/88] cosmit --- sklearn/metrics/classification.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index cc8a9326af6b9..945a657d7cac1 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -646,7 +646,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Returns ------- - score: float (if average is not None) or array of floats, shape =\ + score : float (if average is not None) or array of floats, shape =\ [n_unique_labels] See also @@ -665,14 +665,13 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, Examples -------- - >>> import numpy as np >>> from sklearn.metrics import jaccard_similarity_score - In the multilabel case with binary label indicators: + In the multilabel case: >>> y_true = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]]) >>> y_pred = np.array([[0, 1, 1], [1, 1, 1], [0, 0, 1]]) - >>> jaccard_similarity_score(y_true, y_pred) + >>> jaccard_similarity_score(y_true, y_pred, average='samples') ... # doctest: +ELLIPSIS 0.33... >>> jaccard_similarity_score(y_true, y_pred, average='micro') @@ -683,18 +682,11 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, >>> jaccard_similarity_score(y_true, y_pred, average=None) array([0., 0., 1.]) - It may be a poor indicator if there are no positives for some samples - or classes: - - >>> jaccard_similarity_score(np.array([0]), np.array([0]), - ... average='binary') - 0.0 - In the multiclass case: - >>> jaccard_similarity_score(np.array([0, 2, 1, 3]), - ... np.array([0, 1, 2, 3]), average='macro') - 0.5 + >>> jaccard_similarity_score(np.array([0, 1, 2, 3]), + ... 
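# Bare-assert equivalents of the removed helpers; pytest.approx is shown
# here as one possible stand-in for assert_almost_equal, not as what the
# patch itself uses.
from pytest import approx

assert 3 / 4 == 0.75                 # replaces assert_equal(..., 0.75)
assert 2 / 3 == approx(0.666666667)  # replaces assert_almost_equal(..., 2. / 3)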
np.array([0, 2, 2, 3]), average='macro') + 0.625 """ _validate_set_wise_average(average) @@ -741,8 +733,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, if average is None: return jaccard if not normalize: - return (jaccard * (1 if sample_weight is None - else sample_weight)).sum() + return np.sum(jaccard * (1 if sample_weight is None + else sample_weight)) if average == 'weighted': weights = MCM[:, 1, 0] + MCM[:, 1, 1] if not np.any(weights): From 03d89de2375fde343c31076eaf5c4a360e9942c0 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 25 Nov 2018 00:22:05 +1100 Subject: [PATCH 83/88] Clean up validation --- sklearn/metrics/classification.py | 99 +++++++++++-------------------- 1 file changed, 33 insertions(+), 66 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 945a657d7cac1..4ba62a7cb06cc 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -689,31 +689,14 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, 0.625 """ - _validate_set_wise_average(average) - y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, - sample_weight) - if average == 'binary': - if y_type == 'binary': - if pos_label not in present_labels: - if len(present_labels) < 2: - return 0. - else: - raise ValueError("pos_label=%r is not a valid label: " - "%r" % (pos_label, present_labels)) - labels = [pos_label] - else: - raise ValueError("Target is %s but average='binary'. Please " - "choose another average setting." % y_type) - elif pos_label not in (None, 1): - warnings.warn("Note that pos_label (set to %r) is ignored when " - "average != 'binary' (got %r). You may use " - "labels=[pos_label] to specify a single positive class." - % (pos_label, average), UserWarning) if average != 'samples' and normalize != 'true-if-samples': raise ValueError("'normalize' is only meaningful with " "`average='samples'`, got `average='%s'`." % average) - + labels = _check_set_wise_labels(y_true, y_pred, average, labels, + pos_label) + if labels is _ALL_ZERO: + return 0. samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, @@ -1173,38 +1156,39 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): return result -def _validate_set_wise_average(average): - """Validate ``average`` as a valid average option for - functions :func:`metrics.precision_recall_fscore_support` and - :func:`metrics.jaccard_similarity_score`. +_ALL_ZERO = object() # sentinel for special, degenerate case + + +def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): + """Validation associated with set-wise metrics + + Returns identified labels or _ALL_ZERO sentinel """ average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError('average has to be one of ' + str(average_options)) - -def _validate_input(y_true, y_pred, sample_weight): - """Validate input for consistent length and type for functions - :func:`metrics.precision_recall_fscore_support` and - :func:`metrics.jaccard_similarity_score`. 
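# Hand check of the multiclass docstring example just above: each class is
# treated as its own binary problem and the per-class Jaccard scores are
# macro-averaged (plain numpy, independent of the branch).
import numpy as np

y_true, y_pred = np.array([0, 1, 2, 3]), np.array([0, 2, 2, 3])
scores = []
for c in np.unique(np.concatenate([y_true, y_pred])):
    t, p = y_true == c, y_pred == c
    scores.append((t & p).sum() / (t | p).sum())
print(scores)           # [1.0, 0.0, 0.5, 1.0]
print(np.mean(scores))  # 0.625, matching the docstring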
- """ y_type, y_true, y_pred = _check_targets(y_true, y_pred) - check_consistent_length(y_true, y_pred, sample_weight) present_labels = unique_labels(y_true, y_pred) - return y_type, y_true, y_pred, present_labels - - -def _validate_multilabels(labels, present_labels): - """All labels are index integers for multilabel.""" - if not np.all(labels == present_labels): - if np.max(labels) > np.max(present_labels): - raise ValueError('All labels must be in [0, n, labels). ' - 'Got %d > %d' % - (np.max(labels), np.max(present_labels))) - if np.min(labels) < 0: - raise ValueError('All labels must be in [0, n, labels). ' - 'Got %d < 0' % np.min(labels)) + if average == 'binary': + if y_type == 'binary': + if pos_label not in present_labels: + if len(present_labels) < 2: + return _ALL_ZERO + else: + raise ValueError("pos_label=%r is not a valid label: " + "%r" % (pos_label, present_labels)) + labels = [pos_label] + else: + raise ValueError("Target is %s but average='binary'. Please " + "choose another average setting." % y_type) + elif pos_label not in (None, 1): + warnings.warn("Note that pos_label (set to %r) is ignored when " + "average != 'binary' (got %r). You may use " + "labels=[pos_label] to specify a single positive class." + % (pos_label, average), UserWarning) + return labels def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, @@ -1349,29 +1333,12 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, array([2, 2, 2])) """ - _validate_set_wise_average(average) if beta <= 0: raise ValueError("beta should be >0 in the F-beta score") - y_type, y_true, y_pred, present_labels = _validate_input(y_true, y_pred, - sample_weight) - if average == 'binary': - if y_type == 'binary': - if pos_label not in present_labels: - if len(present_labels) < 2: - # Only negative labels - return (0., 0., 0., 0) - else: - raise ValueError("pos_label=%r is not a valid label: %r" % - (pos_label, present_labels)) - labels = [pos_label] - else: - raise ValueError("Target is %s but average='binary'. Please " - "choose another average setting." % y_type) - elif pos_label not in (None, 1): - warnings.warn("Note that pos_label (set to %r) is ignored when " - "average != 'binary' (got %r). You may use " - "labels=[pos_label] to specify a single positive class." 
- % (pos_label, average), UserWarning) + labels = _check_set_wise_labels(y_true, y_pred, average, labels, + pos_label) + if labels is _ALL_ZERO: + return (0., 0., 0., 0) # Calculate tp_sum, pred_sum, true_sum ### samplewise = average == 'samples' From 46c127489f8e3218a88f08e526d3f9c281c32d2a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 25 Nov 2018 00:26:29 +1100 Subject: [PATCH 84/88] reuse warning code --- sklearn/metrics/classification.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4ba62a7cb06cc..0dadbc013ae7a 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -708,11 +708,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, numerator = np.array([numerator.sum()]) denominator = np.array([denominator.sum()]) - if not np.all(denominator): - # TODO: warn that 0 will be returned - denominator[denominator == 0] = 1 - - jaccard = numerator / denominator + jaccard = _prf_divide(numerator, denominator, 'jaccard', + 'true or predicted', average, ('jaccard',)) if average is None: return jaccard if not normalize: From e082e62d18a1eb26cf34881ac3da4615727b799f Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 2 Jan 2019 23:12:08 +1100 Subject: [PATCH 85/88] WIP --- sklearn/metrics/classification.py | 2 + sklearn/metrics/tests/test_classification.py | 44 +++++++++++++++----- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 0dadbc013ae7a..10fbac76907b4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -689,6 +689,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, 0.625 """ + print('!', y_true) if average != 'samples' and normalize != 'true-if-samples': raise ValueError("'normalize' is only meaningful with " "`average='samples'`, got `average='%s'`." 
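For the Jaccard score, the numerator handed to `_prf_divide` above is per-class tp and the denominator is tp + fp + fn (the size of the union). A NumPy-only sketch, ours rather than library code, reproduces the 0.625 macro figure from the multiclass docstring example:

import numpy as np

y_true = np.array([0, 1, 2, 3])
y_pred = np.array([0, 2, 2, 3])
labels = np.unique(np.concatenate([y_true, y_pred]))

tp = np.array([np.sum((y_true == c) & (y_pred == c)) for c in labels])
fp = np.array([np.sum((y_true != c) & (y_pred == c)) for c in labels])
fn = np.array([np.sum((y_true == c) & (y_pred != c)) for c in labels])

numerator = tp
denominator = tp + fp + fn              # no zeros here; the real code guards them
per_class = numerator / denominator
print(per_class)                        # [1.  0.  0.5 1. ]
print(per_class.mean())                 # 0.625 -> average='macro'
print(tp.sum() / denominator.sum())     # 0.6   -> average='micro'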
@@ -708,6 +709,7 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, numerator = np.array([numerator.sum()]) denominator = np.array([denominator.sum()]) + print(locals()) jaccard = _prf_divide(numerator, denominator, 'jaccard', 'true or predicted', average, ('jaccard',)) if average is None: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f36b6abbc7dde..ee139f8f34261 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1164,7 +1164,7 @@ def test_jaccard_similarity_score_validation(): average='micro', pos_label=3) -def test_multilabel_jaccard_similarity_score(): +def test_multilabel_jaccard_similarity_score(recwarn): # Dense label indicator matrix format y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) @@ -1226,8 +1226,26 @@ def test_multilabel_jaccard_similarity_score(): assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, labels=[-1]) + msg = ('Jaccard is ill-defined and being set to 0.0 in labels ' + 'with no true or predicted samples.') + assert assert_warns_message(UndefinedMetricWarning, msg, + jaccard_similarity_score, + np.array([[0, 1]]), + np.array([[0, 1]]), + average='macro') == 0.5 -def test_multiclass_jaccard_similarity_score(): + msg = ('Jaccard is ill-defined and being set to 0.0 in samples ' + 'with no true or predicted labels.') + assert assert_warns_message(UndefinedMetricWarning, msg, + jaccard_similarity_score, + np.transpose([[0, 1]]), + np.transpose([[0, 1]]), + average='samples') == 0.5 + + assert not list(recwarn) + + +def test_multiclass_jaccard_similarity_score(recwarn): y_true = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat', 'bird', 'bird'] y_pred = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird', 'bird', 'cat'] labels = ['ant', 'bird', 'cat'] @@ -1255,18 +1273,22 @@ def test_multiclass_jaccard_similarity_score(): y_pred = np.array([[0, 0], [0, 0], [0, 0]]) assert jaccard_similarity_score(y_true, y_pred, average='weighted') == 0 + assert not list(recwarn) -def test_average_binary_jaccard_similarity_score(): + +def test_average_binary_jaccard_similarity_score(recwarn): # tp=0, fp=0, fn=1, tn=0 - y_true = np.array([1]) - y_pred = np.array([0]) - assert jaccard_similarity_score(y_true, y_pred, average='binary') == 0. + assert jaccard_similarity_score([1], [0], average='binary') == 0. # tp=0, fp=0, fn=0, tn=1 - y_true = np.array([0]) - y_pred = np.array([0]) - assert jaccard_similarity_score(y_true, y_pred, average='binary') == 0. + msg = ('Jaccard is ill-defined and ' + 'being set to 0.0 in labels with no predicted samples.') + assert assert_warns_message(UndefinedMetricWarning, + msg, + jaccard_similarity_score, + [0], [0], + average='binary') == 0. # tp=1, fp=0, fn=0, tn=0 (pos_label=0) - assert jaccard_similarity_score(y_true, y_pred, pos_label=0, + assert jaccard_similarity_score([0], [0], pos_label=0, average='binary') == 1. y_true = np.array([1, 0, 1, 1, 0]) y_pred = np.array([1, 0, 1, 1, 1]) @@ -1276,6 +1298,8 @@ def test_average_binary_jaccard_similarity_score(): average='binary', pos_label=0), 1. 
/ 2) + assert not list(recwarn) + @ignore_warnings def test_precision_recall_f1_score_multilabel_1(): From 28dcca431cad374eba01d01fa10eff639bd1059b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 5 Feb 2019 08:27:03 +1100 Subject: [PATCH 86/88] Clean what's new --- doc/whats_new/v0.21.rst | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 4e5267669372a..99e81bec66963 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -167,8 +167,6 @@ Support for Python 3.4 and below has been officially dropped. multilabel problem. :issue:`10083` by :user:`Gaurav Dhingra ` and `Joel Nothman`_. -:mod:`sklearn.model_selection` -...................... - |Enhancement| Use label `accuracy` instead of `micro-average` on :func:`metrics.classification_report` to avoid confusion. `micro-average` is only shown for multi-label or multi-class with a subset of classes because @@ -176,15 +174,14 @@ Support for Python 3.4 and below has been officially dropped. :issue:`12334` by :user:`Emmanuel Arias `, `Joel Nothman`_ and `Andreas Müller`_ +- |Fix| The metric :func:`metrics.r2_score` is degenerate with a single sample + and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`. + :issue:`12855` by :user:`Pawel Sendyk .` + - |API| The parameter ``labels`` in :func:`metrics.hamming_loss` is deprecated in version 0.21 and will be removed in version 0.23. :issue:`10580` by :user:`Reshama Shaikh ` and `Sandra Mitrovic `. ->>>>>>> master - -- |Fix| The metric :func:`metrics.r2_score` is degenerate with a single sample - and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`. - :issue:`12855` by :user:`Pawel Sendyk .` :mod:`sklearn.model_selection` .............................. From 27cf502d291a6a5bc3605f92660ae58e502c9e29 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 5 Feb 2019 23:40:18 +1100 Subject: [PATCH 87/88] FIX coax tests to pass --- sklearn/metrics/classification.py | 6 +++++- sklearn/metrics/tests/test_classification.py | 14 ++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 428250236c1cc..14d520a8b85de 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -696,6 +696,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) if labels is _ALL_ZERO: + warnings.warn('Jaccard is ill-defined and being set to 0.0 with no ' + 'true or predicted samples', UndefinedMetricWarning) return 0. samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, @@ -1119,8 +1121,10 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for): The metric, modifier and average arguments are used only for determining an appropriate warning. 
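The `_prf_divide` hunk that follows replaces the bare division with a masked copy of the denominator. A standalone sketch of that guard, with made-up numbers:

import numpy as np

numerator = np.array([2.0, 0.0, 3.0])
denominator = np.array([4.0, 0.0, 3.0])

mask = denominator == 0.0
safe = denominator.copy()       # never mutate the caller's array
safe[mask] = 1.0                # a bare 0/0 would give nan and a RuntimeWarning
result = numerator / safe       # the zero-denominator entry becomes 0/1 == 0.0
print(result)                   # [0.5 0.  1. ]

In the metric itself the numerator is zero wherever the denominator is, so the masked entries are exactly the ill-defined scores that `_prf_divide` then reports with UndefinedMetricWarning.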
""" - result = numerator / denominator mask = denominator == 0.0 + denominator = denominator.copy() + denominator[mask] = 1 + result = numerator / denominator if not np.any(mask): return result diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 145bbde7665fc..24c69f86cf7bd 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1246,8 +1246,8 @@ def test_multilabel_jaccard_similarity_score(recwarn): 'with no true or predicted labels.') assert assert_warns_message(UndefinedMetricWarning, msg, jaccard_similarity_score, - np.transpose([[0, 1]]), - np.transpose([[0, 1]]), + np.array([[0, 0], [1, 1]]), + np.array([[0, 0], [1, 1]]), average='samples') == 0.5 assert not list(recwarn) @@ -1279,7 +1279,9 @@ def test_multiclass_jaccard_similarity_score(recwarn): y_true = np.array([[0, 0], [0, 0], [0, 0]]) y_pred = np.array([[0, 0], [0, 0], [0, 0]]) - assert jaccard_similarity_score(y_true, y_pred, average='weighted') == 0 + with ignore_warnings(): + assert (jaccard_similarity_score(y_true, y_pred, average='weighted') + == 0) assert not list(recwarn) @@ -1288,12 +1290,12 @@ def test_average_binary_jaccard_similarity_score(recwarn): # tp=0, fp=0, fn=1, tn=0 assert jaccard_similarity_score([1], [0], average='binary') == 0. # tp=0, fp=0, fn=0, tn=1 - msg = ('Jaccard is ill-defined and ' - 'being set to 0.0 in labels with no predicted samples.') + msg = ('Jaccard is ill-defined and being set to 0.0 with ' + 'no true or predicted samples') assert assert_warns_message(UndefinedMetricWarning, msg, jaccard_similarity_score, - [0], [0], + [0, 0], [0, 0], average='binary') == 0. # tp=1, fp=0, fn=0, tn=0 (pos_label=0) assert jaccard_similarity_score([0], [0], pos_label=0, From 47776c0c4186986e7774d0043be1770eabfae495 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 13 Feb 2019 10:13:49 +1100 Subject: [PATCH 88/88] Address Adrin's comments --- sklearn/metrics/classification.py | 3 ++- sklearn/metrics/tests/test_classification.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index a58cbf25a8af9..5ae254b6028f7 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -638,7 +638,8 @@ def jaccard_similarity_score(y_true, y_pred, labels=None, pos_label=1, If ``False``, return the sum of the Jaccard similarity coefficient over the sample set. Otherwise, return the average of Jaccard similarity coefficient. ``normalize`` is only applicable when - ``average='samples'``. + ``average='samples'``. The default value 'true-if-samples' behaves like + True, but does not raise an error with other values of `average`. sample_weight : array-like of shape = [n_samples], optional Sample weights. diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3dcb494fee1e8..c28236e8bf7f2 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1160,8 +1160,6 @@ def test_jaccard_similarity_score_validation(): "classification.") assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, y_pred, average='samples') - assert_raise_message(ValueError, msg3, jaccard_similarity_score, y_true, - y_pred, average='samples', normalize=False) assert_warns_message(UserWarning, "Note that pos_label (set to 3) is ignored when "