diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 4a19e27e9c11c..9e266238f8283 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1137,11 +1137,11 @@ Here is a small example of how to use the :func:`roc_curve` function::
     >>> scores = np.array([0.1, 0.4, 0.35, 0.8])
     >>> fpr, tpr, thresholds = roc_curve(y, scores, pos_label=2)
     >>> fpr
-    array([ 0. , 0.5, 0.5, 1. ])
+    array([ 0. , 0. , 0.5, 0.5, 1. ])
     >>> tpr
-    array([ 0.5, 0.5, 1. , 1. ])
+    array([ 0. , 0.5, 0.5, 1. , 1. ])
     >>> thresholds
-    array([ 0.8 , 0.4 , 0.35, 0.1 ])
+    array([ 1.8 , 0.8 , 0.4 , 0.35, 0.1 ])
 
 This figure shows an example of such an ROC curve:
 
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 0897f331ebda0..e35ee16db037d 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -18,6 +18,7 @@ random sampling procedures.
 - :class:`decomposition.IncrementalPCA` in Python 2 (bug fix)
 - :class:`isotonic.IsotonicRegression` (bug fix)
 - :class:`metrics.roc_auc_score` (bug fix)
+- :class:`metrics.roc_curve` (bug fix)
 - :class:`neural_network.BaseMultilayerPerceptron` (bug fix)
 - :class:`neural_network.MLPRegressor` (bug fix)
 - :class:`neural_network.MLPClassifier` (bug fix)
@@ -160,6 +161,12 @@ Metrics
 - Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with
   non-integer sample weights. :issue:`9786` by :user:`Hanmin Qin`.
 
+- Fixed a bug where :func:`metrics.roc_curve` sometimes started on the y-axis
+  instead of at (0, 0), which was inconsistent with the documentation and with
+  other implementations. Note that this does not change the result of
+  :func:`metrics.roc_auc_score`. :issue:`10093` by :user:`alexryndin`
+  and :user:`Hanmin Qin`.
+
 API changes summary
 -------------------
 
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 987067a044835..1d8d37954b99c 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -217,7 +217,6 @@ def _binary_uninterpolated_average_precision(
                                  sample_weight=sample_weight)
 
 
-
 def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
     from prediction scores.
@@ -267,6 +266,9 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
 
+    .. [2] Fawcett T. An introduction to ROC analysis. Pattern Recognition
+       Letters, 2006, 27(8):861-874.
+
     See also
     --------
     average_precision_score : Area under the precision-recall curve
@@ -541,6 +543,8 @@ def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
     .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
 
+    .. [2] Fawcett T. An introduction to ROC analysis. Pattern Recognition
+       Letters, 2006, 27(8):861-874.
 
     Examples
     --------
@@ -550,11 +554,11 @@ def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
     >>> scores = np.array([0.1, 0.4, 0.35, 0.8])
     >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)
     >>> fpr
-    array([ 0. , 0.5, 0.5, 1. ])
+    array([ 0. , 0. , 0.5, 0.5, 1. ])
     >>> tpr
-    array([ 0.5, 0.5, 1. , 1. ])
+    array([ 0. , 0.5, 0.5, 1. , 1. ])
     >>> thresholds
-    array([ 0.8 , 0.4 , 0.35, 0.1 ])
+    array([ 1.8 , 0.8 , 0.4 , 0.35, 0.1 ])
 
     """
     fps, tps, thresholds = _binary_clf_curve(
@@ -578,8 +582,9 @@ def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
         tps = tps[optimal_idxs]
         thresholds = thresholds[optimal_idxs]
 
-    if tps.size == 0 or fps[0] != 0:
+    if tps.size == 0 or fps[0] != 0 or tps[0] != 0:
         # Add an extra threshold position if necessary
+        # to make sure that the curve starts at (0, 0)
         tps = np.r_[0, tps]
         fps = np.r_[0, fps]
         thresholds = np.r_[thresholds[0] + 1, thresholds]
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index acdec8932211c..a17935ae7de17 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -270,8 +270,8 @@ def test_roc_curve_toydata():
     y_score = [0, 1]
     tpr, fpr, _ = roc_curve(y_true, y_score)
     roc_auc = roc_auc_score(y_true, y_score)
-    assert_array_almost_equal(tpr, [0, 1])
-    assert_array_almost_equal(fpr, [1, 1])
+    assert_array_almost_equal(tpr, [0, 0, 1])
+    assert_array_almost_equal(fpr, [0, 1, 1])
     assert_almost_equal(roc_auc, 1.)
 
     y_true = [0, 1]
@@ -294,8 +294,8 @@ def test_roc_curve_toydata():
     y_score = [1, 0]
     tpr, fpr, _ = roc_curve(y_true, y_score)
     roc_auc = roc_auc_score(y_true, y_score)
-    assert_array_almost_equal(tpr, [0, 1])
-    assert_array_almost_equal(fpr, [1, 1])
+    assert_array_almost_equal(tpr, [0, 0, 1])
+    assert_array_almost_equal(fpr, [0, 1, 1])
     assert_almost_equal(roc_auc, 1.)
 
     y_true = [1, 0]
@@ -319,8 +319,8 @@ def test_roc_curve_toydata():
     # assert UndefinedMetricWarning because of no negative sample in y_true
     tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true, y_score)
     assert_raises(ValueError, roc_auc_score, y_true, y_score)
-    assert_array_almost_equal(tpr, [np.nan, np.nan])
-    assert_array_almost_equal(fpr, [0.5, 1.])
+    assert_array_almost_equal(tpr, [np.nan, np.nan, np.nan])
+    assert_array_almost_equal(fpr, [0., 0.5, 1.])
 
     # Multi-label classification task
     y_true = np.array([[0, 1], [0, 1]])
@@ -359,7 +359,7 @@ def test_roc_curve_drop_intermediate():
     y_true = [0, 0, 0, 0, 1, 1]
     y_score = [0., 0.2, 0.5, 0.6, 0.7, 1.0]
     tpr, fpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=True)
-    assert_array_almost_equal(thresholds, [1., 0.7, 0.])
+    assert_array_almost_equal(thresholds, [2., 1., 0.7, 0.])
 
     # Test dropping thresholds with repeating scores
     y_true = [0, 0, 0, 0, 0, 0, 0,
@@ -368,7 +368,7 @@ def test_roc_curve_drop_intermediate():
                0.6, 0.7, 0.8, 0.9, 0.9, 1.0]
     tpr, fpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=True)
     assert_array_almost_equal(thresholds,
-                              [1.0, 0.9, 0.7, 0.6, 0.])
+                              [2.0, 1.0, 0.9, 0.7, 0.6, 0.])
 
 
 def test_roc_curve_fpr_tpr_increasing():
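Below is a quick sanity check of the patched behaviour, not part of the diff itself. It reruns the toy example from the updated docstring: before the fix the first returned point was (fpr=0, tpr=0.5), i.e. the curve started on the y-axis; after the fix an extra point is prepended so the curve starts at (0, 0) and the leading threshold is the next threshold plus one. It assumes a scikit-learn build with this patch applied.

import numpy as np
from sklearn.metrics import roc_curve, roc_auc_score

# Toy example from the updated docstring.
y = np.array([1, 1, 2, 2])
scores = np.array([0.1, 0.4, 0.35, 0.8])

fpr, tpr, thresholds = roc_curve(y, scores, pos_label=2)

# With the fix, the curve starts at (0, 0) ...
assert fpr[0] == 0 and tpr[0] == 0
# ... and the extra leading threshold is the highest score plus one
# (np.r_[thresholds[0] + 1, thresholds] in the patched code, here 1.8).
assert thresholds[0] == thresholds[1] + 1

# The prepended point does not change the area under the curve,
# which is why roc_auc_score is unaffected by this fix.
print(roc_auc_score(y == 2, scores))  # 0.75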