From 6347353051b6d042a163ae7238f7d1518f0f845b Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 16 Oct 2019 11:58:13 -0400 Subject: [PATCH 1/6] BUG Fixes error with multiclass roc auc scorer --- sklearn/metrics/scorer.py | 10 ++++- sklearn/metrics/tests/test_score_objects.py | 47 +++++++++++++++++---- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 25b826ff91f75..ea4b3932b6274 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -296,12 +296,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): """ y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator"): + if y_type not in ("binary", "multilabel-indicator", "multiclass"): raise ValueError("{0} format is not supported".format(y_type)) if is_regressor(clf): y_pred = method_caller(clf, "predict", X) - else: + elif y_type in ("binary", "multilabel-indicator"): try: y_pred = method_caller(clf, "decision_function", X) @@ -323,6 +323,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): self._score_func.__name__)) elif isinstance(y_pred, list): y_pred = np.vstack([p[:, -1] for p in y_pred]).T + else: # multiclass + try: + y_pred = method_caller(clf, "predict_proba", X) + except (NotImplementedError, AttributeError): + raise ValueError("estimator must defined predict_proba for " + "multiclass threshold evaluation") if sample_weight is not None: return self._sign * self._score_func(y, y_pred, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index cfabed6d2c4ac..c75141e97b8e0 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -4,6 +4,7 @@ import os import numbers from unittest.mock import Mock +from functools import partial import numpy as np import pytest @@ -28,7 +29,7 @@ from sklearn.svm import LinearSVC from sklearn.pipeline import make_pipeline from 
sklearn.cluster import KMeans -from sklearn.linear_model import Ridge, LogisticRegression +from sklearn.linear_model import Ridge, LogisticRegression, Perceptron from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.datasets import make_blobs from sklearn.datasets import make_classification @@ -381,13 +382,6 @@ def test_thresholded_scorers(): score2 = roc_auc_score(y_test, reg.predict(X_test)) assert_almost_equal(score1, score2) - # Test that an exception is raised on more than two classes - X, y = make_blobs(random_state=0, centers=3) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - clf.fit(X_train, y_train) - with pytest.raises(ValueError, match="multiclass format is not supported"): - get_scorer('roc_auc')(clf, X_test, y_test) - # test error is raised with a single class present in model # (predict_proba shape is not suitable for binary auc) X, y = make_blobs(random_state=0, centers=2) @@ -669,3 +663,40 @@ def test_multimetric_scorer_sanity_check(): for key, value in result.items(): score_name = scorers[key] assert_allclose(value, seperate_scores[score_name]) + + +@pytest.mark.parametrize('scorer_name, metric', [ + ('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')), + ('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')), + ('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr', + average='weighted')), + ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo', + average='weighted'))]) +def test_multiclass_threshold_scorer(scorer_name, metric): + scorer = get_scorer(scorer_name) + X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, + random_state=0) + lr = LogisticRegression(multi_class="multinomial") + lr.fit(X, y) + + y_proba = lr.predict_proba(X) + expected_score = metric(y, y_proba) + + assert scorer(lr, X, y) == pytest.approx(expected_score) + + +@pytest.mark.parametrize('scorer_name, ', ['roc_auc_ovr', 'roc_auc_ovo', + 'roc_auc_ovr_weighted', + 
'roc_auc_ovo_weighted']) +def test_multiclass_thresshold_no_predict_proba(scorer_name): + # estimator without predict_proba will fail + scorer = get_scorer(scorer_name) + X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, + random_state=0) + est = Perceptron() + est.fit(X, y) + + msg = ("estimator must defined predict_proba for multiclass " + "threshold evaluation") + with pytest.raises(ValueError, match=msg): + scorer(est, X, y) From 1cb82c871ab5568aed33063f991a152d7e58cce2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 16 Oct 2019 13:17:04 -0400 Subject: [PATCH 2/6] CLN Less lines --- sklearn/metrics/tests/test_score_objects.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index c75141e97b8e0..d184379ea75e3 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -676,9 +676,7 @@ def test_multiclass_threshold_scorer(scorer_name, metric): scorer = get_scorer(scorer_name) X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, random_state=0) - lr = LogisticRegression(multi_class="multinomial") - lr.fit(X, y) - + lr = LogisticRegression(multi_class="multinomial").fit(X, y) y_proba = lr.predict_proba(X) expected_score = metric(y, y_proba) @@ -693,8 +691,7 @@ def test_multiclass_thresshold_no_predict_proba(scorer_name): scorer = get_scorer(scorer_name) X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, random_state=0) - est = Perceptron() - est.fit(X, y) + est = Perceptron().fit(X, y) msg = ("estimator must defined predict_proba for multiclass " "threshold evaluation") From 0add9afa64658420f536f8c349aabae497461aec Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 28 Oct 2019 23:35:13 -0400 Subject: [PATCH 3/6] BUG Makes roc_auc_score depend on predict_proba --- sklearn/metrics/_scorer.py | 18 ++++++---------- 
sklearn/metrics/tests/test_score_objects.py | 23 +++++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 1ea61b73f83c7..d57d00eecda49 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -296,12 +296,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): """ y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator", "multiclass"): + if y_type not in ("binary", "multilabel-indicator"): raise ValueError("{0} format is not supported".format(y_type)) if is_regressor(clf): y_pred = method_caller(clf, "predict", X) - elif y_type in ("binary", "multilabel-indicator"): + else: try: y_pred = method_caller(clf, "decision_function", X) @@ -323,12 +323,6 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): self._score_func.__name__)) elif isinstance(y_pred, list): y_pred = np.vstack([p[:, -1] for p in y_pred]).T - else: # multiclass - try: - y_pred = method_caller(clf, "predict_proba", X) - except (NotImplementedError, AttributeError): - raise ValueError("estimator must defined predict_proba for " - "multiclass threshold evaluation") if sample_weight is not None: return self._sign * self._score_func(y, y_pred, @@ -651,14 +645,14 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, needs_threshold=True) average_precision_scorer = make_scorer(average_precision_score, needs_threshold=True) -roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_threshold=True, +roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class='ovo') -roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True, +roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class='ovo', average='weighted') -roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_threshold=True, +roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class='ovr') 
-roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True, +roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class='ovr', average='weighted') diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 1bd266cddb1e8..62f23dfad1b96 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -383,6 +383,13 @@ def test_thresholded_scorers(): score2 = roc_auc_score(y_test, reg.predict(X_test)) assert_almost_equal(score1, score2) + # Test that an exception is raised on more than two classes + X, y = make_blobs(random_state=0, centers=3) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + clf.fit(X_train, y_train) + with pytest.raises(ValueError, match="multiclass format is not supported"): + get_scorer('roc_auc')(clf, X_test, y_test) + # test error is raised with a single class present in model # (predict_proba shape is not suitable for binary auc) X, y = make_blobs(random_state=0, centers=2) @@ -682,19 +689,3 @@ def test_multiclass_threshold_scorer(scorer_name, metric): expected_score = metric(y, y_proba) assert scorer(lr, X, y) == pytest.approx(expected_score) - - -@pytest.mark.parametrize('scorer_name, ', ['roc_auc_ovr', 'roc_auc_ovo', - 'roc_auc_ovr_weighted', - 'roc_auc_ovo_weighted']) -def test_multiclass_thresshold_no_predict_proba(scorer_name): - # estimator without predict_proba will fail - scorer = get_scorer(scorer_name) - X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, - random_state=0) - est = Perceptron().fit(X, y) - - msg = ("estimator must defined predict_proba for multiclass " - "threshold evaluation") - with pytest.raises(ValueError, match=msg): - scorer(est, X, y) From c961ea35fc7d71c287eb7c85aa0394b0c74abd1a Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 29 Oct 2019 14:21:41 -0400 Subject: [PATCH 4/6] DOC Adds whats new --- 
doc/whats_new/v0.22.rst | 4 ++++ sklearn/metrics/tests/test_score_objects.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index cf20726949cfc..48bb884c51903 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -493,6 +493,10 @@ Changelog ``multioutput`` parameter. :pr:`14732` by :user:`Agamemnon Krasoulis <agamemnonc>`. +- |Fix| The scorers: 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', + and 'roc_auc_ovo_weighted' are now correctly configured to use + :term:`predict_proba`. :pr:`15274` by `Thomas Fan`_. + :mod:`sklearn.model_selection` .............................. diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 62f23dfad1b96..3d16c4214ca5f 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -680,7 +680,7 @@ def test_multimetric_scorer_sanity_check(): average='weighted')), ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo', average='weighted'))]) -def test_multiclass_threshold_scorer(scorer_name, metric): +def test_multiclass_roc_proba_scorer(scorer_name, metric): scorer = get_scorer(scorer_name) X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, random_state=0) @@ -689,3 +689,17 @@ def test_multiclass_threshold_scorer(scorer_name, metric): expected_score = metric(y, y_proba) assert scorer(lr, X, y) == pytest.approx(expected_score) + + +@pytest.mark.parametrize('scorer_name', [ + 'roc_auc_ovr', 'roc_auc_ovo', + 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted']) +def test_multiclass_roc_no_proba_scorer_errors(scorer_name): + # Perceptron has no predict_proba + scorer = get_scorer(scorer_name) + X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, + random_state=0) + lr = Perceptron().fit(X, y) + msg = "'Perceptron' object has no attribute 'predict_proba'" + with pytest.raises(AttributeError, 
match=msg): + scorer(lr, X, y) From 86903f5bca890ed9a04632a5199dc28ac0973a84 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 30 Oct 2019 10:17:28 -0400 Subject: [PATCH 5/6] DOC Move to one entry --- doc/whats_new/v0.22.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index aa14f42bf0a78..7a804edc443cf 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -462,9 +462,11 @@ Changelog Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by :user:`Jérôme Dockès <jeromedockes>`. -- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`. - :issue:`12789` by :user:`Kathy Chen <kathyxchen>`, - :user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`. +- |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with + corresponding scorers 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', + and 'roc_auc_ovo_weighted'. :pr:`12789` and :pr:`15274` by + :user:`Kathy Chen <kathyxchen>`, :user:`Mohamed Maskani <maskani-moh>`, and + `Thomas Fan`_. - |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the Tweedie deviance for a given ``power`` parameter. Also add mean Poisson @@ -506,10 +508,6 @@ Changelog ``multioutput`` parameter. :pr:`14732` by :user:`Agamemnon Krasoulis <agamemnonc>`. -- |Fix| The scorers: 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', - and 'roc_auc_ovo_weighted' are now correctly configured to use - :term:`predict_proba`. :pr:`15274` by `Thomas Fan`_. - :mod:`sklearn.model_selection` .............................. 
From 043821fc9cf89ecafa47e5329c3b1e6e1f67a594 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 1 Nov 2019 10:03:59 -0400 Subject: [PATCH 6/6] TST Checks for not multiclass --- sklearn/metrics/_scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index f334c6eab67a5..3df175c2ca306 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -247,7 +247,7 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): if y_type == "binary": if y_pred.shape[1] == 2: y_pred = y_pred[:, 1] - else: + elif y_pred.shape[1] == 1: # not multiclass raise ValueError('got predict_proba of shape {},' ' but need classifier with two' ' classes for {} scoring'.format( diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index fd9bf2fd19a58..00ff5a3a0563e 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -691,6 +691,22 @@ def test_multiclass_roc_proba_scorer(scorer_name, metric): assert scorer(lr, X, y) == pytest.approx(expected_score) +def test_multiclass_roc_proba_scorer_label(): + scorer = make_scorer(roc_auc_score, multi_class='ovo', + labels=[0, 1, 2], needs_proba=True) + X, y = make_classification(n_classes=3, n_informative=3, n_samples=20, + random_state=0) + lr = LogisticRegression(multi_class="multinomial").fit(X, y) + y_proba = lr.predict_proba(X) + + y_binary = y == 0 + expected_score = roc_auc_score(y_binary, y_proba, + multi_class='ovo', + labels=[0, 1, 2]) + + assert scorer(lr, X, y_binary) == pytest.approx(expected_score) + + @pytest.mark.parametrize('scorer_name', [ 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])