diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index 938bc0d15983d..81191160f7d23 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -470,9 +470,11 @@ Changelog
   Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by
   :user:`Jérôme Dockès <jeromedockes>`.
 
-- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
-  :issue:`12789` by :user:`Kathy Chen <kathyxchen>`,
-  :user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`.
+- |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with
+  corresponding scorers 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted',
+  and 'roc_auc_ovo_weighted'. :pr:`12789` and :pr:`15274` by
+  :user:`Kathy Chen <kathyxchen>`, :user:`Mohamed Maskani <maskani-moh>`, and
+  `Thomas Fan`_.
 
 - |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the
   Tweedie deviance for a given ``power`` parameter. Also add mean Poisson
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index b525ab77b87f3..3df175c2ca306 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -247,7 +247,7 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         if y_type == "binary":
             if y_pred.shape[1] == 2:
                 y_pred = y_pred[:, 1]
-            else:
+            elif y_pred.shape[1] == 1:  # not multiclass
                 raise ValueError('got predict_proba of shape {},'
                                  ' but need classifier with two'
                                  ' classes for {} scoring'.format(
@@ -645,14 +645,14 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                              needs_threshold=True)
 average_precision_scorer = make_scorer(average_precision_score,
                                        needs_threshold=True)
-roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovo')
-roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovo',
                                           average='weighted')
-roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovr')
-roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovr',
                                           average='weighted')
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 992afa98f6e8e..00ff5a3a0563e 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -4,6 +4,7 @@
 import os
 import numbers
 from unittest.mock import Mock
+from functools import partial
 
 import numpy as np
 import pytest
@@ -29,7 +30,7 @@
 from sklearn.svm import LinearSVC
 from sklearn.pipeline import make_pipeline
 from sklearn.cluster import KMeans
-from sklearn.linear_model import Ridge, LogisticRegression
+from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.datasets import make_blobs
 from sklearn.datasets import make_classification
@@ -670,3 +671,51 @@ def test_multimetric_scorer_sanity_check():
     for key, value in result.items():
         score_name = scorers[key]
         assert_allclose(value, seperate_scores[score_name])
+
+
+@pytest.mark.parametrize('scorer_name, metric', [
+    ('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')),
+    ('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')),
+    ('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr',
+                                     average='weighted')),
+    ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
+                                     average='weighted'))])
+def test_multiclass_roc_proba_scorer(scorer_name, metric):
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+    y_proba = lr.predict_proba(X)
+    expected_score = metric(y, y_proba)
+
+    assert scorer(lr, X, y) == pytest.approx(expected_score)
+
+
+def test_multiclass_roc_proba_scorer_label():
+    scorer = make_scorer(roc_auc_score, multi_class='ovo',
+                         labels=[0, 1, 2], needs_proba=True)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+    y_proba = lr.predict_proba(X)
+
+    y_binary = y == 0
+    expected_score = roc_auc_score(y_binary, y_proba,
+                                   multi_class='ovo',
+                                   labels=[0, 1, 2])
+
+    assert scorer(lr, X, y_binary) == pytest.approx(expected_score)
+
+
+@pytest.mark.parametrize('scorer_name', [
+    'roc_auc_ovr', 'roc_auc_ovo',
+    'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])
+def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
+    # Perceptron has no predict_proba
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = Perceptron().fit(X, y)
+    msg = "'Perceptron' object has no attribute 'predict_proba'"
+    with pytest.raises(AttributeError, match=msg):
+        scorer(lr, X, y)
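
Not part of the patch itself: a minimal usage sketch of the four scorer
strings it registers, assuming a scikit-learn build that includes this diff.
The switch from needs_threshold=True to needs_proba=True matters because the
multiclass path of roc_auc_score consumes per-class probability estimates
from predict_proba; decision_function margins are not valid probabilities.

# Usage sketch (illustrative only, not part of the diff).
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Three-class problem, mirroring the setup used in the new tests.
X, y = make_classification(n_classes=3, n_informative=3, n_samples=200,
                           random_state=0)
clf = LogisticRegression(multi_class="multinomial")

# Each of the new scorer strings now routes through predict_proba.
for scoring in ('roc_auc_ovr', 'roc_auc_ovo',
                'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'):
    scores = cross_val_score(clf, X, y, scoring=scoring, cv=5)
    print(scoring, scores.mean())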