From 6347353051b6d042a163ae7238f7d1518f0f845b Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Wed, 16 Oct 2019 11:58:13 -0400
Subject: [PATCH 1/6] BUG Fixes error with multiclass roc auc scorer

---
 sklearn/metrics/scorer.py                   | 10 ++++-
 sklearn/metrics/tests/test_score_objects.py | 47 +++++++++++++++++----
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py
index 25b826ff91f75..ea4b3932b6274 100644
--- a/sklearn/metrics/scorer.py
+++ b/sklearn/metrics/scorer.py
@@ -296,12 +296,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         """
 
         y_type = type_of_target(y)
-        if y_type not in ("binary", "multilabel-indicator"):
+        if y_type not in ("binary", "multilabel-indicator", "multiclass"):
             raise ValueError("{0} format is not supported".format(y_type))
 
         if is_regressor(clf):
             y_pred = method_caller(clf, "predict", X)
-        else:
+        elif y_type in ("binary", "multilabel-indicator"):
             try:
                 y_pred = method_caller(clf, "decision_function", X)
 
@@ -323,6 +323,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
                                              self._score_func.__name__))
                 elif isinstance(y_pred, list):
                     y_pred = np.vstack([p[:, -1] for p in y_pred]).T
+        else:  # multiclass
+            try:
+                y_pred = method_caller(clf, "predict_proba", X)
+            except (NotImplementedError, AttributeError):
+                raise ValueError("estimator must defined predict_proba for "
+                                 "multiclass threshold evaluation")
 
         if sample_weight is not None:
             return self._sign * self._score_func(y, y_pred,
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index cfabed6d2c4ac..c75141e97b8e0 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -4,6 +4,7 @@
 import os
 import numbers
 from unittest.mock import Mock
+from functools import partial
 
 import numpy as np
 import pytest
@@ -28,7 +29,7 @@
 from sklearn.svm import LinearSVC
 from sklearn.pipeline import make_pipeline
 from sklearn.cluster import KMeans
-from sklearn.linear_model import Ridge, LogisticRegression
+from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.datasets import make_blobs
 from sklearn.datasets import make_classification
@@ -381,13 +382,6 @@ def test_thresholded_scorers():
     score2 = roc_auc_score(y_test, reg.predict(X_test))
     assert_almost_equal(score1, score2)
 
-    # Test that an exception is raised on more than two classes
-    X, y = make_blobs(random_state=0, centers=3)
-    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-    clf.fit(X_train, y_train)
-    with pytest.raises(ValueError, match="multiclass format is not supported"):
-        get_scorer('roc_auc')(clf, X_test, y_test)
-
     # test error is raised with a single class present in model
     # (predict_proba shape is not suitable for binary auc)
     X, y = make_blobs(random_state=0, centers=2)
@@ -669,3 +663,40 @@ def test_multimetric_scorer_sanity_check():
     for key, value in result.items():
         score_name = scorers[key]
         assert_allclose(value, seperate_scores[score_name])
+
+
+@pytest.mark.parametrize('scorer_name, metric', [
+    ('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')),
+    ('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')),
+    ('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr',
+                                     average='weighted')),
+    ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
+                                     average='weighted'))])
+def test_multiclass_threshold_scorer(scorer_name, metric):
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = LogisticRegression(multi_class="multinomial")
+    lr.fit(X, y)
+
+    y_proba = lr.predict_proba(X)
+    expected_score = metric(y, y_proba)
+
+    assert scorer(lr, X, y) == pytest.approx(expected_score)
+
+
+@pytest.mark.parametrize('scorer_name, ', ['roc_auc_ovr', 'roc_auc_ovo',
+                                           'roc_auc_ovr_weighted',
+                                           'roc_auc_ovo_weighted'])
+def test_multiclass_thresshold_no_predict_proba(scorer_name):
+    # estimator without predict_proba will fail
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    est = Perceptron()
+    est.fit(X, y)
+
+    msg = ("estimator must defined predict_proba for multiclass "
+           "threshold evaluation")
+    with pytest.raises(ValueError, match=msg):
+        scorer(est, X, y)

From 1cb82c871ab5568aed33063f991a152d7e58cce2 Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Wed, 16 Oct 2019 13:17:04 -0400
Subject: [PATCH 2/6] CLN Less lines

---
 sklearn/metrics/tests/test_score_objects.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index c75141e97b8e0..d184379ea75e3 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -676,9 +676,7 @@ def test_multiclass_threshold_scorer(scorer_name, metric):
     scorer = get_scorer(scorer_name)
     X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                                random_state=0)
-    lr = LogisticRegression(multi_class="multinomial")
-    lr.fit(X, y)
-
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
     y_proba = lr.predict_proba(X)
     expected_score = metric(y, y_proba)
 
@@ -693,8 +691,7 @@ def test_multiclass_thresshold_no_predict_proba(scorer_name):
     scorer = get_scorer(scorer_name)
     X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                                random_state=0)
-    est = Perceptron()
-    est.fit(X, y)
+    est = Perceptron().fit(X, y)
 
     msg = ("estimator must defined predict_proba for multiclass "
            "threshold evaluation")

From 0add9afa64658420f536f8c349aabae497461aec Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Mon, 28 Oct 2019 23:35:13 -0400
Subject: [PATCH 3/6] BUG Makes roc_auc_score depend on predict_proba

---
 sklearn/metrics/_scorer.py                  | 18 ++++++----------
 sklearn/metrics/tests/test_score_objects.py | 23 +++++++--------------
 2 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 1ea61b73f83c7..d57d00eecda49 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -296,12 +296,12 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         """
 
         y_type = type_of_target(y)
-        if y_type not in ("binary", "multilabel-indicator", "multiclass"):
+        if y_type not in ("binary", "multilabel-indicator"):
             raise ValueError("{0} format is not supported".format(y_type))
 
         if is_regressor(clf):
             y_pred = method_caller(clf, "predict", X)
-        elif y_type in ("binary", "multilabel-indicator"):
+        else:
             try:
                 y_pred = method_caller(clf, "decision_function", X)
 
@@ -323,12 +323,6 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
                                              self._score_func.__name__))
                 elif isinstance(y_pred, list):
                     y_pred = np.vstack([p[:, -1] for p in y_pred]).T
-        else:  # multiclass
-            try:
-                y_pred = method_caller(clf, "predict_proba", X)
-            except (NotImplementedError, AttributeError):
-                raise ValueError("estimator must defined predict_proba for "
-                                 "multiclass threshold evaluation")
 
         if sample_weight is not None:
             return self._sign * self._score_func(y, y_pred,
@@ -651,14 +645,14 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                              needs_threshold=True)
 average_precision_scorer = make_scorer(average_precision_score,
                                        needs_threshold=True)
-roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovo')
-roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovo',
                                           average='weighted')
-roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovr')
-roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovr',
                                           average='weighted')
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 1bd266cddb1e8..62f23dfad1b96 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -383,6 +383,13 @@ def test_thresholded_scorers():
     score2 = roc_auc_score(y_test, reg.predict(X_test))
     assert_almost_equal(score1, score2)
 
+    # Test that an exception is raised on more than two classes
+    X, y = make_blobs(random_state=0, centers=3)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    clf.fit(X_train, y_train)
+    with pytest.raises(ValueError, match="multiclass format is not supported"):
+        get_scorer('roc_auc')(clf, X_test, y_test)
+
     # test error is raised with a single class present in model
     # (predict_proba shape is not suitable for binary auc)
     X, y = make_blobs(random_state=0, centers=2)
@@ -682,19 +689,3 @@ def test_multiclass_threshold_scorer(scorer_name, metric):
     expected_score = metric(y, y_proba)
 
     assert scorer(lr, X, y) == pytest.approx(expected_score)
-
-
-@pytest.mark.parametrize('scorer_name, ', ['roc_auc_ovr', 'roc_auc_ovo',
-                                           'roc_auc_ovr_weighted',
-                                           'roc_auc_ovo_weighted'])
-def test_multiclass_thresshold_no_predict_proba(scorer_name):
-    # estimator without predict_proba will fail
-    scorer = get_scorer(scorer_name)
-    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
-                               random_state=0)
-    est = Perceptron().fit(X, y)
-
-    msg = ("estimator must defined predict_proba for multiclass "
-           "threshold evaluation")
-    with pytest.raises(ValueError, match=msg):
-        scorer(est, X, y)

From c961ea35fc7d71c287eb7c85aa0394b0c74abd1a Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Tue, 29 Oct 2019 14:21:41 -0400
Subject: [PATCH 4/6] DOC Adds whats new

---
 doc/whats_new/v0.22.rst                     |  4 ++++
 sklearn/metrics/tests/test_score_objects.py | 16 +++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index cf20726949cfc..48bb884c51903 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -493,6 +493,10 @@ Changelog
   ``multioutput`` parameter.
   :pr:`14732` by :user:`Agamemnon Krasoulis <agamemnonc>`.
 
+- |Fix| The scorers: 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 
+  and 'roc_auc_ovo_weighted' are now correctly configured to use
+  :term:`predict_proba`. :pr:`15274` by `Thomas Fan`_.
+
 :mod:`sklearn.model_selection`
 ..............................
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 62f23dfad1b96..3d16c4214ca5f 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -680,7 +680,7 @@ def test_multimetric_scorer_sanity_check():
                                      average='weighted')),
     ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
                                      average='weighted'))])
-def test_multiclass_threshold_scorer(scorer_name, metric):
+def test_multiclass_roc_proba_scorer(scorer_name, metric):
     scorer = get_scorer(scorer_name)
     X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                                random_state=0)
@@ -689,3 +689,17 @@ def test_multiclass_threshold_scorer(scorer_name, metric):
     expected_score = metric(y, y_proba)
 
     assert scorer(lr, X, y) == pytest.approx(expected_score)
+
+
+@pytest.mark.parametrize('scorer_name', [
+    'roc_auc_ovr', 'roc_auc_ovo',
+    'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])
+def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
+    # Perceptron lacks predict_proba, so these needs_proba scorers must raise
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    est = Perceptron().fit(X, y)
+    msg = "'Perceptron' object has no attribute 'predict_proba'"
+    with pytest.raises(AttributeError, match=msg):
+        scorer(est, X, y)

From 86903f5bca890ed9a04632a5199dc28ac0973a84 Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Wed, 30 Oct 2019 10:17:28 -0400
Subject: [PATCH 5/6] DOC Move to one entry

---
 doc/whats_new/v0.22.rst | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index aa14f42bf0a78..7a804edc443cf 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -462,9 +462,11 @@ Changelog
   Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by :user:`Jérôme
   Dockès <jeromedockes>`.
 
-- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
-  :issue:`12789` by :user:`Kathy Chen <kathyxchen>`,
-  :user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`.
+- |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with
+  corresponding scorers 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted',
+  and 'roc_auc_ovo_weighted'. :pr:`12789` and :pr:`15274` by
+  :user:`Kathy Chen <kathyxchen>`, :user:`Mohamed Maskani <maskani-moh>`, and
+  `Thomas Fan`_.
 
 - |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the
   Tweedie deviance for a given ``power`` parameter. Also add mean Poisson
@@ -506,10 +508,6 @@ Changelog
   ``multioutput`` parameter.
   :pr:`14732` by :user:`Agamemnon Krasoulis <agamemnonc>`.
 
-- |Fix| The scorers: 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 
-  and 'roc_auc_ovo_weighted' are now correctly configured to use
-  :term:`predict_proba`. :pr:`15274` by `Thomas Fan`_.
-
 :mod:`sklearn.model_selection`
 ..............................
 

From 043821fc9cf89ecafa47e5329c3b1e6e1f67a594 Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Fri, 1 Nov 2019 10:03:59 -0400
Subject: [PATCH 6/6] TST Checks for not multiclass

---
 sklearn/metrics/_scorer.py                  |  2 +-
 sklearn/metrics/tests/test_score_objects.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index f334c6eab67a5..3df175c2ca306 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -247,7 +247,7 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         if y_type == "binary":
             if y_pred.shape[1] == 2:
                 y_pred = y_pred[:, 1]
-            else:
+            elif y_pred.shape[1] == 1:  # not multiclass
                 raise ValueError('got predict_proba of shape {},'
                                  ' but need classifier with two'
                                  ' classes for {} scoring'.format(
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index fd9bf2fd19a58..00ff5a3a0563e 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -691,6 +691,22 @@ def test_multiclass_roc_proba_scorer(scorer_name, metric):
     assert scorer(lr, X, y) == pytest.approx(expected_score)
 
 
+def test_multiclass_roc_proba_scorer_label():
+    scorer = make_scorer(roc_auc_score, multi_class='ovo',
+                         labels=[0, 1, 2], needs_proba=True)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+    y_proba = lr.predict_proba(X)
+
+    y_binary = y == 0  # two-valued target scored against the 3-class model
+    expected_score = roc_auc_score(y_binary, y_proba,
+                                   multi_class='ovo',
+                                   labels=[0, 1, 2])
+
+    assert scorer(lr, X, y_binary) == pytest.approx(expected_score)
+
+
 @pytest.mark.parametrize('scorer_name', [
     'roc_auc_ovr', 'roc_auc_ovo',
     'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])