From d49b41c68ef8a41a38066c042e78386a9fa1c94f Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sun, 10 Apr 2022 17:06:20 +0200
Subject: [PATCH 01/23] Catch per-scorer exceptions in _MultimetricScorer

---
 sklearn/metrics/_scorer.py             | 14 +++++++++-----
 sklearn/model_selection/_validation.py | 13 +++++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index e1655af169fcc..09a85c7f8286d 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -102,11 +102,15 @@ def __call__(self, estimator, *args, **kwargs):
         cached_call = partial(_cached_call, cache)
 
         for name, scorer in self._scorers.items():
-            if isinstance(scorer, _BaseScorer):
-                score = scorer._score(cached_call, estimator, *args, **kwargs)
-            else:
-                score = scorer(estimator, *args, **kwargs)
-            scores[name] = score
+            try:
+                if isinstance(scorer, _BaseScorer):
+                    score = scorer._score(cached_call, estimator, *args, **kwargs)
+                else:
+                    score = scorer(estimator, *args, **kwargs)
+                scores[name] = score
+            except Exception as e:
+                scores[name] = e
+
         return scores
 
     def _use_cache(self, estimator):
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 6537c4785b48f..70eebf00eca51 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -784,6 +784,19 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
     error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)"
     if isinstance(scores, dict):
         for name, score in scores.items():
+            # In case of _MultimetricScorer if a scorer fails instead of raising
+            # the exception, it is passed as score to handle it later.
+            if isinstance(score, Exception):
+                if error_score == "raise":
+                    raise score
+                else:
+                    score = error_score
+                    warnings.warn(
+                        f"Scoring failed for the {name} scorer. The score on this"
+                        " train-test partition for these parameters will be set "
+                        f"to {error_score}. Details: \n{format_exc()}",
+                        UserWarning,
+                    )
             if hasattr(score, "item"):
                 with suppress(ValueError):
                     # e.g. unwrap memmapped scalars

From 82e6dc131115db828537ae7d69a8fa4247b20f3e Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sun, 10 Apr 2022 18:32:48 +0200
Subject: [PATCH 02/23] If all scorers fail, raise an error.

---
 sklearn/metrics/_scorer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 09a85c7f8286d..aa4cdd4642926 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -101,6 +101,7 @@ def __call__(self, estimator, *args, **kwargs):
         cache = {} if self._use_cache(estimator) else None
         cached_call = partial(_cached_call, cache)
 
+        all_scorer_failed = True
         for name, scorer in self._scorers.items():
             try:
                 if isinstance(scorer, _BaseScorer):
@@ -110,6 +111,11 @@ def __call__(self, estimator, *args, **kwargs):
                 scores[name] = score
             except Exception as e:
                 scores[name] = e
+            else:
+                all_scorer_failed = False
+
+        if all_scorer_failed:
+            raise ValueError("All scorer failed")
 
         return scores
 

From 056b40c345520d00ea12f7a90b1b5a3275af06d4 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sun, 10 Apr 2022 18:38:35 +0200
Subject: [PATCH 03/23] Update warning message.

---
 sklearn/model_selection/_validation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 70eebf00eca51..4773a9c7bd3db 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -792,9 +792,9 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
                 else:
                     score = error_score
                     warnings.warn(
-                        f"Scoring failed for the {name} scorer. The score on this"
-                        " train-test partition for these parameters will be set "
-                        f"to {error_score}. Details: \n{format_exc()}",
+                        "Scoring failed. The score on this train-test partition for "
+                        f"these parameters will be set to {error_score}. Details: \n"
+                        f"{format_exc()}",
                         UserWarning,
                     )
             if hasattr(score, "item"):

From 2aa8ac71c36a4a1e7c6381dd8eeb2f19b61b8ef3 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sun, 10 Apr 2022 19:00:10 +0200
Subject: [PATCH 04/23] Update raised exception

---
 sklearn/metrics/_scorer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index aa4cdd4642926..f54e8617efb4c 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -111,11 +111,12 @@ def __call__(self, estimator, *args, **kwargs):
                 scores[name] = score
             except Exception as e:
                 scores[name] = e
+                last_exception = e
             else:
                 all_scorer_failed = False
 
         if all_scorer_failed:
-            raise ValueError("All scorer failed")
+            raise last_exception
 
         return scores
 

From 30363eb7302539006736644dbdf4602cc4349dcf Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sun, 10 Apr 2022 22:38:24 +0200
Subject: [PATCH 05/23] Making the changes from the review. TODO: update the
 tests and add new ones for the new parts.

---
 sklearn/metrics/_scorer.py             |  7 ------
 sklearn/model_selection/_validation.py | 30 +++++++++++++++-----------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index f54e8617efb4c..09a85c7f8286d 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -101,7 +101,6 @@ def __call__(self, estimator, *args, **kwargs):
         cache = {} if self._use_cache(estimator) else None
         cached_call = partial(_cached_call, cache)
 
-        all_scorer_failed = True
         for name, scorer in self._scorers.items():
             try:
                 if isinstance(scorer, _BaseScorer):
@@ -111,12 +110,6 @@ def __call__(self, estimator, *args, **kwargs):
                 scores[name] = score
             except Exception as e:
                 scores[name] = e
-                last_exception = e
-            else:
-                all_scorer_failed = False
-
-        if all_scorer_failed:
-            raise last_exception
 
         return scores
 
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 4773a9c7bd3db..eb53ae80eb2a1 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -781,22 +781,26 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
                 UserWarning,
             )
 
+    # Check errors in `_MultimetricScorer`
+    if isinstance(scorer, _MultimetricScorer):
+        exceptions = {name: e for name, e in scores.items() if isinstance(e, Exception)}
+        if exceptions:
+            if error_score == "raise":
+                for name, e in exceptions.items():
+                    raise e
+            else:
+                new_scores = {name: error_score for name in exceptions}
+                scores.update(new_scores)
+                warnings.warn(
+                    "Scoring failed. The score on this train-test partition for "
+                    f"these parameters will be set to {error_score}. Details: \n"
+                    f"{format_exc()}",
+                    UserWarning,
+                )
+
     error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)"
     if isinstance(scores, dict):
         for name, score in scores.items():
-            # In case of _MultimetricScorer if a scorer fails instead of raising
-            # the exception, it is passed as score to handle it later.
-            if isinstance(score, Exception):
-                if error_score == "raise":
-                    raise score
-                else:
-                    score = error_score
-                    warnings.warn(
-                        "Scoring failed. The score on this train-test partition for "
-                        f"these parameters will be set to {error_score}. Details: \n"
-                        f"{format_exc()}",
-                        UserWarning,
-                    )
             if hasattr(score, "item"):
                 with suppress(ValueError):
                     # e.g. unwrap memmapped scalars

From 5caba434e49a85d7931cf1498939bf705b292c98 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Tue, 12 Apr 2022 21:19:17 +0200
Subject: [PATCH 06/23] Update `exceptions` to a list.

---
 sklearn/model_selection/_validation.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index eb53ae80eb2a1..09f0f61937724 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -783,14 +783,15 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
 
     # Check errors in `_MultimetricScorer`
     if isinstance(scorer, _MultimetricScorer):
-        exceptions = {name: e for name, e in scores.items() if isinstance(e, Exception)}
+        exceptions = [
+            (name, e) for name, e in scores.items() if isinstance(e, Exception)
+        ]
         if exceptions:
             if error_score == "raise":
-                for name, e in exceptions.items():
-                    raise e
+                raise exceptions[0][1]
             else:
-                new_scores = {name: error_score for name in exceptions}
-                scores.update(new_scores)
+                for name, e in exceptions:
+                    scores[name] = error_score
                 warnings.warn(
                     "Scoring failed. The score on this train-test partition for "
                     f"these parameters will be set to {error_score}. Details: \n"

From e88cfde7023397165c1f18ebaa9bca57451c7dac Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sat, 16 Apr 2022 22:47:14 +0200
Subject: [PATCH 07/23] Added formatted exception to the warnings. All the
 failing scorers' messages will show up there.

---
 sklearn/model_selection/_validation.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 09f0f61937724..b56e139b19f15 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -15,7 +15,7 @@
 import numbers
 import time
 from functools import partial
-from traceback import format_exc
+from traceback import format_exc, format_exception
 from contextlib import suppress
 from collections import Counter
 
@@ -792,12 +792,15 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
             else:
                 for name, e in exceptions:
                     scores[name] = error_score
-                warnings.warn(
-                    "Scoring failed. The score on this train-test partition for "
-                    f"these parameters will be set to {error_score}. Details: \n"
-                    f"{format_exc()}",
-                    UserWarning,
-                )
+                    details = "".join(
+                        format_exception(etype=type(e), value=e, tb=e.__traceback__)
+                    )
+                    warnings.warn(
+                        "Scoring failed. The score on this train-test partition for "
+                        f"these parameters will be set to {error_score}. Details: \n"
+                        f"{details}",
+                        UserWarning,
+                    )
 
     error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)"
     if isinstance(scores, dict):

From 69c61e821cf953457a3e06339a47638f84df961d Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sat, 16 Apr 2022 23:25:38 +0200
Subject: [PATCH 08/23] Calling the function with positional arguments instead.

---
 sklearn/model_selection/_validation.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index b56e139b19f15..d80cf2783b569 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -792,9 +792,7 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
             else:
                 for name, e in exceptions:
                     scores[name] = error_score
-                    details = "".join(
-                        format_exception(etype=type(e), value=e, tb=e.__traceback__)
-                    )
+                    details = "".join(format_exception(type(e), e, e.__traceback__))
                     warnings.warn(
                         "Scoring failed. The score on this train-test partition for "
                         f"these parameters will be set to {error_score}. Details: \n"

From 01ccc1a54dc1611630f78bc16c51207869902434 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Mon, 18 Apr 2022 12:47:51 +0200
Subject: [PATCH 09/23] added
 test_multimetric_scorer_returning_exceptions_in_dictianary. TODO: a
 non-regression test for the original issue.

---
 sklearn/metrics/tests/test_score_objects.py | 34 +++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 23680e48ae3e7..22c8ed69912b5 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -871,6 +871,40 @@ def test_multimetric_scorer_sanity_check():
         assert_allclose(value, separate_scores[score_name])
 
 
+def test_multimetric_scorer_returning_exceptions_in_dictianary():
+    scorers = {
+        "failing_1": "neg_mean_squared_log_error",
+        "non_failing": "neg_median_absolute_error",
+        "failing_2": "neg_mean_squared_log_error",
+    }
+
+    X, y = make_classification(
+        n_samples=20, n_features=2, n_redundant=0, random_state=0
+    )
+    y *= -1  # neg_mean_squared_log_error fails if y contains negative values
+
+    clf = DecisionTreeClassifier()
+    clf.fit(X, y)
+
+    scorer_dict = _check_multimetric_scoring(clf, scorers)
+    multi_scorer = _MultimetricScorer(**scorer_dict)
+
+    result = multi_scorer(clf, X, y)
+
+    e1 = result["failing_1"]
+    score = result["non_failing"]
+    e2 = result["failing_2"]
+
+    error_msg = (
+        "Mean Squared Logarithmic Error cannot be used when targets contain negative"
+        " values."
+    )
+
+    assert type(e1) is ValueError and str(e1) == error_msg
+    assert isinstance(score, float)
+    assert type(e2) is ValueError and str(e2) == error_msg
+
+
 @pytest.mark.parametrize(
     "scorer_name, metric",
     [

From 218f5bdd4663b7d2bdd9421163c0dffd811e8c78 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Mon, 18 Apr 2022 14:29:07 +0200
Subject: [PATCH 10/23] Update test_cross_validate_failing_scorer to cover the
 failing and non-failing multimetric case.

---
 sklearn/metrics/tests/test_score_objects.py   |  2 +-
 .../model_selection/tests/test_validation.py  | 25 ++++++++++++++-----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 22c8ed69912b5..295fd75d5755f 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -879,7 +879,7 @@ def test_multimetric_scorer_returning_exceptions_in_dictianary():
     }
 
     X, y = make_classification(
-        n_samples=20, n_features=2, n_redundant=0, random_state=0
+        n_samples=50, n_features=2, n_redundant=0, random_state=0
     )
     y *= -1  # neg_mean_squared_log_error fails if y contains negative values
 
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 90b5a605ac2e4..d93bd67588165 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -2232,15 +2232,22 @@ def test_cross_val_score_failing_scorer(error_score):
 def test_cross_validate_failing_scorer(
     error_score, return_train_score, with_multimetric
 ):
-    # check that an estimator can fail during scoring in `cross_validate` and
-    # that we can optionally replaced it with `error_score`
+    # Check that an estimator can fail during scoring in `cross_validate` and
+    # that we can optionally replace it with `error_score`. In the multimetric
+    # case also check the result of a non-failing scorer where the other scorers
+    # are failing.
     X, y = load_iris(return_X_y=True)
     clf = LogisticRegression(max_iter=5).fit(X, y)
 
     error_msg = "This scorer is supposed to fail!!!"
     failing_scorer = partial(_failing_scorer, error_msg=error_msg)
     if with_multimetric:
-        scoring = {"score_1": failing_scorer, "score_2": failing_scorer}
+        non_failing_scorer = make_scorer(mean_squared_error)
+        scoring = {
+            "score_1": failing_scorer,
+            "score_2": non_failing_scorer,
+            "score_3": failing_scorer,
+        }
     else:
         scoring = failing_scorer
 
@@ -2272,9 +2279,15 @@ def test_cross_validate_failing_scorer(
             )
             for key in results:
                 if "_score" in key:
-                    # check the test (and optionally train score) for all
-                    # scorers that should be assigned to `error_score`.
-                    assert_allclose(results[key], error_score)
+                    if "2" in key:
+                        # check the test (and optionally train) score for the
+                        # scorer that should be non-failing
+                        for i in results[key]:
+                            assert isinstance(i, float)
+                    else:
+                        # check the test (and optionally train) score for all
+                        # scorers that should be assigned to `error_score`.
+                        assert_allclose(results[key], error_score)
 
 
 def three_params_scorer(i, j, k):

From 1ab52b2e0755894417237fc1ace2fe04cac5941d Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sat, 30 Apr 2022 13:38:30 +0200
Subject: [PATCH 11/23] Changing '2' to more explicit 'score_2' in the
 test_cross_validate_failing_scorer

---
 sklearn/model_selection/tests/test_validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index d93bd67588165..11dd933b1f43b 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -2279,7 +2279,7 @@ def test_cross_validate_failing_scorer(
             )
             for key in results:
                 if "_score" in key:
-                    if "2" in key:
+                    if "score_2" in key:
                         # check the test (and optionally train) score for the
                         # scorer that should be non-failing
                         for i in results[key]:

From 3e066c61fac39788ee22424528c37a4fe432a341 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Tue, 3 May 2022 22:56:37 +0200
Subject: [PATCH 12/23] _MultimetricScorer now can raise an exception in
 __call__ or not. This is controlled by the new bool parameter. The _score
 function is changed accordingly. The test for the _MultimetricScorer which
 tested the exception returning case is replaced with the
 test_multimetric_scorer_exception_handling test. The
 test_cross_validate_failing_scorer function needs no modification.

---
 sklearn/metrics/_scorer.py                  | 14 +++++-
 sklearn/metrics/tests/test_score_objects.py | 31 +++++++-----
 sklearn/model_selection/_validation.py      | 56 +++++++++++----------
 3 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 09a85c7f8286d..e2e68a8c4a5b0 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -21,6 +21,7 @@
 from collections.abc import Iterable
 from functools import partial
 from collections import Counter
+from traceback import format_exc
 
 import numpy as np
 import copy
@@ -88,11 +89,17 @@ class _MultimetricScorer:
 
     Parameters
     ----------
+    raise_exc : bool
+        Whether to raise the exception in `__call__` or not. If set to False
+        a formatted string of the exception details is passed as result of
+        the failing scorer.
+
     scorers : dict
         Dictionary mapping names to callable scorers.
     """
 
-    def __init__(self, **scorers):
+    def __init__(self, *, raise_exc=True, **scorers):
+        self._raise_exc = raise_exc
         self._scorers = scorers
 
     def __call__(self, estimator, *args, **kwargs):
@@ -109,7 +116,10 @@ def __call__(self, estimator, *args, **kwargs):
                     score = scorer(estimator, *args, **kwargs)
                 scores[name] = score
             except Exception as e:
-                scores[name] = e
+                if self._raise_exc:
+                    raise e
+                else:
+                    scores[name] = format_exc()
 
         return scores
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 295fd75d5755f..f5e5a3d0d9ce8 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -871,7 +871,8 @@ def test_multimetric_scorer_sanity_check():
         assert_allclose(value, separate_scores[score_name])
 
 
-def test_multimetric_scorer_returning_exceptions_in_dictianary():
+@pytest.mark.parametrize("raise_exc", [True, False])
+def test_multimetric_scorer_exception_handling(raise_exc):
     scorers = {
         "failing_1": "neg_mean_squared_log_error",
         "non_failing": "neg_median_absolute_error",
@@ -887,22 +888,26 @@ def test_multimetric_scorer_returning_exceptions_in_dictianary():
     clf.fit(X, y)
 
     scorer_dict = _check_multimetric_scoring(clf, scorers)
-    multi_scorer = _MultimetricScorer(**scorer_dict)
-
-    result = multi_scorer(clf, X, y)
-
-    e1 = result["failing_1"]
-    score = result["non_failing"]
-    e2 = result["failing_2"]
+    multi_scorer = _MultimetricScorer(raise_exc=raise_exc, **scorer_dict)
 
     error_msg = (
-        "Mean Squared Logarithmic Error cannot be used when targets contain negative"
-        " values."
+        "Mean Squared Logarithmic Error cannot be used when targets contain"
+        " negative values."
     )
 
-    assert type(e1) is ValueError and str(e1) == error_msg
-    assert isinstance(score, float)
-    assert type(e2) is ValueError and str(e2) == error_msg
+    if raise_exc:
+        with pytest.raises(ValueError, match=error_msg):
+            multi_scorer(clf, X, y)
+    else:
+        result = multi_scorer(clf, X, y)
+
+        exception_message_1 = result["failing_1"]
+        score = result["non_failing"]
+        exception_message_2 = result["failing_2"]
+
+        assert isinstance(exception_message_1, str) and error_msg in exception_message_1
+        assert isinstance(score, float)
+        assert isinstance(exception_message_2, str) and error_msg in exception_message_2
 
 
 @pytest.mark.parametrize(
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index d80cf2783b569..fd2eb2c4ba38e 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -759,7 +759,7 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
     """
     if isinstance(scorer, dict):
         # will cache method calls if needed. scorer() returns a dict
-        scorer = _MultimetricScorer(**scorer)
+        scorer = _MultimetricScorer(raise_exc=(error_score == "raise"), **scorer)
 
     try:
         if y_test is None:
@@ -767,38 +767,42 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
         else:
             scores = scorer(estimator, X_test, y_test)
     except Exception:
-        if error_score == "raise":
+        if isinstance(scorer, _MultimetricScorer):
+            assert error_score == "raise", (
+                "If `_MultimetricScorer` raises exception, the `error_score`"
+                " parameter should be equal to 'raise'."
+            )
             raise
         else:
-            if isinstance(scorer, _MultimetricScorer):
-                scores = {name: error_score for name in scorer._scorers}
+            if error_score == "raise":
+                raise
             else:
                 scores = error_score
-            warnings.warn(
-                "Scoring failed. The score on this train-test partition for "
-                f"these parameters will be set to {error_score}. Details: \n"
-                f"{format_exc()}",
-                UserWarning,
-            )
+                warnings.warn(
+                    "Scoring failed. The score on this train-test partition for "
+                    f"these parameters will be set to {error_score}. Details: \n"
+                    f"{format_exc()}",
+                    UserWarning,
+                )
 
-    # Check errors in `_MultimetricScorer`
+    # Check non-raised error messages in `_MultimetricScorer`
     if isinstance(scorer, _MultimetricScorer):
-        exceptions = [
-            (name, e) for name, e in scores.items() if isinstance(e, Exception)
+        exception_messages = [
+            (name, str_e) for name, str_e in scores.items() if isinstance(str_e, str)
         ]
-        if exceptions:
-            if error_score == "raise":
-                raise exceptions[0][1]
-            else:
-                for name, e in exceptions:
-                    scores[name] = error_score
-                    details = "".join(format_exception(type(e), e, e.__traceback__))
-                    warnings.warn(
-                        "Scoring failed. The score on this train-test partition for "
-                        f"these parameters will be set to {error_score}. Details: \n"
-                        f"{details}",
-                        UserWarning,
-                    )
+        if exception_messages:
+            assert error_score != "raise", (
+                "`error_score` == 'raise', but the exception is not raised in"
+                " `_MultimetricScorer`."
+            )
+            for name, str_e in exception_messages:
+                scores[name] = error_score
+                warnings.warn(
+                    "Scoring failed. The score on this train-test partition for "
+                    f"these parameters will be set to {error_score}. Details: \n"
+                    f"{str_e}",
+                    UserWarning,
+                )
 
     error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)"
     if isinstance(scores, dict):

From b56b585887b51395bcba7dc11ee5d2fdc3db050f Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Tue, 3 May 2022 23:08:00 +0200
Subject: [PATCH 13/23] Remove unused function from the imports.

---
 sklearn/metrics/tests/test_score_objects.py | 3 +++
 sklearn/model_selection/_validation.py      | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index f5e5a3d0d9ce8..c8e8f1477a672 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -873,6 +873,9 @@ def test_multimetric_scorer_sanity_check():
 
 @pytest.mark.parametrize("raise_exc", [True, False])
 def test_multimetric_scorer_exception_handling(raise_exc):
+    # Check that the `_MultimetricScorer` returns exception messages in the
+    # result dict for the failing scorers in case of `raise_exc` is False
+    # and if `raise_exc` is True, then the proper exception is raised.
     scorers = {
         "failing_1": "neg_mean_squared_log_error",
         "non_failing": "neg_median_absolute_error",
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index fd2eb2c4ba38e..a64edc5de95f7 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -15,7 +15,7 @@
 import numbers
 import time
 from functools import partial
-from traceback import format_exc, format_exception
+from traceback import format_exc
 from contextlib import suppress
 from collections import Counter
 

From ea793c9cf2fb36ae5bbbd5354e10218283e735b3 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 21:28:12 +0200
Subject: [PATCH 14/23] Changing _MultimetricScorer API: now the 'scorers'
 parameter is a keyword-only dict instead of a dict unpacked using **.

---
 sklearn/inspection/_permutation_importance.py |  2 +-
 sklearn/metrics/_scorer.py                    |  4 ++--
 sklearn/metrics/tests/test_score_objects.py   | 10 +++++-----
 sklearn/model_selection/_validation.py        |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 7ca586efe8630..e00ff3b40e3e5 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -252,7 +252,7 @@ def permutation_importance(
         scorer = check_scoring(estimator, scoring=scoring)
     else:
         scorers_dict = _check_multimetric_scoring(estimator, scoring)
-        scorer = _MultimetricScorer(**scorers_dict)
+        scorer = _MultimetricScorer(scorers=scorers_dict)
 
     baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index e2e68a8c4a5b0..a268f98535916 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -98,9 +98,9 @@ class _MultimetricScorer:
         Dictionary mapping names to callable scorers.
     """
 
-    def __init__(self, *, raise_exc=True, **scorers):
-        self._raise_exc = raise_exc
+    def __init__(self, *, scorers, raise_exc=True):
         self._scorers = scorers
+        self._raise_exc = raise_exc
 
     def __call__(self, estimator, *args, **kwargs):
         """Evaluate predicted target values."""
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index c8e8f1477a672..8fe8e11da1320 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -784,7 +784,7 @@ def test_multimetric_scorer_calls_method_once(
     mock_est.classes_ = np.array([0, 1])
 
     scorer_dict = _check_multimetric_scoring(LogisticRegression(), scorers)
-    multi_scorer = _MultimetricScorer(**scorer_dict)
+    multi_scorer = _MultimetricScorer(scorers=scorer_dict)
     results = multi_scorer(mock_est, X, y)
 
     assert set(scorers) == set(results)  # compare dict keys
@@ -811,7 +811,7 @@ def predict_proba(self, X):
 
     scorers = ["roc_auc", "neg_log_loss"]
     scorer_dict = _check_multimetric_scoring(clf, scorers)
-    scorer = _MultimetricScorer(**scorer_dict)
+    scorer = _MultimetricScorer(scorers=scorer_dict)
     scorer(clf, X, y)
 
     assert predict_proba_call_cnt == 1
@@ -834,7 +834,7 @@ def predict(self, X):
 
     scorers = {"neg_mse": "neg_mean_squared_error", "r2": "roc_auc"}
     scorer_dict = _check_multimetric_scoring(clf, scorers)
-    scorer = _MultimetricScorer(**scorer_dict)
+    scorer = _MultimetricScorer(scorers=scorer_dict)
     scorer(clf, X, y)
 
     assert predict_called_cnt == 1
@@ -857,7 +857,7 @@ def test_multimetric_scorer_sanity_check():
     clf.fit(X, y)
 
     scorer_dict = _check_multimetric_scoring(clf, scorers)
-    multi_scorer = _MultimetricScorer(**scorer_dict)
+    multi_scorer = _MultimetricScorer(scorers=scorer_dict)
 
     result = multi_scorer(clf, X, y)
 
@@ -891,7 +891,7 @@ def test_multimetric_scorer_exception_handling(raise_exc):
     clf.fit(X, y)
 
     scorer_dict = _check_multimetric_scoring(clf, scorers)
-    multi_scorer = _MultimetricScorer(raise_exc=raise_exc, **scorer_dict)
+    multi_scorer = _MultimetricScorer(scorers=scorer_dict, raise_exc=raise_exc)
 
     error_msg = (
         "Mean Squared Logarithmic Error cannot be used when targets contain"
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index a64edc5de95f7..bc8d8b61e2a2c 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -759,7 +759,7 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
     """
     if isinstance(scorer, dict):
         # will cache method calls if needed. scorer() returns a dict
-        scorer = _MultimetricScorer(raise_exc=(error_score == "raise"), **scorer)
+        scorer = _MultimetricScorer(scorers=scorer, raise_exc=(error_score == "raise"))
 
     try:
         if y_test is None:

From a6e05c7327714b73dca1eb098ccb5c8ab2ba642f Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 21:52:01 +0200
Subject: [PATCH 15/23] Adding changelog.

---
 doc/whats_new/v1.1.rst     | 5 +++++
 sklearn/metrics/_scorer.py | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 33e2b89dccb65..1ccbb156979b7 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -834,6 +834,11 @@ Changelog
   `n_components=1`.
   :pr:`23034` by :user:`Meekail Zain <micky774>`.
 
+- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
+  in case of some failing scorers the non-failing scorers now returning proper
+  scores instead of `error_score` values.
+  :pr:`22969` by :user:`András Simon <simonandras>`.
+
 :mod:`sklearn.multiclass`
 .........................
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index a268f98535916..407c43c98f879 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -89,13 +89,13 @@ class _MultimetricScorer:
 
     Parameters
     ----------
+    scorers : dict
+        Dictionary mapping names to callable scorers.
+
     raise_exc : bool
         Whether to raise the exception in `__call__` or not. If set to False
         a formatted string of the exception details is passed as result of
         the failing scorer.
-
-    scorers : dict
-        Dictionary mapping names to callable scorers.
     """
 
     def __init__(self, *, scorers, raise_exc=True):

From 81006616f2524e88a5a653907ca4f9912c75a88a Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 21:56:42 +0200
Subject: [PATCH 16/23] small changes to changelog

---
 doc/whats_new/v1.1.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 1ccbb156979b7..2701649f0297b 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -829,16 +829,16 @@ Changelog
 - |Fix| :func:`model_selection.learning_curve` now supports `partial_fit`
   with regressors. :pr:`22982` by `Thomas Fan`_.
 
+- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
+  in case of some failing scorers the non-failing scorers now returning proper
+  scores instead of `error_score` values.
+  :pr:`22969` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
+
 - |Fix| :class:`mixture.GaussianMixture` now normalizes `weights_` more safely,
   preventing rounding errors when calling :meth:`GaussianMixture.sample` with
   `n_components=1`.
   :pr:`23034` by :user:`Meekail Zain <micky774>`.
 
-- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
-  in case of some failing scorers the non-failing scorers now returning proper
-  scores instead of `error_score` values.
-  :pr:`22969` by :user:`András Simon <simonandras>`.
-
 :mod:`sklearn.multiclass`
 .........................
 

From 176d4065acdce60838e95c148fa555223a01ca48 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 22:01:09 +0200
Subject: [PATCH 17/23] Changing the changelog again.

---
 doc/whats_new/v1.1.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 2701649f0297b..479f35bda81d5 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -829,16 +829,16 @@ Changelog
 - |Fix| :func:`model_selection.learning_curve` now supports `partial_fit`
   with regressors. :pr:`22982` by `Thomas Fan`_.
 
-- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
-  in case of some failing scorers the non-failing scorers now returning proper
-  scores instead of `error_score` values.
-  :pr:`22969` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
-
 - |Fix| :class:`mixture.GaussianMixture` now normalizes `weights_` more safely,
   preventing rounding errors when calling :meth:`GaussianMixture.sample` with
   `n_components=1`.
   :pr:`23034` by :user:`Meekail Zain <micky774>`.
 
+- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
+  in case of some failing scorers the non-failing scorers now returning proper
+  scores instead of `error_score` values.
+  :pr:`22969` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
+
 :mod:`sklearn.multiclass`
 .........................
 

From fd616b937dc10e1d6e9ed104c12e263f23b41254 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 22:13:45 +0200
Subject: [PATCH 18/23] Changing the number in the changelog and a small change
 in the test_cross_validate_failing_scorer: checking for '_score_2' instead of
 'score_2'.

---
 doc/whats_new/v1.1.rst                           | 2 +-
 sklearn/model_selection/tests/test_validation.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 07e0dddbbd486..b9b34e34e6dd4 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -960,7 +960,7 @@ Changelog
 - |Fix| :function:`model_selection.cross_validate` with multimetric scoring
   in case of some failing scorers the non-failing scorers now returning proper
   scores instead of `error_score` values.
-  :pr:`22969` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
+  :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
 
 :mod:`sklearn.multiclass`
 .........................
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 11dd933b1f43b..a05e395002e3b 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -2279,7 +2279,7 @@ def test_cross_validate_failing_scorer(
             )
             for key in results:
                 if "_score" in key:
-                    if "score_2" in key:
+                    if "_score_2" in key:
                         # check the test (and optionally train) score for the
                         # scorer that should be non-failing
                         for i in results[key]:

From f7857add66ab2539d9ce127a2df0094f05d2b52c Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Wed, 11 May 2022 22:35:38 +0200
Subject: [PATCH 19/23] Correct doc.

---
 doc/whats_new/v1.1.rst                      | 4 ++--
 sklearn/metrics/_scorer.py                  | 2 +-
 sklearn/metrics/tests/test_score_objects.py | 7 ++++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index b9b34e34e6dd4..56d66bb48c602 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -957,8 +957,8 @@ Changelog
   `n_components=1`.
   :pr:`23034` by :user:`Meekail Zain <micky774>`.
 
-- |Fix| :function:`model_selection.cross_validate` with multimetric scoring
-  in case of some failing scorers the non-failing scorers now returning proper
+- |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
+  case of some failing scorers the non-failing scorers now returning proper
   scores instead of `error_score` values.
   :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 407c43c98f879..d5386dd47e385 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -93,7 +93,7 @@ class _MultimetricScorer:
         Dictionary mapping names to callable scorers.
 
     raise_exc : bool
-        Whether to raise the exception in `__call__` or not. If set to False
+        Whether to raise the exception in `__call__` or not. If set to `False`
         a formatted string of the exception details is passed as result of
         the failing scorer.
     """
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 8fe8e11da1320..f1bec69fa1f15 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -873,9 +873,10 @@ def test_multimetric_scorer_sanity_check():
 
 @pytest.mark.parametrize("raise_exc", [True, False])
 def test_multimetric_scorer_exception_handling(raise_exc):
-    # Check that the `_MultimetricScorer` returns exception messages in the
-    # result dict for the failing scorers in case of `raise_exc` is False
-    # and if `raise_exc` is True, then the proper exception is raised.
+    # Check that the calling of the `_MultimetricScorer` returns
+    # exception messages in the result dict for the failing scorers
+    # in case of `raise_exc` is `False` and if `raise_exc` is `True`,
+    # then the proper exception is raised.
     scorers = {
         "failing_1": "neg_mean_squared_log_error",
         "non_failing": "neg_median_absolute_error",

From 70c59e0914b0cd783300fd9c63191522221b3439 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sat, 21 May 2022 13:49:35 +0200
Subject: [PATCH 20/23] Changing changelog based on review.

---
 doc/whats_new/v1.1.rst | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 56d66bb48c602..29dcdcb94f1d5 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -33,6 +33,14 @@ Changelog
 - |Fix| :func:`utils.class_weight.compute_sample_weight` now works with sparse `y`.
   :pr:`23115` by :user:`kernc <kernc>`.
 
+:mod:`sklearn.model_selection`
+..............................
+
+- |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
+  case of some failing scorers the non-failing scorers now returning proper
+  scores instead of `error_score` values.
+  :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
+
 .. _changes_1_1:
 
 Version 1.1.0
@@ -952,16 +960,6 @@ Changelog
 - |Fix| :func:`model_selection.learning_curve` now supports `partial_fit`
   with regressors. :pr:`22982` by `Thomas Fan`_.
 
-- |Fix| :class:`mixture.GaussianMixture` now normalizes `weights_` more safely,
-  preventing rounding errors when calling :meth:`GaussianMixture.sample` with
-  `n_components=1`.
-  :pr:`23034` by :user:`Meekail Zain <micky774>`.
-
-- |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
-  case of some failing scorers the non-failing scorers now returning proper
-  scores instead of `error_score` values.
-  :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
-
 :mod:`sklearn.multiclass`
 .........................
 

From 03d33c8466a372824368b9738317dd23fdbb8c93 Mon Sep 17 00:00:00 2001
From: simonandras <simonandras97@gmail.com>
Date: Sat, 21 May 2022 14:01:44 +0200
Subject: [PATCH 21/23] Removing assertions from _validation.py and leaving
 comments instead.

---
 sklearn/model_selection/_validation.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index be2963bd8997c..35e5be05b60b8 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -767,10 +767,8 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
             scores = scorer(estimator, X_test, y_test)
     except Exception:
         if isinstance(scorer, _MultimetricScorer):
-            assert error_score == "raise", (
-                "If `_MultimetricScorer` raises exception, the `error_score`"
-                " parameter should be equal to 'raise'."
-            )
+            # If `_MultimetricScorer` raises exception, the `error_score`
+            # parameter is equal to "raise".
             raise
         else:
             if error_score == "raise":
@@ -790,10 +788,7 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"):
             (name, str_e) for name, str_e in scores.items() if isinstance(str_e, str)
         ]
         if exception_messages:
-            assert error_score != "raise", (
-                "`error_score` == 'raise', but the exception is not raised in"
-                " `_MultimetricScorer`."
-            )
+            # error_score != "raise"
             for name, str_e in exception_messages:
                 scores[name] = error_score
                 warnings.warn(

From 89cf53e58ab45c8978aa9476bef3ac3994a5048f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 28 Dec 2022 14:43:44 +0100
Subject: [PATCH 22/23] DOC move entry changelog

---
 doc/whats_new/v1.1.rst | 8 --------
 doc/whats_new/v1.3.rst | 7 +++++++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 3f663374099ae..e213f385a78c9 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -201,14 +201,6 @@ Changelog
 - |Fix| :func:`utils.class_weight.compute_sample_weight` now works with sparse `y`.
   :pr:`23115` by :user:`kernc <kernc>`.
 
-:mod:`sklearn.model_selection`
-..............................
-
-- |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
-  case of some failing scorers the non-failing scorers now returning proper
-  scores instead of `error_score` values.
-  :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
-
 .. _changes_1_1:
 
 Version 1.1.0
diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index eb9f0cc473e27..36d11a1e6cfa5 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -48,6 +48,13 @@ Changelog
   :class:`ensemble.ExtraTreesClassifier` and :class:`ensemble.ExtraTreesRegressor`.
   :pr:`25177` by :user:`Tim Head <betatim>`.
 
+:mod:`sklearn.model_selection`
+..............................
+- |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
+  case of some failing scorers the non-failing scorers now returning proper
+  scores instead of `error_score` values.
+  :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
+
 :mod:`sklearn.pipeline`
 .......................
 - |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g.

From 7be9ca14e7a9d3c36644097d108ff1aceeb81627 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Wed, 28 Dec 2022 15:02:40 +0100
Subject: [PATCH 23/23] Apply suggestions from code review

---
 doc/whats_new/v1.3.rst                      |  2 +-
 sklearn/metrics/_scorer.py                  |  2 +-
 sklearn/metrics/tests/test_score_objects.py | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 36d11a1e6cfa5..2039b2f0ad0da 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -51,7 +51,7 @@ Changelog
 :mod:`sklearn.model_selection`
 ..............................
 - |Fix| :func:`model_selection.cross_validate` with multimetric scoring in
-  case of some failing scorers the non-failing scorers now returning proper
+  case of some failing scorers the non-failing scorers now return proper
   scores instead of `error_score` values.
   :pr:`23101` by :user:`András Simon <simonandras>` and `Thomas Fan`_.
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 13e80d2e743c2..a414818f497d2 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -93,7 +93,7 @@ class _MultimetricScorer:
     scorers : dict
         Dictionary mapping names to callable scorers.
 
-    raise_exc : bool
+    raise_exc : bool, default=True
         Whether to raise the exception in `__call__` or not. If set to `False`
         a formatted string of the exception details is passed as result of
         the failing scorer.
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 315c0f2e0189b..d39db7fc894c4 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -875,10 +875,11 @@ def test_multimetric_scorer_sanity_check():
 
 @pytest.mark.parametrize("raise_exc", [True, False])
 def test_multimetric_scorer_exception_handling(raise_exc):
-    # Check that the calling of the `_MultimetricScorer` returns
-    # exception messages in the result dict for the failing scorers
-    # in case of `raise_exc` is `False` and if `raise_exc` is `True`,
-    # then the proper exception is raised.
+    """Check that calling `_MultimetricScorer` returns the exception
+    messages in the result dict for the failing scorers when
+    `raise_exc` is `False`, and that the proper exception is raised
+    when `raise_exc` is `True`.
+    """
     scorers = {
         "failing_1": "neg_mean_squared_log_error",
         "non_failing": "neg_median_absolute_error",
@@ -890,8 +891,7 @@ def test_multimetric_scorer_exception_handling(raise_exc):
     )
     y *= -1  # neg_mean_squared_log_error fails if y contains negative values
 
-    clf = DecisionTreeClassifier()
-    clf.fit(X, y)
+    clf = DecisionTreeClassifier().fit(X, y)
 
     scorer_dict = _check_multimetric_scoring(clf, scorers)
     multi_scorer = _MultimetricScorer(scorers=scorer_dict, raise_exc=raise_exc)