From 1b305893318e283754de02aacbdbcc60236cce4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Mon, 4 Sep 2017 18:36:33 +0200
Subject: [PATCH 01/10] add option to cross_validate to return the estimators
 fitted on each split

---
 sklearn/model_selection/_validation.py | 35 ++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 147d741b500b9..9e3a9a1e7f57b 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -37,7 +37,8 @@
 
 def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
-                   pre_dispatch='2*n_jobs', return_train_score=True):
+                   pre_dispatch='2*n_jobs', return_train_score=True,
+                   return_estimator=False):
     """Evaluate metric(s) by cross-validation and also record fit/score times.
 
     Read more in the :ref:`User Guide <multimetric_cross_validation>`.
@@ -119,6 +120,9 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
         Whether to include train scores in the return dict if ``scoring`` is
         of multimetric type.
 
+    return_estimator : boolean, default False
+        Whether to return the estimators fitted on each split.
+
     Returns
     -------
     scores : dict of float arrays of shape=(n_splits,)
@@ -140,6 +144,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                 The time for scoring the estimator on the test set for each
                 cv split. (Note time for scoring on the train set is not
                 included even if ``return_train_score`` is set to ``True``
+            ``estimator``
+                The list of estimator objects for each cv split.
 
     Examples
     --------
@@ -191,20 +197,30 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
         delayed(_fit_and_score)(
             clone(estimator), X, y, scorers, train, test, verbose, None,
             fit_params, return_train_score=return_train_score,
-            return_times=True)
+            return_times=True, return_estimator=return_estimator)
         for train, test in cv.split(X, y, groups))
 
     if return_train_score:
-        train_scores, test_scores, fit_times, score_times = zip(*scores)
+        if return_estimator:
+            (train_scores, test_scores, fit_times, score_times,
+             fitted_est) = zip(*scores)
+        else:
+            train_scores, test_scores, fit_times, score_times = zip(*scores)
         train_scores = _aggregate_score_dicts(train_scores)
     else:
-        test_scores, fit_times, score_times = zip(*scores)
+        if return_estimator:
+            test_scores, fit_times, score_times, fitted_est = zip(*scores)
+        else:
+            test_scores, fit_times, score_times = zip(*scores)
     test_scores = _aggregate_score_dicts(test_scores)
 
     ret = dict()
     ret['fit_time'] = np.array(fit_times)
     ret['score_time'] = np.array(score_times)
 
+    if return_estimator:
+        ret['estimator'] = fitted_est
+
     for name in scorers:
         ret['test_%s' % name] = np.array(test_scores[name])
         if return_train_score:
@@ -325,7 +341,8 @@ def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
 def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                    parameters, fit_params, return_train_score=False,
                    return_parameters=False, return_n_test_samples=False,
-                   return_times=False, error_score='raise'):
+                   return_times=False, return_estimator=False,
+                   error_score='raise'):
     """Fit estimator and compute scores for a given dataset split.
 
     Parameters
@@ -383,6 +400,9 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
     return_times : boolean, optional, default: False
         Whether to return the fit/score times.
 
+    return_estimator : boolean, optimal, default: False
+        Whether to return the fitted estimator.
+
     Returns
     -------
     train_scores : dict of scorer name -> float, optional
@@ -403,6 +423,9 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
 
     parameters : dict or None, optional
         The parameters that have been evaluated.
+
+    estimator : estimator object, optional
+        The fitted estimator. Returned only if ``return_estimator`` is True.
     """
     if verbose > 1:
         if parameters is None:
@@ -489,6 +512,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
         ret.extend([fit_time, score_time])
     if return_parameters:
         ret.append(parameters)
+    if return_estimator:
+        ret.append(estimator)
     return ret
 
 

From 468556efa382dd7cbb9f7afbe5689a636b01141a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Mon, 18 Sep 2017 22:30:17 +0200
Subject: [PATCH 02/10] fixed variable name, flattened nested conditionals and added a test

---
 sklearn/model_selection/_validation.py        | 19 +++++++++----------
 .../model_selection/tests/test_validation.py  | 17 ++++++++++++++---
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 9e3a9a1e7f57b..ab84579774257 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -200,18 +200,17 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
             return_times=True, return_estimator=return_estimator)
         for train, test in cv.split(X, y, groups))
 
+    zipped_scores = list(zip(*scores))
+    unpacked = 0
     if return_train_score:
-        if return_estimator:
-            (train_scores, test_scores, fit_times, score_times,
-             fitted_est) = zip(*scores)
-        else:
-            train_scores, test_scores, fit_times, score_times = zip(*scores)
+        train_scores = zipped_scores[0]
         train_scores = _aggregate_score_dicts(train_scores)
+        unpacked += 1
+    if return_estimator:
+        (test_scores, fit_times, score_times,
+         fitted_estimators) = zipped_scores[unpacked:]
     else:
-        if return_estimator:
-            test_scores, fit_times, score_times, fitted_est = zip(*scores)
-        else:
-            test_scores, fit_times, score_times = zip(*scores)
+        test_scores, fit_times, score_times = zipped_scores[unpacked:]
     test_scores = _aggregate_score_dicts(test_scores)
 
     ret = dict()
@@ -219,7 +218,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
     ret['score_time'] = np.array(score_times)
 
     if return_estimator:
-        ret['estimator'] = fitted_est
+        ret['estimator'] = fitted_estimators
 
     for name in scorers:
         ret['test_%s' % name] = np.array(test_scores[name])
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 5f650cb644079..90e38a80655a2 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -360,20 +360,23 @@ def test_cross_validate():
         test_mse_scores = []
         train_r2_scores = []
         test_r2_scores = []
+        fitted_estimators = []
         for train, test in cv.split(X, y):
             est = clone(reg).fit(X[train], y[train])
             train_mse_scores.append(mse_scorer(est, X[train], y[train]))
             train_r2_scores.append(r2_scorer(est, X[train], y[train]))
             test_mse_scores.append(mse_scorer(est, X[test], y[test]))
             test_r2_scores.append(r2_scorer(est, X[test], y[test]))
+            fitted_estimators.append(est)
 
         train_mse_scores = np.array(train_mse_scores)
         test_mse_scores = np.array(test_mse_scores)
         train_r2_scores = np.array(train_r2_scores)
         test_r2_scores = np.array(test_r2_scores)
+        fitted_estimators = np.array(fitted_estimators)
 
         scores = (train_mse_scores, test_mse_scores, train_r2_scores,
-                  test_r2_scores)
+                  test_r2_scores, fitted_estimators)
 
         yield check_cross_validate_single_metric, est, X, y, scores
         yield check_cross_validate_multi_metric, est, X, y, scores
@@ -381,7 +384,7 @@ def test_cross_validate():
 
 def check_cross_validate_single_metric(clf, X, y, scores):
     (train_mse_scores, test_mse_scores, train_r2_scores,
-     test_r2_scores) = scores
+     test_r2_scores, fitted_estimators) = scores
     # Test single metric evaluation when scoring is string or singleton list
     for (return_train_score, dict_len) in ((True, 4), (False, 3)):
         # Single metric passed as a string
@@ -413,11 +416,19 @@ def check_cross_validate_single_metric(clf, X, y, scores):
         assert_equal(len(r2_scores_dict), dict_len)
         assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores)
 
+    # Test return_estimator option
+    mse_scores_dict = cross_validate(clf, X, y, cv=5,
+                                     scoring='neg_mean_squared_error',
+                                     return_estimator=True)
+    for k, est in enumerate(mse_scores_dict['estimator']):
+        assert_almost_equal(est.coef_, fitted_estimators[k].coef_)
+        assert_almost_equal(est.intercept_, fitted_estimators[k].intercept_)
+
 
 def check_cross_validate_multi_metric(clf, X, y, scores):
     # Test multimetric evaluation when scoring is a list / dict
     (train_mse_scores, test_mse_scores, train_r2_scores,
-     test_r2_scores) = scores
+     test_r2_scores, fitted_estimators) = scores
     all_scoring = (('r2', 'neg_mean_squared_error'),
                    {'r2': make_scorer(r2_score),
                     'neg_mean_squared_error': 'neg_mean_squared_error'})

From e690acd6ee1f31bc39d3bd3d6078de5555765869 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Tue, 17 Oct 2017 09:43:26 +0200
Subject: [PATCH 03/10] fixes suggested by jnothman

---
 sklearn/model_selection/_validation.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index ab84579774257..880f890e40616 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -145,7 +145,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                 cv split. (Note time for scoring on the train set is not
                 included even if ``return_train_score`` is set to ``True``
             ``estimator``
-                The list of estimator objects for each cv split.
+                The estimator objects for each cv split.
 
     Examples
     --------
@@ -201,16 +201,14 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
         for train, test in cv.split(X, y, groups))
 
     zipped_scores = list(zip(*scores))
-    unpacked = 0
     if return_train_score:
-        train_scores = zipped_scores[0]
+        train_scores = zipped_scores.pop(0)
         train_scores = _aggregate_score_dicts(train_scores)
-        unpacked += 1
     if return_estimator:
         (test_scores, fit_times, score_times,
-         fitted_estimators) = zipped_scores[unpacked:]
+         fitted_estimators) = zipped_scores
     else:
-        test_scores, fit_times, score_times = zipped_scores[unpacked:]
+        test_scores, fit_times, score_times = zipped_scores
     test_scores = _aggregate_score_dicts(test_scores)
 
     ret = dict()
@@ -399,7 +397,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
     return_times : boolean, optional, default: False
         Whether to return the fit/score times.
 
-    return_estimator : boolean, optimal, default: False
+    return_estimator : boolean, optional, default: False
         Whether to return the fitted estimator.
 
     Returns

From 4e65976113db25c5bd992008ad9949c58b64f222 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Tue, 17 Oct 2017 09:53:28 +0200
Subject: [PATCH 04/10] mention return_estimator option in doc

---
 doc/modules/cross_validation.rst | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index b47726979351f..f21e747156bb0 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -182,7 +182,7 @@ The ``cross_validate`` function differs from ``cross_val_score`` in two ways -
 
 - It allows specifying multiple metrics for evaluation.
 
-- It returns a dict containing training scores, fit-times and score-times in
+- It returns a dict containing training scores, fit-times, score-times and fitted estimators in
   addition to the test score.
 
 For single metric evaluation, where the scoring parameter is a string,
@@ -196,6 +196,8 @@ following keys -
 for all the scorers. If train scores are not needed, this should be set to
 ``False`` explicitly.
 
+``return_estimator`` is set to ``False`` by default. When set to ``True``, it adds an ``estimator`` key containing the estimators fitted on each split.
+
 The multiple metrics can be specified either as a list, tuple or set of
 predefined scorer names::
 
@@ -227,8 +229,9 @@ Here is an example of ``cross_validate`` using a single metric::
 
     >>> scores = cross_validate(clf, iris.data, iris.target,
     ...                         scoring='precision_macro')
+    ...                         return_estimator=True)
     >>> sorted(scores.keys())
-    ['fit_time', 'score_time', 'test_score', 'train_score']
+    ['fit_time', 'score_time', 'estimator', test_score', 'train_score']
 
 
 Obtaining predictions by cross-validation

From b60cd8905d245444ebc33a0cec4f74efb80675fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Tue, 17 Oct 2017 10:24:01 +0200
Subject: [PATCH 05/10] fixes as suggested: simplify estimator unpacking and reword doc

---
 doc/modules/cross_validation.rst       | 3 ++-
 sklearn/model_selection/_validation.py | 6 ++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index f21e747156bb0..8ae10cfa69985 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -196,7 +196,8 @@ following keys -
 for all the scorers. If train scores are not needed, this should be set to
 ``False`` explicitly.
 
-``return_estimator`` is set to ``False`` by default. When set to ``True``, it adds an ``estimator`` key containing the estimators fitted on each split.
+You may also retain the estimator fitted on each training set by setting
+``return_estimator=True``.
 
 The multiple metrics can be specified either as a list, tuple or set of
 predefined scorer names::
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 880f890e40616..4e8c7c43e6857 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -205,10 +205,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
         train_scores = zipped_scores.pop(0)
         train_scores = _aggregate_score_dicts(train_scores)
     if return_estimator:
-        (test_scores, fit_times, score_times,
-         fitted_estimators) = zipped_scores
-    else:
-        test_scores, fit_times, score_times = zipped_scores
+        fitted_estimators = zipped_scores.pop()
+    test_scores, fit_times, score_times = zipped_scores
     test_scores = _aggregate_score_dicts(test_scores)
 
     ret = dict()

From 87b52f005b68143bc00c78a04d6bd593b9412a8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Thu, 26 Oct 2017 13:26:41 +0200
Subject: [PATCH 06/10] fixes suggested by amueller

---
 doc/modules/cross_validation.rst       | 3 ++-
 sklearn/model_selection/_validation.py | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 0d4ce4bea6cc8..541edd1dcf494 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -182,7 +182,8 @@ The ``cross_validate`` function differs from ``cross_val_score`` in two ways -
 
 - It allows specifying multiple metrics for evaluation.
 
-- It returns a dict containing training scores, fit-times, score-times and fitted estimators in
+- It returns a dict containing fit-times, score-times
+  (and optionally training scores as well as fitted estimators) in
   addition to the test score.
 
 For single metric evaluation, where the scoring parameter is a string,
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index cc751c157fe86..0cca1fb02b7ff 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -155,6 +155,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                 included even if ``return_train_score`` is set to ``True``
             ``estimator``
                 The estimator objects for each cv split.
+                This is available only if the ``return_estimator``
+                parameter is set to ``True``.
 
     Examples
     --------

From cc4165f301a897c743ef1e145b339b1bbd1e56bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bellet?= <aurelien.bellet@inria.fr>
Date: Thu, 26 Oct 2017 14:20:00 +0200
Subject: [PATCH 07/10] comma fix

---
 doc/modules/cross_validation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 541edd1dcf494..1216cd2716ee0 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -230,7 +230,7 @@ Or as a dict mapping scorer name to a predefined or custom scoring function::
 Here is an example of ``cross_validate`` using a single metric::
 
     >>> scores = cross_validate(clf, iris.data, iris.target,
-    ...                         scoring='precision_macro')
+    ...                         scoring='precision_macro',
     ...                         return_estimator=True)
     >>> sorted(scores.keys())
     ['fit_time', 'score_time', 'estimator', test_score', 'train_score']

From e6e54d739a80b9bdf66c934d03256a3b5e6f1c71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien?= <aurelien.bellet@inria.fr>
Date: Fri, 15 Dec 2017 16:06:24 -0800
Subject: [PATCH 08/10] right order of keys in doc

---
 doc/modules/cross_validation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 1216cd2716ee0..84935478951b2 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -233,7 +233,7 @@ Here is an example of ``cross_validate`` using a single metric::
     ...                         scoring='precision_macro',
     ...                         return_estimator=True)
     >>> sorted(scores.keys())
-    ['fit_time', 'score_time', 'estimator', test_score', 'train_score']
+    ['estimator', 'fit_time', 'score_time', test_score', 'train_score']
 
 
 Obtaining predictions by cross-validation

From f166c539b325df09f1d494b0f4e377526a616ffb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien?= <aurelien.bellet@inria.fr>
Date: Fri, 15 Dec 2017 16:09:24 -0800
Subject: [PATCH 09/10] missing apostrophe

---
 doc/modules/cross_validation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 84935478951b2..570b7022bddd6 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -233,7 +233,7 @@ Here is an example of ``cross_validate`` using a single metric::
     ...                         scoring='precision_macro',
     ...                         return_estimator=True)
     >>> sorted(scores.keys())
-    ['estimator', 'fit_time', 'score_time', test_score', 'train_score']
+    ['estimator', 'fit_time', 'score_time', 'test_score', 'train_score']
 
 
 Obtaining predictions by cross-validation

From e4374a0c51424b4fe3c0064b0e82904903f3c87d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien?= <aurelien.bellet@inria.fr>
Date: Tue, 27 Feb 2018 21:21:41 +0000
Subject: [PATCH 10/10] update whats new file

---
 doc/whats_new/v0.20.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 193204002664a..621fe0d99ea89 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -163,6 +163,10 @@ Model evaluation and meta-estimators
   group-based CV strategies. :issue:`9085` by :user:`Laurent Direr <ldirer>`
   and `Andreas Müller`_.
 
+- Add ``return_estimator`` parameter in :func:`model_selection.cross_validate`
+  to return estimators fitted on each split. :issue:`9686` by
+  :user:`Aurélien Bellet <bellet>`.
+
 Metrics
 
 - :func:`metrics.roc_auc_score` now supports binary ``y_true`` other than