From 9a2919b44e31ce9211ed7017e768db6e424691fa Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Mon, 1 Oct 2018 17:44:27 -0400
Subject: [PATCH 1/5] return_train_score deprecation

---
 sklearn/model_selection/_search.py           | 63 +++----------
 sklearn/model_selection/_validation.py       | 14 +--
 sklearn/model_selection/tests/test_search.py | 89 -------------------
 .../model_selection/tests/test_validation.py | 22 -----
 4 files changed, 13 insertions(+), 175 deletions(-)

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 5c1b89bbb6d00..1e3c7905e89a5 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -635,18 +635,6 @@ def fit(self, X, y=None, groups=None, **fit_params):
         **fit_params : dict of string -> object
             Parameters passed to the ``fit`` method of the estimator
         """
-
-        if self.fit_params is not None:
-            warnings.warn('"fit_params" as a constructor argument was '
-                          'deprecated in version 0.19 and will be removed '
-                          'in version 0.21. Pass fit parameters to the '
-                          '"fit" method instead.', DeprecationWarning)
-            if fit_params:
-                warnings.warn('Ignoring fit_params passed as a constructor '
-                              'argument in favor of keyword arguments to '
-                              'the "fit" method.', RuntimeWarning)
-            else:
-                fit_params = self.fit_params
         estimator = self.estimator
         cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

@@ -768,9 +756,7 @@ def _format_results(self, candidate_params, scorers, n_splits, out):
         if self.return_train_score:
             train_scores = _aggregate_score_dicts(train_score_dicts)

-        # TODO: replace by a dict in 0.21
-        results = (DeprecationDict() if self.return_train_score == 'warn'
-                   else {})
+        results = {}

         def _store(key_name, array, weights=None, splits=False, rank=False):
             """A small helper to store the scores/times to the cv_results_"""
@@ -850,15 +836,6 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
             prev_keys = set(results.keys())
             _store('train_%s' % scorer_name, train_scores[scorer_name],
                    splits=True)
-            if self.return_train_score == 'warn':
-                for key in set(results.keys()) - prev_keys:
-                    message = (
-                        'You are accessing a training score ({!r}), '
-                        'which will not be available by default '
-                        'any more in 0.21. If you need training scores, '
-                        'please set return_train_score=True').format(key)
-                    # warn on key access
-                    results.add_warning(key, message, FutureWarning)

         return results

@@ -907,14 +884,6 @@ class GridSearchCV(BaseSearchCV):

         If None, the estimator's default scorer (if available) is used.

-    fit_params : dict, optional
-        Parameters to pass to the fit method.
-
-        .. deprecated:: 0.19
-           ``fit_params`` as a constructor argument was deprecated in version
-           0.19 and will be removed in version 0.21. Pass fit parameters to
-           the ``fit`` method instead.
-
     n_jobs : int or None, optional (default=None)
         Number of jobs to run in parallel.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
@@ -944,7 +913,7 @@ class GridSearchCV(BaseSearchCV):
         identically distributed across the folds, and the loss minimized
         is the total loss per sample, and not the mean loss across the folds.
         If False, return the average score across folds. Default is True, but
-        will change to False in version 0.21, to correspond to the standard
+        will change to False in version 0.22, to correspond to the standard
         definition of cross-validation.

         .. versionchanged:: 0.20
@@ -1001,13 +970,12 @@ class GridSearchCV(BaseSearchCV):
         step, which will always raise the error.
         Default is 'raise' but from version 0.22 it will change to np.nan.

-    return_train_score : boolean, optional
+    return_train_score : boolean, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.

         Current default is ``'warn'``, which behaves as ``True`` in addition
         to raising a warning when a training score is looked up.
-        That default will be changed to ``False`` in 0.21.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally
@@ -1174,12 +1142,12 @@ class GridSearchCV(BaseSearchCV):

     """

-    def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
+    def __init__(self, estimator, param_grid, scoring=None,
                  n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                  pre_dispatch='2*n_jobs', error_score='raise-deprecating',
-                 return_train_score="warn"):
+                 return_train_score=False):
         super(GridSearchCV, self).__init__(
-            estimator=estimator, scoring=scoring, fit_params=fit_params,
+            estimator=estimator, scoring=scoring,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
             return_train_score=return_train_score)
@@ -1254,14 +1222,6 @@ class RandomizedSearchCV(BaseSearchCV):

         If None, the estimator's default scorer (if available) is used.

-    fit_params : dict, optional
-        Parameters to pass to the fit method.
-
-        .. deprecated:: 0.19
-           ``fit_params`` as a constructor argument was deprecated in version
-           0.19 and will be removed in version 0.21. Pass fit parameters to
-           the ``fit`` method instead.
-
     n_jobs : int or None, optional (default=None)
         Number of jobs to run in parallel.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
@@ -1291,7 +1251,7 @@ class RandomizedSearchCV(BaseSearchCV):
         identically distributed across the folds, and the loss minimized
         is the total loss per sample, and not the mean loss across the folds.
         If False, return the average score across folds. Default is True, but
-        will change to False in version 0.21, to correspond to the standard
+        will change to False in version 0.22, to correspond to the standard
         definition of cross-validation.

         .. versionchanged:: 0.20
@@ -1356,13 +1316,12 @@ class RandomizedSearchCV(BaseSearchCV):
         step, which will always raise the error.
         Default is 'raise' but from version 0.22 it will change to np.nan.

-    return_train_score : boolean, optional
+    return_train_score : boolean, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.

         Current default is ``'warn'``, which behaves as ``True`` in addition
         to raising a warning when a training score is looked up.
-        That default will be changed to ``False`` in 0.21.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally
@@ -1495,15 +1454,15 @@ class RandomizedSearchCV(BaseSearchCV):
     """

     def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
-                 fit_params=None, n_jobs=None, iid='warn', refit=True,
+                 n_jobs=None, iid='warn', refit=True,
                  cv='warn', verbose=0, pre_dispatch='2*n_jobs',
                  random_state=None, error_score='raise-deprecating',
-                 return_train_score="warn"):
+                 return_train_score=False):
         self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state
         super(RandomizedSearchCV, self).__init__(
-            estimator=estimator, scoring=scoring, fit_params=fit_params,
+            estimator=estimator, scoring=scoring,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
             return_train_score=return_train_score)
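
Note: after this hunk, ``GridSearchCV`` and ``RandomizedSearchCV`` default to
``return_train_score=False``, so training scores become opt-in. A minimal
sketch of the resulting usage (the data and parameter grid below are
illustrative, not taken from the patch):

    # Training scores are now opt-in: request them explicitly.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)
    gs = GridSearchCV(LinearSVC(random_state=0), {'C': [0.1, 1.0]},
                      cv=3, return_train_score=True)
    gs.fit(X, y)
    # '*_train_score' keys appear in cv_results_ only because
    # return_train_score=True was passed; under the new default (False)
    # they would be absent.
    print('mean_train_score' in gs.cv_results_)  # True
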
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 4ddfc5edac6ad..5f66da9de913e 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -126,12 +126,11 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
             - A string, giving an expression as a function of n_jobs,
               as in '2*n_jobs'

-    return_train_score : boolean, optional
+    return_train_score : boolean, default=False
         Whether to include train scores.

         Current default is ``'warn'``, which behaves as ``True`` in addition
         to raising a warning when a training score is looked up.
-        That default will be changed to ``False`` in 0.21.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally
@@ -248,8 +247,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
         test_scores, fit_times, score_times = zipped_scores
     test_scores = _aggregate_score_dicts(test_scores)

-    # TODO: replace by a dict in 0.21
-    ret = DeprecationDict() if return_train_score == 'warn' else {}
+    ret = {}
     ret['fit_time'] = np.array(fit_times)
     ret['score_time'] = np.array(score_times)

@@ -261,14 +259,6 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
         if return_train_score:
             key = 'train_%s' % name
             ret[key] = np.array(train_scores[name])
-            if return_train_score == 'warn':
-                message = (
-                    'You are accessing a training score ({!r}), '
-                    'which will not be available by default '
-                    'any more in 0.21. If you need training scores, '
-                    'please set return_train_score=True').format(key)
-                # warn on key access
-                ret.add_warning(key, message, FutureWarning)

     return ret
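
Note: ``cross_validate`` gets the same opt-in behaviour. A small sketch under
the new default (estimator and data are illustrative):

    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_validate
    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    scores = cross_validate(LinearSVC(random_state=0), X, y, cv=3)
    # Only test scores and timings by default; no 'train_score' key.
    print(sorted(scores))  # ['fit_time', 'score_time', 'test_score']
    scores = cross_validate(LinearSVC(random_state=0), X, y, cv=3,
                            return_train_score=True)
    print(sorted(scores))  # now also contains 'train_score'
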
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index ac9a478c234ec..05ffc59881730 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -237,57 +237,6 @@ def test_random_search_with_fit_params():
                   error_score='raise')


-@pytest.mark.filterwarnings('ignore: The default of the `iid`')  # 0.22
-def test_grid_search_fit_params_deprecation():
-    # NOTE: Remove this test in v0.21
-
-    # Use of `fit_params` in the class constructor is deprecated,
-    # but will still work until v0.21.
-    X = np.arange(100).reshape(10, 10)
-    y = np.array([0] * 5 + [1] * 5)
-    clf = CheckingClassifier(expected_fit_params=['spam'])
-    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]},
-                               fit_params={'spam': np.ones(10)})
-    assert_warns(DeprecationWarning, grid_search.fit, X, y)
-
-
-@pytest.mark.filterwarnings('ignore: The default of the `iid`')  # 0.22
-@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
-def test_grid_search_fit_params_two_places():
-    # NOTE: Remove this test in v0.21
-
-    # If users try to input fit parameters in both
-    # the constructor (deprecated use) and the `fit`
-    # method, we'll ignore the values passed to the constructor.
-    X = np.arange(100).reshape(10, 10)
-    y = np.array([0] * 5 + [1] * 5)
-    clf = CheckingClassifier(expected_fit_params=['spam'])
-
-    # The "spam" array is too short and will raise an
-    # error in the CheckingClassifier if used.
-    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]},
-                               fit_params={'spam': np.ones(1)})
-
-    expected_warning = ('Ignoring fit_params passed as a constructor '
-                        'argument in favor of keyword arguments to '
-                        'the "fit" method.')
-    assert_warns_message(RuntimeWarning, expected_warning,
-                         grid_search.fit, X, y, spam=np.ones(10))
-
-    # Verify that `fit` prefers its own kwargs by giving valid
-    # kwargs in the constructor and invalid in the method call
-    with warnings.catch_warnings():
-        # JvR: As passing fit params to the constructor is deprecated, this
-        # unit test raises a warning (unit test can be removed after version
-        # 0.22)
-        warnings.filterwarnings("ignore", category=DeprecationWarning)
-        grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]},
-                                   fit_params={'spam': np.ones(10)},
-                                   error_score='raise')
-    assert_raise_message(AssertionError, "Fit parameter spam has length 1",
-                         grid_search.fit, X, y, spam=np.ones(1))
-
-
 @ignore_warnings
 def test_grid_search_no_score():
     # Test grid-search on classifier that has no score function.
@@ -374,44 +323,6 @@ def test_grid_search_groups():
         gs.fit(X, y)


-def test_return_train_score_warn():
-    # Test that warnings are raised. Will be removed in 0.21
-
-    X = np.arange(100).reshape(10, 10)
-    y = np.array([0] * 5 + [1] * 5)
-    grid = {'C': [1, 2]}
-
-    estimators = [GridSearchCV(LinearSVC(random_state=0), grid,
-                               iid=False, cv=3),
-                  RandomizedSearchCV(LinearSVC(random_state=0), grid,
-                                     n_iter=2, iid=False, cv=3)]
-
-    result = {}
-    for estimator in estimators:
-        for val in [True, False, 'warn']:
-            estimator.set_params(return_train_score=val)
-            fit_func = ignore_warnings(estimator.fit,
-                                       category=ConvergenceWarning)
-            result[val] = assert_no_warnings(fit_func, X, y).cv_results_
-
-    train_keys = ['split0_train_score', 'split1_train_score',
-                  'split2_train_score', 'mean_train_score', 'std_train_score']
-    for key in train_keys:
-        msg = (
-            'You are accessing a training score ({!r}), '
-            'which will not be available by default '
-            'any more in 0.21. If you need training scores, '
-            'please set return_train_score=True').format(key)
-        train_score = assert_warns_message(FutureWarning, msg,
-                                           result['warn'].get, key)
-        assert np.allclose(train_score, result[True][key])
-        assert key not in result[False]
-
-    for key in result['warn']:
-        if key not in train_keys:
-            assert_no_warnings(result['warn'].get, key)
-
-
 @pytest.mark.filterwarnings('ignore: The default of the `iid`')  # 0.22
 @pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
 def test_classes__property():
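
Note: the two tests deleted above exercised the constructor route for fit
parameters, whose deprecation cycle is now complete; the supported route is
keyword arguments to ``fit``. A sketch of that pattern (the estimator and the
use of ``sample_weight`` are illustrative):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import SGDClassifier
    from sklearn.model_selection import GridSearchCV

    X, y = make_classification(random_state=0)
    gs = GridSearchCV(SGDClassifier(random_state=0),
                      {'alpha': [1e-4, 1e-3]}, cv=3)
    # Fit parameters are passed to GridSearchCV.fit and forwarded to the
    # underlying estimator's fit on each training split.
    gs.fit(X, y, sample_weight=np.ones(len(y)))
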
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 986d701cee651..b88abfdcef2dc 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -405,28 +405,6 @@ def test_cross_validate():
         check_cross_validate_multi_metric(est, X, y, scores)


-def test_cross_validate_return_train_score_warn():
-    # Test that warnings are raised. Will be removed in 0.21
-
-    X, y = make_classification(random_state=0)
-    estimator = MockClassifier()
-
-    result = {}
-    for val in [False, True, 'warn']:
-        result[val] = assert_no_warnings(cross_validate, estimator, X, y,
-                                         return_train_score=val, cv=5)
-
-    msg = (
-        'You are accessing a training score ({!r}), '
-        'which will not be available by default '
-        'any more in 0.21. If you need training scores, '
-        'please set return_train_score=True').format('train_score')
-    train_score = assert_warns_message(FutureWarning, msg,
-                                       result['warn'].get, 'train_score')
-    assert np.allclose(train_score, result[True]['train_score'])
-    assert 'train_score' not in result[False]
-
-
 def check_cross_validate_single_metric(clf, X, y, scores):
     (train_mse_scores, test_mse_scores, train_r2_scores,
      test_r2_scores, fitted_estimators) = scores
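
Note: with this first patch applied, ``cv_results_`` and the ``cross_validate``
return value are plain dicts again, so looking up a train score that was not
computed raises an ordinary ``KeyError`` instead of going through the
warn-on-access machinery. For illustration:

    from sklearn.datasets import make_classification
    from sklearn.model_selection import cross_validate
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(random_state=0)
    ret = cross_validate(DecisionTreeClassifier(random_state=0), X, y, cv=3)
    try:
        ret['train_score']
    except KeyError:
        # The key simply does not exist under return_train_score=False.
        print('train scores were not computed')
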
From ca6fd96895086d5d8655ec69ca6bb1407fe2e535 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Mon, 1 Oct 2018 18:05:08 -0400
Subject: [PATCH 2/5] fix some doctests

---
 sklearn/model_selection/_search.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 1e3c7905e89a5..f5c426c76f599 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -999,17 +999,16 @@ class GridSearchCV(BaseSearchCV):
                          kernel='rbf', max_iter=-1, probability=False,
                          random_state=None, shrinking=True, tol=...,
                          verbose=False),
-           fit_params=None, iid=..., n_jobs=None,
+           iid=..., n_jobs=None,
            param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
            scoring=..., verbose=...)
     >>> sorted(clf.cv_results_.keys())
     ...                              # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
     ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
-     'mean_train_score', 'param_C', 'param_kernel', 'params',...
+     'param_C', 'param_kernel', 'params',...
      'rank_test_score', 'split0_test_score',...
-     'split0_train_score', 'split1_test_score', 'split1_train_score',...
-     'split2_test_score', 'split2_train_score',...
-     'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]
+     'split2_test_score', ...
+     'std_fit_time', 'std_score_time', 'std_test_score']

     Attributes
     ----------
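
Note: the doctest now reflects that no ``*_train_score`` columns are produced
by default. One way to check that property directly (a minimal sketch;
estimator and grid are illustrative):

    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    clf = GridSearchCV(SVC(gamma='scale'), {'C': [1, 10]}, cv=3)
    clf.fit(X, y)
    # Under the new default, no cv_results_ key ends in '_train_score'.
    assert not any(k.endswith('_train_score') for k in clf.cv_results_)
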
From 1d9b6bb634f27ab0893d5f2d8b1814acd23925a9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 2 Oct 2018 12:32:23 -0400
Subject: [PATCH 3/5] finish return_train_score deprecation

---
 doc/modules/cross_validation.rst              |  8 ++--
 examples/compose/plot_digits_pipe.py          |  3 +-
 sklearn/model_selection/_search.py            |  8 ----
 sklearn/model_selection/_validation.py        |  9 +---
 sklearn/model_selection/tests/test_search.py  |  5 +-
 .../model_selection/tests/test_validation.py  |  2 -
 sklearn/utils/deprecation.py                  | 47 +------------------
 sklearn/utils/tests/test_deprecation.py       | 16 -------
 8 files changed, 9 insertions(+), 89 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 2d05e4b81c69d..a33777014404a 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -191,9 +191,9 @@ And for multiple metric evaluation, the return value is a dict with the
 following keys -
 ``['test_<scorer1_name>', 'test_<scorer2_name>', 'test_<scorer...>', 'fit_time', 'score_time']``

-``return_train_score`` is set to ``True`` by default. It adds train score keys
-for all the scorers. If train scores are not needed, this should be set to
-``False`` explicitly.
+``return_train_score`` is set to ``False`` by default to save computation time.
+To evaluate the scores on the training set as well, you need to set it to
+``True``.

 You may also retain the estimator fitted on each training set by setting
 ``return_estimator=True``.
@@ -206,7 +206,7 @@ predefined scorer names::
     >>> scoring = ['precision_macro', 'recall_macro']
     >>> clf = svm.SVC(kernel='linear', C=1, random_state=0)
     >>> scores = cross_validate(clf, iris.data, iris.target, scoring=scoring,
-    ...                         cv=5, return_train_score=False)
+    ...                         cv=5)
     >>> sorted(scores.keys())
     ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
     >>> scores['test_recall_macro']  # doctest: +ELLIPSIS

diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index 6e722c9861529..c5b0fb2a13609 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -47,8 +47,7 @@
     'pca__n_components': [5, 20, 30, 40, 50, 64],
     'logistic__alpha': np.logspace(-4, 4, 5),
 }
-search = GridSearchCV(pipe, param_grid, iid=False, cv=5,
-                      return_train_score=False)
+search = GridSearchCV(pipe, param_grid, iid=False, cv=5)
 search.fit(X_digits, y_digits)
 print("Best parameter (CV score=%0.3f):" % search.best_score_)
 print(search.best_params_)
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index f5c426c76f599..46a92436f9bd6 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -39,7 +39,6 @@
 from ..utils.random import sample_without_replacement
 from ..utils.validation import indexable, check_is_fitted
 from ..utils.metaestimators import if_delegate_has_method
-from ..utils.deprecation import DeprecationDict
 from ..metrics.scorer import _check_multimetric_scoring
 from ..metrics.scorer import check_scoring

@@ -833,7 +832,6 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                    splits=True, rank=True,
                    weights=test_sample_counts if iid else None)
         if self.return_train_score:
-            prev_keys = set(results.keys())
             _store('train_%s' % scorer_name, train_scores[scorer_name],
                    splits=True)

@@ -973,9 +971,6 @@ class GridSearchCV(BaseSearchCV):
     return_train_score : boolean, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.
-
-        Current default is ``'warn'``, which behaves as ``True`` in addition
-        to raising a warning when a training score is looked up.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally
@@ -1318,9 +1313,6 @@ class RandomizedSearchCV(BaseSearchCV):
     return_train_score : boolean, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.
-
-        Current default is ``'warn'``, which behaves as ``True`` in addition
-        to raising a warning when a training score is looked up.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 5f66da9de913e..c1773ea0d6a0c 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -22,7 +22,6 @@

 from ..base import is_classifier, clone
 from ..utils import indexable, check_random_state, safe_indexing
-from ..utils.deprecation import DeprecationDict
 from ..utils.validation import _is_arraylike, _num_samples
 from ..utils.metaestimators import _safe_split
 from ..utils import Parallel, delayed
@@ -40,7 +39,7 @@

 def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
                    n_jobs=None, verbose=0, fit_params=None,
-                   pre_dispatch='2*n_jobs', return_train_score="warn",
+                   pre_dispatch='2*n_jobs', return_train_score=False,
                    return_estimator=False, error_score='raise-deprecating'):
     """Evaluate metric(s) by cross-validation and also record fit/score times.

@@ -128,9 +127,6 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
     return_train_score : boolean, default=False
         Whether to include train scores.
-
-        Current default is ``'warn'``, which behaves as ``True`` in addition
-        to raising a warning when a training score is looked up.
         Computing training scores is used to get insights on how different
         parameter settings impact the overfitting/underfitting trade-off.
         However computing the scores on the training set can be computationally
@@ -190,8 +186,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',

     Single metric evaluation using ``cross_validate``

-    >>> cv_results = cross_validate(lasso, X, y, cv=3,
-    ...                             return_train_score=False)
+    >>> cv_results = cross_validate(lasso, X, y, cv=3)
     >>> sorted(cv_results.keys())  # doctest: +ELLIPSIS
     ['fit_time', 'score_time', 'test_score']
     >>> cv_results['test_score']  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 05ffc59881730..27fd330e35586 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -7,7 +7,6 @@
 import sys
 from types import GeneratorType
 import re
-import warnings

 import numpy as np
 import scipy.sparse as sp
@@ -37,7 +36,6 @@
 from sklearn.base import BaseEstimator
 from sklearn.base import clone
 from sklearn.exceptions import NotFittedError
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.datasets import make_classification
 from sklearn.datasets import make_blobs
 from sklearn.datasets import make_multilabel_classification
@@ -1441,8 +1439,7 @@ def test_search_train_scores_set_to_false():
     y = [0, 0, 0, 1, 1, 1]
     clf = LinearSVC(random_state=0)

-    gs = GridSearchCV(clf, param_grid={'C': [0.1, 0.2]},
-                      return_train_score=False)
+    gs = GridSearchCV(clf, param_grid={'C': [0.1, 0.2]})
     gs.fit(X, y)

diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index b88abfdcef2dc..77a86218cb960 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -22,7 +22,6 @@
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import assert_warns_message
-from sklearn.utils.testing import assert_no_warnings
 from sklearn.utils.testing import assert_raises_regex
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_less
@@ -412,7 +411,6 @@ def check_cross_validate_single_metric(clf, X, y, scores):
     for (return_train_score, dict_len) in ((True, 4), (False, 3)):
         # Single metric passed as a string
         if return_train_score:
-            # It must be True by default - deprecated
             mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                              scoring='neg_mean_squared_error',
                                              return_train_score=True)

diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index b84e0bd9b4fa9..fbdb2524aeb7a 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -2,7 +2,7 @@
 import warnings
 import functools

-__all__ = ["deprecated", "DeprecationDict"]
+__all__ = ["deprecated"]


 class deprecated(object):
@@ -105,48 +105,3 @@ def _is_deprecated(func):
                            for c in closures
                            if isinstance(c.cell_contents, str)]))
     return is_deprecated
-
-
-class DeprecationDict(dict):
-    """A dict which raises a warning when some keys are looked up
-
-    Note, this does not raise a warning for __contains__ and iteration.
-
-    It also will raise a warning even after the key has been manually set by
-    the user.
-    """
-    def __init__(self, *args, **kwargs):
-        self._deprecations = {}
-        super(DeprecationDict, self).__init__(*args, **kwargs)
-
-    def __getitem__(self, key):
-        if key in self._deprecations:
-            warn_args, warn_kwargs = self._deprecations[key]
-            warnings.warn(*warn_args, **warn_kwargs)
-        return super(DeprecationDict, self).__getitem__(key)
-
-    def get(self, key, default=None):
-        """Return the value corresponding to key, else default.
-
-        Parameters
-        ----------
-        key : any hashable object
-            The key
-        default : object, optional
-            The default returned when key is not in dict
-        """
-        # dict does not implement it like this, hence it needs to be overridden
-        try:
-            return self[key]
-        except KeyError:
-            return default
-
-    def add_warning(self, key, *args, **kwargs):
-        """Add a warning to be triggered when the specified key is read
-
-        Parameters
-        ----------
-        key : any hashable object
-            The key
-        """
-        self._deprecations[key] = (args, kwargs)
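
Note: for context, the deleted ``DeprecationDict`` implemented
warn-on-key-access semantics. A simplified, self-contained stand-in is
sketched below (the name ``WarnOnAccessDict`` is made up for illustration; the
removed class additionally routed ``get`` through the same warning path):

    import warnings

    class WarnOnAccessDict(dict):
        # Simplified version of the removed DeprecationDict: emit a
        # warning when selected keys are read.
        def __init__(self, *args, **kwargs):
            self._warnings = {}
            super(WarnOnAccessDict, self).__init__(*args, **kwargs)

        def add_warning(self, key, message, category=UserWarning):
            self._warnings[key] = (message, category)

        def __getitem__(self, key):
            if key in self._warnings:
                message, category = self._warnings[key]
                warnings.warn(message, category)
            return super(WarnOnAccessDict, self).__getitem__(key)

    d = WarnOnAccessDict(train_score=0.97)
    d.add_warning('train_score', 'train_score is deprecated', FutureWarning)
    _ = d['train_score']  # emits a FutureWarning, then returns 0.97
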
diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py
index d7b3f48c183c1..e5a1f021cda7e 100644
--- a/sklearn/utils/tests/test_deprecation.py
+++ b/sklearn/utils/tests/test_deprecation.py
@@ -8,9 +8,7 @@
 from sklearn.utils.deprecation import _is_deprecated
 from sklearn.utils.deprecation import deprecated
 from sklearn.utils.testing import assert_warns_message
-from sklearn.utils.testing import assert_no_warnings
 from sklearn.utils.testing import SkipTest
-from sklearn.utils.deprecation import DeprecationDict


 @deprecated('qwerty')
@@ -62,17 +60,3 @@ def test_is_deprecated():

 def test_pickle():
     pickle.loads(pickle.dumps(mock_function))
-
-
-def test_deprecationdict():
-    dd = DeprecationDict()
-    dd.add_warning('a', 'hello')
-    dd.add_warning('b', 'world', DeprecationWarning)
-    assert 1 == assert_warns_message(UserWarning, 'hello', dd.get, 'a', 1)
-    dd['a'] = 5
-    dd['b'] = 6
-    dd['c'] = 7
-    assert 5 == assert_warns_message(UserWarning, 'hello', dd.__getitem__, 'a')
-    assert 6 == assert_warns_message(DeprecationWarning, 'world',
-                                     dd.__getitem__, 'b')
-    assert 7 == assert_no_warnings(dd.get, 'c')

From 2042ded91c07c8c9f023cb21f86e91c0d81c8fd8 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 2 Oct 2018 13:43:18 -0400
Subject: [PATCH 4/5] remove train_score from doctest

---
 doc/modules/cross_validation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index a33777014404a..17d9ea680a2cd 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -231,7 +231,7 @@ Here is an example of ``cross_validate`` using a single metric::
     ...                         scoring='precision_macro', cv=5,
     ...                         return_estimator=True)
     >>> sorted(scores.keys())
-    ['estimator', 'fit_time', 'score_time', 'test_score', 'train_score']
+    ['estimator', 'fit_time', 'score_time', 'test_score']

 Obtaining predictions by cross-validation

From 9954544a55878e9bdb755008d0d009eb9095f16b Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 2 Oct 2018 16:05:05 -0400
Subject: [PATCH 5/5] remove redundant return_train_score=False in
 cross_val_score

---
 sklearn/model_selection/_validation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index c1773ea0d6a0c..c60ce54e8fe1b 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -380,7 +380,6 @@ def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv='warn',
     cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
                                 scoring={'score': scorer}, cv=cv,
-                                return_train_score=False,
                                 n_jobs=n_jobs, verbose=verbose,
                                 fit_params=fit_params,
                                 pre_dispatch=pre_dispatch,
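
Note: since ``cross_validate`` now defaults to ``return_train_score=False``,
``cross_val_score`` no longer needs to pass it explicitly. The relationship
between the two can be pictured as follows (a minimal sketch, not the
library's exact internals):

    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_val_score, cross_validate
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    est = DecisionTreeClassifier(random_state=0)
    # cross_val_score is essentially the 'test_score' column of
    # cross_validate run with the same splitting and scoring.
    scores = cross_val_score(est, X, y, cv=3)
    results = cross_validate(est, X, y, cv=3)  # no train scores by default
    assert list(scores) == list(results['test_score'])
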