[MRG + 1] return_train_score deprecation #12241

Merged
10 changes: 5 additions & 5 deletions doc/modules/cross_validation.rst
@@ -191,9 +191,9 @@ And for multiple metric evaluation, the return value is a dict with the
following keys -
``['test_<scorer1_name>', 'test_<scorer2_name>', 'test_<scorer...>', 'fit_time', 'score_time']``

``return_train_score`` is set to ``True`` by default. It adds train score keys
for all the scorers. If train scores are not needed, this should be set to
``False`` explicitly.
``return_train_score`` is set to ``False`` by default to save computation time.
To evaluate the scores on the training set as well, it needs to be set to
``True``.

You may also retain the estimator fitted on each training set by setting
``return_estimator=True``.
@@ -206,7 +206,7 @@ predefined scorer names::
>>> scoring = ['precision_macro', 'recall_macro']
>>> clf = svm.SVC(kernel='linear', C=1, random_state=0)
>>> scores = cross_validate(clf, iris.data, iris.target, scoring=scoring,
... cv=5, return_train_score=False)
... cv=5)
>>> sorted(scores.keys())
['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
>>> scores['test_recall_macro'] # doctest: +ELLIPSIS
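
A companion sketch (editorial, not part of the diff), reusing ``clf``, ``iris``
and ``scoring`` from the example above, shows how to opt back in to train
scores under the new default::

    >>> scores = cross_validate(clf, iris.data, iris.target, scoring=scoring,
    ...                         cv=5, return_train_score=True)
    >>> sorted(scores.keys())
    ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro',
     'train_precision_macro', 'train_recall_macro']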
@@ -231,7 +231,7 @@ Here is an example of ``cross_validate`` using a single metric::
... scoring='precision_macro', cv=5,
... return_estimator=True)
>>> sorted(scores.keys())
['estimator', 'fit_time', 'score_time', 'test_score', 'train_score']
['estimator', 'fit_time', 'score_time', 'test_score']


Obtaining predictions by cross-validation
3 changes: 1 addition & 2 deletions examples/compose/plot_digits_pipe.py
@@ -47,8 +47,7 @@
'pca__n_components': [5, 20, 30, 40, 50, 64],
'logistic__alpha': np.logspace(-4, 4, 5),
}
search = GridSearchCV(pipe, param_grid, iid=False, cv=5,
return_train_score=False)
search = GridSearchCV(pipe, param_grid, iid=False, cv=5)
search.fit(X_digits, y_digits)
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
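
Under the new default the search above no longer computes train scores. A
sketch of opting back in, reusing ``pipe``, ``param_grid``, ``X_digits`` and
``y_digits`` from this example:

search = GridSearchCV(pipe, param_grid, iid=False, cv=5,
                      return_train_score=True)
search.fit(X_digits, y_digits)
# one mean train score per parameter candidate
print(search.cv_results_['mean_train_score'])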
80 changes: 15 additions & 65 deletions sklearn/model_selection/_search.py
@@ -39,7 +39,6 @@
from ..utils.random import sample_without_replacement
from ..utils.validation import indexable, check_is_fitted
from ..utils.metaestimators import if_delegate_has_method
from ..utils.deprecation import DeprecationDict
from ..metrics.scorer import _check_multimetric_scoring
from ..metrics.scorer import check_scoring

@@ -635,18 +634,6 @@ def fit(self, X, y=None, groups=None, **fit_params):
**fit_params : dict of string -> object
Parameters passed to the ``fit`` method of the estimator
"""

if self.fit_params is not None:
warnings.warn('"fit_params" as a constructor argument was '
'deprecated in version 0.19 and will be removed '
'in version 0.21. Pass fit parameters to the '
'"fit" method instead.', DeprecationWarning)
if fit_params:
warnings.warn('Ignoring fit_params passed as a constructor '
'argument in favor of keyword arguments to '
'the "fit" method.', RuntimeWarning)
else:
fit_params = self.fit_params
estimator = self.estimator
cv = check_cv(self.cv, y, classifier=is_classifier(estimator))
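
With the constructor argument removed, fit parameters are passed directly to
``fit`` as keyword arguments. A hypothetical usage sketch (the estimator, grid
and weights below are illustrative, not from this diff):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = make_classification(random_state=0)
sample_weight = np.ones(len(y))  # illustrative per-sample weights
search = GridSearchCV(SVC(), {'C': [1, 10]}, cv=3)
# keyword arguments to fit are forwarded to SVC.fit on every split
search.fit(X, y, sample_weight=sample_weight)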

@@ -768,9 +755,7 @@ def _format_results(self, candidate_params, scorers, n_splits, out):
if self.return_train_score:
train_scores = _aggregate_score_dicts(train_score_dicts)

# TODO: replace by a dict in 0.21
results = (DeprecationDict() if self.return_train_score == 'warn'
else {})
results = {}

def _store(key_name, array, weights=None, splits=False, rank=False):
"""A small helper to store the scores/times to the cv_results_"""
@@ -847,18 +832,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
splits=True, rank=True,
weights=test_sample_counts if iid else None)
if self.return_train_score:
prev_keys = set(results.keys())
_store('train_%s' % scorer_name, train_scores[scorer_name],
splits=True)
if self.return_train_score == 'warn':
for key in set(results.keys()) - prev_keys:
message = (
'You are accessing a training score ({!r}), '
'which will not be available by default '
'any more in 0.21. If you need training scores, '
'please set return_train_score=True').format(key)
# warn on key access
results.add_warning(key, message, FutureWarning)

return results
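
For reference, a rough standalone sketch (editorial, with made-up scores and
assumed shapes) of the aggregation ``_store`` performs for a single key:

import numpy as np
from scipy.stats import rankdata

# rows are parameter candidates, columns are CV splits
test_scores = np.array([[0.80, 0.90, 0.85],
                        [0.70, 0.75, 0.72]])
results = {}
for split_i in range(test_scores.shape[1]):
    results['split%d_test_score' % split_i] = test_scores[:, split_i]
means = np.average(test_scores, axis=1)
results['mean_test_score'] = means
# standard deviation of each candidate's scores across splits
results['std_test_score'] = np.sqrt(
    np.average((test_scores - means[:, np.newaxis]) ** 2, axis=1))
# rank 1 goes to the highest mean test score
results['rank_test_score'] = np.asarray(
    rankdata(-means, method='min'), dtype=np.int32)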

@@ -907,14 +882,6 @@ class GridSearchCV(BaseSearchCV):

If None, the estimator's default scorer (if available) is used.

fit_params : dict, optional
Parameters to pass to the fit method.

.. deprecated:: 0.19
``fit_params`` as a constructor argument was deprecated in version
0.19 and will be removed in version 0.21. Pass fit parameters to
the ``fit`` method instead.

n_jobs : int or None, optional (default=None)
Number of jobs to run in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
Expand Down Expand Up @@ -944,7 +911,7 @@ class GridSearchCV(BaseSearchCV):
identically distributed across the folds, and the loss minimized is
the total loss per sample, and not the mean loss across the folds. If
False, return the average score across folds. Default is True, but
will change to False in version 0.21, to correspond to the standard
will change to False in version 0.22, to correspond to the standard
definition of cross-validation.

.. versionchanged:: 0.20
@@ -1001,13 +968,9 @@ class GridSearchCV(BaseSearchCV):
step, which will always raise the error. Default is 'raise' but from
version 0.22 it will change to np.nan.

return_train_score : boolean, optional
return_train_score : boolean, default=False
If ``False``, the ``cv_results_`` attribute will not include training
scores.

Current default is ``'warn'``, which behaves as ``True`` in addition
to raising a warning when a training score is looked up.
That default will be changed to ``False`` in 0.21.
Computing training scores is used to get insights on how different
parameter settings impact the overfitting/underfitting trade-off.
However computing the scores on the training set can be computationally
expensive and is not strictly required to select the parameters that
yield the best generalization performance.
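
A short editorial sketch (arbitrary estimator and grid, not from this diff) of
using the opt-in train scores to inspect that trade-off:

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = GridSearchCV(SVC(), {'C': [0.1, 1, 100]}, cv=5,
                      return_train_score=True)
search.fit(X, y)
# a large train/test gap for a candidate hints at overfitting
gap = (search.cv_results_['mean_train_score']
       - search.cv_results_['mean_test_score'])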
@@ -1031,17 +994,16 @@ class GridSearchCV(BaseSearchCV):
kernel='rbf', max_iter=-1, probability=False,
random_state=None, shrinking=True, tol=...,
verbose=False),
fit_params=None, iid=..., n_jobs=None,
iid=..., n_jobs=None,
param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
scoring=..., verbose=...)
>>> sorted(clf.cv_results_.keys())
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
['mean_fit_time', 'mean_score_time', 'mean_test_score',...
'mean_train_score', 'param_C', 'param_kernel', 'params',...
'param_C', 'param_kernel', 'params',...
'rank_test_score', 'split0_test_score',...
'split0_train_score', 'split1_test_score', 'split1_train_score',...
'split2_test_score', 'split2_train_score',...
'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]
'split2_test_score', ...
'std_fit_time', 'std_score_time', 'std_test_score']

Attributes
----------
@@ -1174,12 +1136,12 @@ class GridSearchCV(BaseSearchCV):

"""

def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
def __init__(self, estimator, param_grid, scoring=None,
n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
pre_dispatch='2*n_jobs', error_score='raise-deprecating',
return_train_score="warn"):
return_train_score=False):
super(GridSearchCV, self).__init__(
estimator=estimator, scoring=scoring, fit_params=fit_params,
estimator=estimator, scoring=scoring,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)
@@ -1254,14 +1216,6 @@ class RandomizedSearchCV(BaseSearchCV):

If None, the estimator's default scorer (if available) is used.

fit_params : dict, optional
Parameters to pass to the fit method.

.. deprecated:: 0.19
``fit_params`` as a constructor argument was deprecated in version
0.19 and will be removed in version 0.21. Pass fit parameters to
the ``fit`` method instead.

n_jobs : int or None, optional (default=None)
Number of jobs to run in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
@@ -1291,7 +1245,7 @@ class RandomizedSearchCV(BaseSearchCV):
identically distributed across the folds, and the loss minimized is
the total loss per sample, and not the mean loss across the folds. If
False, return the average score across folds. Default is True, but
will change to False in version 0.21, to correspond to the standard
will change to False in version 0.22, to correspond to the standard
definition of cross-validation.

.. versionchanged:: 0.20
@@ -1356,13 +1310,9 @@ class RandomizedSearchCV(BaseSearchCV):
step, which will always raise the error. Default is 'raise' but from
version 0.22 it will change to np.nan.

return_train_score : boolean, optional
return_train_score : boolean, default=False
If ``False``, the ``cv_results_`` attribute will not include training
scores.

Current default is ``'warn'``, which behaves as ``True`` in addition
to raising a warning when a training score is looked up.
That default will be changed to ``False`` in 0.21.
Computing training scores is used to get insights on how different
parameter settings impact the overfitting/underfitting trade-off.
However computing the scores on the training set can be computationally
expensive and is not strictly required to select the parameters that
yield the best generalization performance.
@@ -1495,15 +1445,15 @@ class RandomizedSearchCV(BaseSearchCV):
"""

def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
fit_params=None, n_jobs=None, iid='warn', refit=True,
n_jobs=None, iid='warn', refit=True,
cv='warn', verbose=0, pre_dispatch='2*n_jobs',
random_state=None, error_score='raise-deprecating',
return_train_score="warn"):
return_train_score=False):
self.param_distributions = param_distributions
self.n_iter = n_iter
self.random_state = random_state
super(RandomizedSearchCV, self).__init__(
estimator=estimator, scoring=scoring, fit_params=fit_params,
estimator=estimator, scoring=scoring,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)
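
A usage sketch of the updated signature (the estimator and distribution below
are illustrative, not from this diff):

from scipy.stats import expon
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = RandomizedSearchCV(SVC(), {'C': expon(scale=10)}, n_iter=5, cv=3,
                            random_state=0, return_train_score=True)
search.fit(X, y)
# train keys are present only because we opted in above
assert 'mean_train_score' in search.cv_results_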
24 changes: 4 additions & 20 deletions sklearn/model_selection/_validation.py
@@ -22,7 +22,6 @@

from ..base import is_classifier, clone
from ..utils import indexable, check_random_state, safe_indexing
from ..utils.deprecation import DeprecationDict
from ..utils.validation import _is_arraylike, _num_samples
from ..utils.metaestimators import _safe_split
from ..utils import Parallel, delayed
@@ -40,7 +39,7 @@

def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
n_jobs=None, verbose=0, fit_params=None,
pre_dispatch='2*n_jobs', return_train_score="warn",
pre_dispatch='2*n_jobs', return_train_score=False,
return_estimator=False, error_score='raise-deprecating'):
"""Evaluate metric(s) by cross-validation and also record fit/score times.

@@ -126,12 +125,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
- A string, giving an expression as a function of n_jobs,
as in '2*n_jobs'

return_train_score : boolean, optional
return_train_score : boolean, default=False
Whether to include train scores.

Current default is ``'warn'``, which behaves as ``True`` in addition
to raising a warning when a training score is looked up.
That default will be changed to ``False`` in 0.21.
Computing training scores is used to get insights on how different
parameter settings impact the overfitting/underfitting trade-off.
However computing the scores on the training set can be computationally
expensive and is not strictly required to select the parameters that
yield the best generalization performance.
@@ -191,8 +186,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',

Single metric evaluation using ``cross_validate``

>>> cv_results = cross_validate(lasso, X, y, cv=3,
... return_train_score=False)
>>> cv_results = cross_validate(lasso, X, y, cv=3)
>>> sorted(cv_results.keys()) # doctest: +ELLIPSIS
['fit_time', 'score_time', 'test_score']
>>> cv_results['test_score'] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
@@ -248,8 +242,7 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
test_scores, fit_times, score_times = zipped_scores
test_scores = _aggregate_score_dicts(test_scores)

# TODO: replace by a dict in 0.21
ret = DeprecationDict() if return_train_score == 'warn' else {}
ret = {}
ret['fit_time'] = np.array(fit_times)
ret['score_time'] = np.array(score_times)

@@ -261,14 +254,6 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn',
if return_train_score:
key = 'train_%s' % name
ret[key] = np.array(train_scores[name])
if return_train_score == 'warn':
message = (
'You are accessing a training score ({!r}), '
'which will not be available by default '
'any more in 0.21. If you need training scores, '
'please set return_train_score=True').format(key)
# warn on key access
ret.add_warning(key, message, FutureWarning)

return ret

@@ -395,7 +380,6 @@ def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv='warn',

cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
scoring={'score': scorer}, cv=cv,
return_train_score=False,
n_jobs=n_jobs, verbose=verbose,
fit_params=fit_params,
pre_dispatch=pre_dispatch,
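
Since ``cross_val_score`` now defers entirely to ``cross_validate``, the two
calls below should agree. A sketch (editorial, assuming the diabetes data used
elsewhere in this module's examples):

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score, cross_validate

X, y = load_diabetes(return_X_y=True)
lasso = Lasso()
scores = cross_val_score(lasso, X, y, cv=3)
results = cross_validate(lasso, X, y, cv=3)
# cross_val_score returns exactly the 'test_score' array
assert np.allclose(scores, results['test_score'])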