From 35a8780b9c30ca1343e30b7f5fb3523e1309db7e Mon Sep 17 00:00:00 2001 From: Eugene Chen Date: Sat, 16 Jul 2016 17:01:45 -0500 Subject: [PATCH 1/5] Resolved issue #6894 and #6895: Now *SearchCV.results_ includes both timing and training scores. --- sklearn/model_selection/_search.py | 71 +++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index bbef014ba34b5..080bbb9cb772b 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -371,7 +371,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, def __init__(self, estimator, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', - error_score='raise'): + error_score='raise', return_train_score=False): self.scoring = scoring self.estimator = estimator @@ -383,6 +383,7 @@ def __init__(self, estimator, scoring=None, self.verbose = verbose self.pre_dispatch = pre_dispatch self.error_score = error_score + self.return_train_score = return_train_score @property def _estimator_type(self): @@ -533,16 +534,28 @@ def _fit(self, X, y, labels, parameter_iterable): pre_dispatch=pre_dispatch )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_, train, test, self.verbose, parameters, - self.fit_params, return_parameters=True, + self.fit_params, + return_train_score=self.return_train_score, + return_parameters=True, error_score=self.error_score) for parameters in parameter_iterable for train, test in cv.split(X, y, labels)) - test_scores, test_sample_counts, _, parameters = zip(*out) + # if one choose to see train score, out will have train score info. + if self.return_train_score: + train_scores, test_scores, test_sample_counts, _, parameters =\ + zip(*out) + else: + test_scores, test_sample_counts, _, parameters = zip(*out) candidate_params = parameters[::n_splits] n_candidates = len(candidate_params) + # if one choose to return train score, reshape the train_scores array + if self.return_train_score: + train_scores = np.array(train_scores, + dtype=np.float64).reshape(n_candidates, + n_splits) test_scores = np.array(test_scores, dtype=np.float64).reshape(n_candidates, n_splits) @@ -552,17 +565,39 @@ def _fit(self, X, y, labels, parameter_iterable): # Computed the (weighted) mean and std for all the candidates weights = test_sample_counts if self.iid else None - means = np.average(test_scores, axis=1, weights=weights) - stds = np.sqrt(np.average((test_scores - means[:, np.newaxis]) ** 2, - axis=1, weights=weights)) + + time = np.array(_, dtype=np.float64).reshape(n_candidates, n_splits) + time_means = np.average(time, axis=1, weights=weights) + time_stds = np.sqrt( + np.average((time - time_means[:, np.newaxis]) ** 2, + axis=1, weights=weights)) + if self.return_train_score: + train_means = np.average(train_scores, axis=1, weights=weights) + train_stds = np.sqrt( + np.average((train_scores - train_means[:, np.newaxis]) ** 2, + axis=1, weights=weights)) + test_means = np.average(test_scores, axis=1, weights=weights) + test_stds = np.sqrt( + np.average((test_scores - test_means[:, np.newaxis]) ** 2, axis=1, + weights=weights)) results = dict() for split_i in range(n_splits): results["test_split%d_score" % split_i] = test_scores[:, split_i] - results["test_mean_score"] = means - results["test_std_score"] = stds + results["test_mean_score"] = test_means + results["test_std_score"] = test_stds + + if self.return_train_score: + for 
split_i in range(n_splits): + results["train_split%d_score" % split_i] =\ + train_scores[:, split_i] + results["train_mean_score"] = train_means + results["train_std_score"] = train_stds + + results["test_mean_time"] = time_means + results["test_std_time"] = time_stds - ranks = np.asarray(rankdata(-means, method='min'), dtype=np.int32) + ranks = np.asarray(rankdata(-test_means, method='min'), dtype=np.int32) best_index = np.flatnonzero(ranks == 1)[0] best_parameters = candidate_params[best_index] @@ -848,11 +883,13 @@ class GridSearchCV(BaseSearchCV): def __init__(self, estimator, param_grid, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, - pre_dispatch='2*n_jobs', error_score='raise'): + pre_dispatch='2*n_jobs', error_score='raise', + return_train_score=False): super(GridSearchCV, self).__init__( estimator=estimator, scoring=scoring, fit_params=fit_params, n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch, error_score=error_score) + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) self.param_grid = param_grid _check_param_grid(param_grid) @@ -1074,15 +1111,15 @@ class RandomizedSearchCV(BaseSearchCV): def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, - error_score='raise'): - + error_score='raise', return_train_score=False): self.param_distributions = param_distributions self.n_iter = n_iter self.random_state = random_state super(RandomizedSearchCV, self).__init__( - estimator=estimator, scoring=scoring, fit_params=fit_params, - n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch, error_score=error_score) + estimator=estimator, scoring=scoring, fit_params=fit_params, + n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) def fit(self, X, y=None, labels=None): """Run fit on the estimator with randomly drawn parameters. @@ -1104,4 +1141,4 @@ def fit(self, X, y=None, labels=None): sampled_params = ParameterSampler(self.param_distributions, self.n_iter, random_state=self.random_state) - return self._fit(X, y, labels, sampled_params) + return self._fit(X, y, labels, sampled_params) \ No newline at end of file From 316ffba8365abd44d67dd4ec13e7c9ddf4393e56 Mon Sep 17 00:00:00 2001 From: Eugene Chen Date: Sat, 16 Jul 2016 23:33:03 -0500 Subject: [PATCH 2/5] wrote new test (sklearn/model_selection/test_search.py) and new doctest (sklearn/model_selection/_search.py) --- sklearn/model_selection/_search.py | 13 ++++++---- sklearn/model_selection/tests/test_search.py | 26 ++++++++++++-------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 080bbb9cb772b..51f17faaaacfa 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -541,7 +541,7 @@ def _fit(self, X, y, labels, parameter_iterable): for parameters in parameter_iterable for train, test in cv.split(X, y, labels)) - # if one choose to see train score, out will have train score info. 
+ # if one choose to see train score, "out" will contain train score info if self.return_train_score: train_scores, test_scores, test_sample_counts, _, parameters =\ zip(*out) @@ -761,6 +761,9 @@ class GridSearchCV(BaseSearchCV): FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. + return_train_score: boolean, default=False + If "True", the results_ attribute will include training scores. + Examples -------- @@ -779,13 +782,13 @@ class GridSearchCV(BaseSearchCV): random_state=None, shrinking=True, tol=..., verbose=False), fit_params={}, iid=..., n_jobs=1, - param_grid=..., pre_dispatch=..., refit=..., + param_grid=..., pre_dispatch=..., refit=..., return_train_score=..., scoring=..., verbose=...) >>> sorted(clf.results_.keys()) ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - ['param_C', 'param_kernel', 'params', 'test_mean_score',... - 'test_rank_score', 'test_split0_score', 'test_split1_score',... - 'test_split2_score', 'test_std_score'] + ['param_C', 'param_kernel', 'params', 'test_mean_score', 'test_mean_time', + 'test_rank_score', 'test_split0_score', 'test_split1_score', + 'test_split2_score', 'test_std_score', 'test_std_time'] Attributes ---------- diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index c3365bd3a7e60..2da5252875307 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -602,16 +602,18 @@ def test_grid_search_results(): params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]), dict(kernel=['poly', ], degree=[1, 2])] grid_search = GridSearchCV(SVC(), cv=n_folds, iid=False, - param_grid=params) + param_grid=params, return_train_score=True) grid_search.fit(X, y) grid_search_iid = GridSearchCV(SVC(), cv=n_folds, iid=True, - param_grid=params) + param_grid=params, return_train_score=True) grid_search_iid.fit(X, y) param_keys = ('param_C', 'param_degree', 'param_gamma', 'param_kernel') - score_keys = ('test_mean_score', 'test_rank_score', - 'test_split0_score', 'test_split1_score', - 'test_split2_score', 'test_std_score') + score_keys = ('test_mean_score', 'train_mean_score', 'test_mean_time', + 'test_rank_score', 'test_split0_score', 'test_split1_score', + 'test_split2_score', 'train_split0_score', + 'train_split1_score', 'train_split2_score', + 'test_std_score', 'train_std_score', 'test_std_time') n_candidates = n_grid_points for search, iid in zip((grid_search, grid_search_iid), (False, True)): @@ -649,17 +651,21 @@ def test_random_search_results(): n_search_iter = 30 params = dict(C=expon(scale=10), gamma=expon(scale=0.1)) random_search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_folds, - iid=False, param_distributions=params) + iid=False, param_distributions=params, + return_train_score=True) random_search.fit(X, y) random_search_iid = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_folds, iid=True, - param_distributions=params) + param_distributions=params, + return_train_score=True) random_search_iid.fit(X, y) param_keys = ('param_C', 'param_gamma') - score_keys = ('test_mean_score', 'test_rank_score', - 'test_split0_score', 'test_split1_score', - 'test_split2_score', 'test_std_score') + score_keys = ('test_mean_score', 'train_mean_score', 'test_mean_time', + 'test_rank_score', 'test_split0_score', 'test_split1_score', + 'test_split2_score', 'train_split0_score', + 'train_split1_score', 'train_split2_score', + 'test_std_score', 'train_std_score', 'test_std_time') n_cand = 
n_search_iter for search, iid in zip((random_search, random_search_iid), (False, True)): From ea38f8460f33a5bf2fc63b39c5983ea89d4ffc4b Mon Sep 17 00:00:00 2001 From: Eugene Chen Date: Sun, 17 Jul 2016 00:24:08 -0500 Subject: [PATCH 3/5] added a few more lines in the docstring of GridSearchCV and RandomizedSearchCV. --- sklearn/model_selection/_search.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 51f17faaaacfa..d1c278dd83526 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -822,13 +822,17 @@ class GridSearchCV(BaseSearchCV): 'test_split0_score' : [0.8, 0.7, 0.8, 0.9], 'test_split1_score' : [0.82, 0.5, 0.7, 0.78], 'test_mean_score' : [0.81, 0.60, 0.75, 0.82], + 'test_mean_time' : [ 0.00073, 0.00063, 0.00043, 0.00049] + 'test_std_time' : [ 1.62e-4, 3.37e-5, 1.42e-5, 1.1e-5] 'test_std_score' : [0.02, 0.01, 0.03, 0.03], 'test_rank_score' : [2, 4, 3, 1], 'params' : [{'kernel': 'poly', 'degree': 2}, ...], } NOTE that the key ``'params'`` is used to store a list of parameter - settings dict for all the parameter candidates. + settings dict for all the parameter candidates. Besides, + 'train_mean_score', 'train_split*_score', ... will be present when + return_train_score is set to True. best_estimator_ : estimator Estimator that was chosen by the search, i.e. estimator @@ -1026,6 +1030,9 @@ class RandomizedSearchCV(BaseSearchCV): FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. + return_train_score: boolean, default=False + If "True", the results_ attribute will include training scores. + Attributes ---------- results_ : dict of numpy (masked) ndarrays @@ -1053,13 +1060,17 @@ class RandomizedSearchCV(BaseSearchCV): 'test_split0_score' : [0.8, 0.9, 0.7], 'test_split1_score' : [0.82, 0.5, 0.7], 'test_mean_score' : [0.81, 0.7, 0.7], + 'test_mean_time' : [0.00073, 0.00063, 0.00043] + 'test_std_time' : [1.62e-4, 3.37e-5, 1.1e-5] 'test_std_score' : [0.02, 0.2, 0.], 'test_rank_score' : [3, 1, 1], 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...], } NOTE that the key ``'params'`` is used to store a list of parameter - settings dict for all the parameter candidates. + settings dict for all the parameter candidates. Besides, + 'train_mean_score', 'train_split*_score', ... will be present when + return_train_score is set to True. best_estimator_ : estimator Estimator that was chosen by the search, i.e. estimator From 5a43a55257c5b2f241e76801a92e977182f469e6 Mon Sep 17 00:00:00 2001 From: Eugene Chen Date: Sun, 17 Jul 2016 10:32:42 -0500 Subject: [PATCH 4/5] Revised code according to suggestions. 
--- sklearn/model_selection/_search.py | 66 +++++++++++++++++------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index d1c278dd83526..c16b530455aa1 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -371,7 +371,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, def __init__(self, estimator, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', - error_score='raise', return_train_score=False): + error_score='raise', return_train_score=True): self.scoring = scoring self.estimator = estimator @@ -543,10 +543,10 @@ def _fit(self, X, y, labels, parameter_iterable): # if one choose to see train score, "out" will contain train score info if self.return_train_score: - train_scores, test_scores, test_sample_counts, _, parameters =\ + train_scores, test_scores, test_sample_counts, time, parameters =\ zip(*out) else: - test_scores, test_sample_counts, _, parameters = zip(*out) + test_scores, test_sample_counts, time, parameters = zip(*out) candidate_params = parameters[::n_splits] n_candidates = len(candidate_params) @@ -563,23 +563,23 @@ def _fit(self, X, y, labels, parameter_iterable): test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=np.int) - # Computed the (weighted) mean and std for all the candidates + # Computed the (weighted) mean and std for test scores weights = test_sample_counts if self.iid else None + test_means = np.average(test_scores, axis=1, weights=weights) + test_stds = np.sqrt( + np.average((test_scores - test_means[:, np.newaxis]) ** 2, axis=1, + weights=weights)) - time = np.array(_, dtype=np.float64).reshape(n_candidates, n_splits) - time_means = np.average(time, axis=1, weights=weights) + time = np.array(time, dtype=np.float64).reshape(n_candidates, n_splits) + time_means = np.average(time, axis=1) time_stds = np.sqrt( - np.average((time - time_means[:, np.newaxis]) ** 2, - axis=1, weights=weights)) + np.average((time - time_means[:, np.newaxis]) ** 2, + axis=1)) if self.return_train_score: - train_means = np.average(train_scores, axis=1, weights=weights) + train_means = np.average(train_scores, axis=1) train_stds = np.sqrt( np.average((train_scores - train_means[:, np.newaxis]) ** 2, - axis=1, weights=weights)) - test_means = np.average(test_scores, axis=1, weights=weights) - test_stds = np.sqrt( - np.average((test_scores - test_means[:, np.newaxis]) ** 2, axis=1, - weights=weights)) + axis=1)) results = dict() for split_i in range(n_splits): @@ -761,8 +761,9 @@ class GridSearchCV(BaseSearchCV): FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. - return_train_score: boolean, default=False - If "True", the results_ attribute will include training scores. + return_train_score: boolean, default=True + If ``'False'``, the results_ attribute will not include training + scores. Examples @@ -788,7 +789,7 @@ class GridSearchCV(BaseSearchCV): ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS ['param_C', 'param_kernel', 'params', 'test_mean_score', 'test_mean_time', 'test_rank_score', 'test_split0_score', 'test_split1_score', - 'test_split2_score', 'test_std_score', 'test_std_time'] + 'test_split2_score', 'test_std_score', ...] 
Attributes ---------- @@ -822,17 +823,21 @@ class GridSearchCV(BaseSearchCV): 'test_split0_score' : [0.8, 0.7, 0.8, 0.9], 'test_split1_score' : [0.82, 0.5, 0.7, 0.78], 'test_mean_score' : [0.81, 0.60, 0.75, 0.82], - 'test_mean_time' : [ 0.00073, 0.00063, 0.00043, 0.00049] - 'test_std_time' : [ 1.62e-4, 3.37e-5, 1.42e-5, 1.1e-5] + 'train_split0_score': [0.9, 0.8, 0.85, 1.] + 'train_split1_score': [0.95, 0.7, 0.8, 0.8] + 'train_mean_score' : [0.93, 0.75, 0.83, 0.9] + 'test_mean_time' : [0.00073, 0.00063, 0.00043, 0.00049] + 'test_std_time' : [1.62e-4, 3.37e-5, 1.42e-5, 1.1e-5] 'test_std_score' : [0.02, 0.01, 0.03, 0.03], 'test_rank_score' : [2, 4, 3, 1], + ... 'params' : [{'kernel': 'poly', 'degree': 2}, ...], } NOTE that the key ``'params'`` is used to store a list of parameter settings dict for all the parameter candidates. Besides, - 'train_mean_score', 'train_split*_score', ... will be present when - return_train_score is set to True. + ``'train_mean_score'``, ``'train_split*_score'``, ... will be present + when return_train_score=True. best_estimator_ : estimator Estimator that was chosen by the search, i.e. estimator @@ -1030,8 +1035,9 @@ class RandomizedSearchCV(BaseSearchCV): FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. - return_train_score: boolean, default=False - If "True", the results_ attribute will include training scores. + return_train_score: boolean, default=True + If ``'False'``, the results_ attribute will not include training + scores. Attributes ---------- @@ -1060,10 +1066,14 @@ class RandomizedSearchCV(BaseSearchCV): 'test_split0_score' : [0.8, 0.9, 0.7], 'test_split1_score' : [0.82, 0.5, 0.7], 'test_mean_score' : [0.81, 0.7, 0.7], + 'train_split0_score': [0.9, 0.8, 0.85] + 'train_split1_score': [0.95, 0.7, 0.8] + 'train_mean_score' : [0.93, 0.75, 0.83] 'test_mean_time' : [0.00073, 0.00063, 0.00043] 'test_std_time' : [1.62e-4, 3.37e-5, 1.1e-5] 'test_std_score' : [0.02, 0.2, 0.], 'test_rank_score' : [3, 1, 1], + ... 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...], } @@ -1130,10 +1140,10 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, self.n_iter = n_iter self.random_state = random_state super(RandomizedSearchCV, self).__init__( - estimator=estimator, scoring=scoring, fit_params=fit_params, - n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch, error_score=error_score, - return_train_score=return_train_score) + estimator=estimator, scoring=scoring, fit_params=fit_params, + n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) def fit(self, X, y=None, labels=None): """Run fit on the estimator with randomly drawn parameters. @@ -1155,4 +1165,4 @@ def fit(self, X, y=None, labels=None): sampled_params = ParameterSampler(self.param_distributions, self.n_iter, random_state=self.random_state) - return self._fit(X, y, labels, sampled_params) \ No newline at end of file + return self._fit(X, y, labels, sampled_params) From b9a425d972b341c0bee411e0de1288a28835d3ea Mon Sep 17 00:00:00 2001 From: Eugene Chen Date: Sun, 17 Jul 2016 21:56:12 -0700 Subject: [PATCH 5/5] Add a few more lines to test_grid_search_results(): 1. check test_rank_score always >= 1 2. check all regular scores (test/train_mean/std_score) and timing >= 0 3. 
check all regular scores <= 1 Note that timing can be greater than 1 in general, and std of regular scores always <= 1 because the scores are bounded between 0 and 1. --- sklearn/model_selection/tests/test_search.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 2da5252875307..05d875f97e896 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -619,6 +619,13 @@ def test_grid_search_results(): for search, iid in zip((grid_search, grid_search_iid), (False, True)): assert_equal(iid, search.iid) results = search.results_ + # Check if score and timing are reasonable + assert_true(all(results['test_rank_score'] >= 1)) + assert_true(all(results[k] >= 0) for k in score_keys + if k is not 'test_rank_score') + assert_true(all(results[k] <= 1) for k in score_keys + if not k.endswith('time') and + k is not 'test_rank_score') # Check results structure check_results_array_types(results, param_keys, score_keys) check_results_keys(results, param_keys, score_keys, n_candidates)
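
For reference, the per-(candidate, split) tuples that _fit_and_score returns are unpacked in BaseSearchCV._fit differently depending on return_train_score. Below is a minimal sketch with placeholder values, assuming the tuple order used in the diffs above (a leading train score only when training scores are requested, then test score, test-fold size, timing, parameters):

    return_train_score = True

    # Placeholder tuples standing in for _fit_and_score output on this branch:
    # (train_score, test_score, n_test_samples, time, parameters)
    out = [(0.90, 0.80, 34, 7.3e-4, {'C': 1}),
           (0.95, 0.82, 33, 6.3e-4, {'C': 1}),
           (0.92, 0.78, 33, 6.8e-4, {'C': 1})]

    if return_train_score:
        train_scores, test_scores, test_sample_counts, time, parameters = zip(*out)
    else:
        # Without training scores the leading element is absent.
        test_scores, test_sample_counts, time, parameters = zip(*out)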
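
The aggregation that patch 4/5 settles on -- test scores averaged with the test-fold sizes as weights when iid=True, training scores and timing averaged unweighted, and candidates ranked by mean test score with rankdata -- can be reproduced standalone with NumPy and SciPy. The per-split numbers below are made up; only the computation mirrors the diff:

    import numpy as np
    from scipy.stats import rankdata

    # Illustrative per-candidate, per-split arrays (3 candidates x 3 splits).
    test_scores = np.array([[0.80, 0.82, 0.78],
                            [0.70, 0.50, 0.60],
                            [0.90, 0.78, 0.84]])
    train_scores = np.array([[0.90, 0.95, 0.92],
                             [0.80, 0.70, 0.75],
                             [1.00, 0.98, 0.97]])
    time = np.array([[7.3e-4, 6.3e-4, 6.8e-4],
                     [4.3e-4, 4.9e-4, 4.6e-4],
                     [5.1e-4, 5.0e-4, 5.5e-4]])
    test_sample_counts = np.array([34, 33, 33])

    iid = True
    weights = test_sample_counts if iid else None

    # Weighted mean/std across splits for the test scores.
    test_means = np.average(test_scores, axis=1, weights=weights)
    test_stds = np.sqrt(np.average((test_scores - test_means[:, np.newaxis]) ** 2,
                                   axis=1, weights=weights))

    # Training scores and timing are averaged without weights.
    train_means = np.average(train_scores, axis=1)
    train_stds = np.sqrt(np.average(
        (train_scores - train_means[:, np.newaxis]) ** 2, axis=1))
    time_means = np.average(time, axis=1)
    time_stds = np.sqrt(np.average((time - time_means[:, np.newaxis]) ** 2, axis=1))

    # Rank by mean test score: rank 1 is best, ties share the lower rank.
    ranks = np.asarray(rankdata(-test_means, method='min'), dtype=np.int32)
    best_index = np.flatnonzero(ranks == 1)[0]

    print("test_mean_score :", test_means)
    print("train_mean_score:", train_means)
    print("test_mean_time  :", time_means)
    print("test_rank_score :", ranks, "-> best candidate index:", best_index)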
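
Finally, a minimal usage sketch, assuming a scikit-learn checkout with this series applied; results_, return_train_score and the key names are the ones introduced above, not released API, and the dataset/estimator choice simply mirrors the doctest:

    # Assumes a build that includes these patches; the 'train_*' and
    # 'test_*_time' keys exist only on this branch.
    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    iris = datasets.load_iris()
    param_grid = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}

    search = GridSearchCV(SVC(), param_grid, return_train_score=True)
    search.fit(iris.data, iris.target)

    results = search.results_
    print(sorted(results.keys()))
    # Per-candidate aggregates; the 'train_*' keys are omitted when
    # return_train_score=False.
    print(results['test_mean_score'], results['test_std_score'])
    print(results['train_mean_score'], results['train_std_score'])
    print(results['test_mean_time'], results['test_std_time'])
    print(results['test_rank_score'])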