@@ -321,24 +321,57 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
     return score, parameters, n_samples_test


-def _check_param_grid(param_grid):
-    if hasattr(param_grid, 'items'):
-        param_grid = [param_grid]
+def _check_param_grid_or_dist(param_grid_or_dist):
+    """Validate the param_grid/distribution and return the unique parameter names"""
+    parameter_names = set()

-    for p in param_grid:
+    if hasattr(param_grid_or_dist, 'items'):
+        param_grid_or_dist = [param_grid_or_dist]
+
+    for p in param_grid_or_dist:
         for v in p.values():
             if isinstance(v, np.ndarray) and v.ndim > 1:
                 raise ValueError("Parameter array should be one-dimensional.")

-            check = [isinstance(v, k) for k in (list, tuple, np.ndarray)]
-            if True not in check:
+            if not isinstance(v, (list, tuple, np.ndarray)):
                 raise ValueError("Parameter values should be a list.")

             if len(v) == 0:
                 raise ValueError("Parameter values should be a non-empty "
                                  "list.")

+        parameter_names.update(p.keys())
+
+    return list(parameter_names)
+
+
+def _get_metric_names(scoring):
+    """Generate the list of metric name(s) given the scoring parameter"""
+    metric_names = list()
+    # XXX Do we index from 0?
+    # NOTE we need this to prevent collisions between similarly named
+    # custom metrics (e.g. [foo.bar, bar])
+    n_custom_metrics = 1

+    if not isinstance(scoring, (list, tuple)):
+        scoring = [scoring]
+
+    for metric in scoring:
+        if callable(metric):
+            metric_names.append("custom_metric_%s_%s" %
+                                (n_custom_metrics, metric.__name__))
+            n_custom_metrics += 1
+
+        elif isinstance(metric, six.string_types):
+            metric_names.append(metric)
+
+        else:
+            raise ValueError("Unknown metric type - %r" % type(metric))
+
+    return metric_names
+
+
+# XXX Remove in 0.20
 class _CVScoreTuple(namedtuple('_CVScoreTuple',
                                ('parameters',
                                 'mean_validation_score',
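
A quick sketch of how the two new helpers behave. The inputs below are hypothetical (not from the test suite); the expected outputs follow from the code above, including the `custom_metric_%s_%s` naming scheme:

    # Hypothetical inputs; expected outputs inferred from the helpers above.
    grid = [{'kernel': ['rbf'], 'gamma': [0.1, 0.2]},
            {'kernel': ['poly'], 'degree': [2, 3]}]
    print(sorted(_check_param_grid_or_dist(grid)))
    # ['degree', 'gamma', 'kernel']

    def my_scorer(estimator, X, y):
        return 0.0  # stand-in custom metric

    print(_get_metric_names(['accuracy', my_scorer]))
    # ['accuracy', 'custom_metric_1_my_scorer']
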
@@ -381,6 +414,7 @@ def __init__(self, estimator, scoring=None,
         self.verbose = verbose
         self.pre_dispatch = pre_dispatch
         self.error_score = error_score
+        self.metric_names_ = _get_metric_names(scoring)

     @property
     def _estimator_type(self):
@@ -521,6 +555,12 @@ def inverse_transform(self, Xt):
521555 """
522556 return self .best_estimator_ .transform (Xt )
523557
558+ @property
559+ @deprecated ("The grid_scores_ attribute is deprecated in favor of the "
560+ "search_results_ and will be removed in version 0.20." )
561+ def grid_scores_ (self ):
562+ return self ._grid_scores
563+
524564 def _fit (self , X , y , labels , parameter_iterable ):
525565 """Actual fitting, performing the search over parameters."""
526566
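
For context, `deprecated` here is the helper from `sklearn.utils`, so reading `grid_scores_` emits a `DeprecationWarning` before delegating to the private attribute. A minimal standalone sketch of the same pattern (standard library only, not sklearn's actual implementation):

    import warnings

    class _Sketch(object):
        _grid_scores = []  # stands in for the populated attribute

        @property
        def grid_scores_(self):
            warnings.warn("grid_scores_ is deprecated in favor of "
                          "search_results_ and will be removed in 0.20.",
                          DeprecationWarning)
            return self._grid_scores
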
@@ -561,38 +601,67 @@ def _fit(self, X, y, labels, parameter_iterable):
         # Out is a list of triplets: score, estimator, n_test_samples
         n_fits = len(out)

-        scores = list()
-        grid_scores = list()
-        for grid_start in range(0, n_fits, n_splits):
-            n_test_samples = 0
-            score = 0
-            all_scores = []
-            for this_score, this_n_test_samples, _, parameters in \
-                    out[grid_start:grid_start + n_splits]:
-                all_scores.append(this_score)
-                if self.iid:
-                    this_score *= this_n_test_samples
-                    n_test_samples += this_n_test_samples
-                score += this_score
-            if self.iid:
-                score /= float(n_test_samples)
-            else:
-                score /= float(n_splits)
-            scores.append((score, parameters))
-            # TODO: shall we also store the test_fold_sizes?
-            grid_scores.append(_CVScoreTuple(
-                parameters,
-                score,
-                np.array(all_scores)))
-        # Store the computed scores
-        self.grid_scores_ = grid_scores
-
-        # Find the best parameters by comparing on the mean validation score:
-        # note that `sorted` is deterministic in the way it breaks ties
-        best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
-                      reverse=True)[0]
-        self.best_params_ = best.parameters
-        self.best_score_ = best.mean_validation_score
+        self._grid_scores = list()
+
+        # XXX Do we want to store these?
+        n_candidates = n_fits // n_splits
+        n_parameters = len(self.parameter_names_)
+        n_metrics = len(self.metric_names_)
+
+        search_results_ = dict()
+
+        for param in self.parameter_names_:
+            search_results_[param] = np.empty((n_candidates,), dtype=object)
+
+        for metric in self.metric_names_:
+            # Make a column for each split
+            # XXX To make it future proof
+            for split_i in range(n_splits):
+                search_results_["%s_split_%s" % (metric, split_i)] = (
+                    np.empty((n_candidates,), dtype=np.float32))
+
+            search_results_["%s_aggregated" % metric] = (
+                np.empty((n_candidates,), dtype=np.float32))
+            search_results_["%s_rank" % metric] = np.empty((n_candidates,),
+                                                           dtype=int)
+
+        # XXX Loop over the metrics when multiple metric support is enabled
+        metric = self.metric_names_[0]
+
+        for grid_start in range(0, n_fits, n_splits):
+            candidate_i = grid_start // n_splits
+            n_test_samples = 0
+            aggregated_score = 0
+            all_scores = []
+
+            for i, (this_score, this_n_test_samples, _, parameters) in \
+                    enumerate(out[grid_start:grid_start + n_splits]):
+                all_scores.append(this_score)
+                key = "%s_split_%s" % (metric, i)
+                search_results_[key][candidate_i] = this_score
+
+                if self.iid:
+                    this_score *= this_n_test_samples
+                    n_test_samples += this_n_test_samples
+                aggregated_score += this_score
+
+            if self.iid:
+                aggregated_score /= float(n_test_samples)
+            else:
+                aggregated_score /= float(n_splits)
+
+            search_results_["%s_aggregated" % metric][candidate_i] = \
+                aggregated_score
+
+            for param, value in parameters.items():
+                search_results_[param][candidate_i] = value
+
+            # XXX Remove in version 0.20
+            self._grid_scores.append(_CVScoreTuple(
+                parameters,
+                aggregated_score,
+                np.array(all_scores)))
+
+        # Rank the candidates by aggregated score; rank 1 is the best.
+        order = np.argsort(-search_results_["%s_aggregated" % metric])
+        search_results_["%s_rank" % metric][order] = np.arange(
+            1, n_candidates + 1)
+        self.search_results_ = search_results_
+
+        # Find the best parameters by comparing on the mean validation score:
+        # note that `sorted` is deterministic in the way it breaks ties
+        best = sorted(self._grid_scores,
+                      key=lambda x: x.mean_validation_score,
+                      reverse=True)[0]
+        self.best_params_ = best.parameters
+        self.best_score_ = best.mean_validation_score

         if self.refit:
             # fit the best estimator using the entire dataset
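
The ranking step can be sanity-checked in isolation. A sketch, assuming the aggregated scores from the docstring example further down, where rank 1 marks the best candidate:

    import numpy as np

    agg = np.array([0.81, 0.60, 0.75, 0.82])  # one aggregated score per candidate
    order = np.argsort(-agg)                  # candidate indices, best first
    ranks = np.empty_like(order)
    ranks[order] = np.arange(1, len(agg) + 1)
    print(ranks)                              # [2 4 3 1]
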
@@ -723,15 +792,32 @@ class GridSearchCV(BaseSearchCV):

     Attributes
     ----------
-    grid_scores_ : list of named tuples
-        Contains scores for all parameter combinations in param_grid.
-        Each entry corresponds to one parameter setting.
-        Each named tuple has the attributes:
-
-        * ``parameters``, a dict of parameter settings
-        * ``mean_validation_score``, the mean score over the
-          cross-validation folds
-        * ``cv_validation_scores``, the list of scores for each fold
+    search_results_ : dict of numpy (masked) ndarrays
+        A dict with keys as column headers and values as columns that can
+        be imported into a pandas DataFrame.
+
+        For instance the table below
+
+        kernel|gamma|degree|accuracy_score_split_0...|accuracy_score_aggregated...
+        ==========================================================================
+        'poly'|  -  |  2   |          0.8            |          0.81
+        'poly'|  -  |  3   |          0.7            |          0.60
+        'rbf' | 0.1 |  -   |          0.8            |          0.75
+        'rbf' | 0.2 |  -   |          0.9            |          0.82
+
+        will be represented by a search_results_ dict of:
+
+        {'kernel': masked_array(data=['poly', 'poly', 'rbf', 'rbf'],
+                                mask=[False False False False]...),
+         'gamma': masked_array(data=[-- -- 0.1 0.2],
+                               mask=[ True  True False False]...),
+         'degree': masked_array(data=[2.0 3.0 -- --],
+                                mask=[False False  True  True]...),
+         'accuracy_score_split_0': [0.8, 0.7, 0.8, 0.9],
+         'accuracy_score_split_1': [0.82, 0.5, 0.7, 0.78],
+         'accuracy_score_aggregated': [0.81, 0.60, 0.75, 0.82],
+         'accuracy_score_rank': [2, 4, 3, 1]}

     best_estimator_ : estimator
     Estimator that was chosen by the search, i.e. estimator
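
Every value in the dict is a one-dimensional column of length `n_candidates`, so it loads straight into pandas. A usage sketch, assuming a fitted search object named `search` on this branch (pandas is optional and not required by scikit-learn itself):

    import pandas as pd

    df = pd.DataFrame(search.search_results_)
    print(df.sort_values('accuracy_score_rank'))  # best candidate first
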
@@ -785,7 +871,7 @@ def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score)
         self.param_grid = param_grid
-        _check_param_grid(param_grid)
+        self.parameter_names_ = _check_param_grid_or_dist(param_grid)

     def fit(self, X, y=None, labels=None):
         """Run fit with all sets of parameters.
@@ -919,15 +1005,32 @@ class RandomizedSearchCV(BaseSearchCV):

     Attributes
     ----------
-    grid_scores_ : list of named tuples
-        Contains scores for all parameter combinations in param_grid.
-        Each entry corresponds to one parameter setting.
-        Each named tuple has the attributes:
-
-        * ``parameters``, a dict of parameter settings
-        * ``mean_validation_score``, the mean score over the
-          cross-validation folds
-        * ``cv_validation_scores``, the list of scores for each fold
+    search_results_ : dict of numpy (masked) ndarrays
+        A dict with keys as column headers and values as columns that can
+        be imported into a pandas DataFrame.
+
+        For instance the table below
+
+        kernel|gamma|degree|accuracy_score_split_0...|accuracy_score_aggregated...
+        ==========================================================================
+        'poly'|  -  |  2   |          0.8            |          0.81
+        'poly'|  -  |  3   |          0.7            |          0.60
+        'rbf' | 0.1 |  -   |          0.8            |          0.75
+        'rbf' | 0.2 |  -   |          0.9            |          0.82
+
+        will be represented by a search_results_ dict of:
+
+        {'kernel': masked_array(data=['poly', 'poly', 'rbf', 'rbf'],
+                                mask=[False False False False]...),
+         'gamma': masked_array(data=[-- -- 0.1 0.2],
+                               mask=[ True  True False False]...),
+         'degree': masked_array(data=[2.0 3.0 -- --],
+                                mask=[False False  True  True]...),
+         'accuracy_score_split_0': [0.8, 0.7, 0.8, 0.9],
+         'accuracy_score_split_1': [0.82, 0.5, 0.7, 0.78],
+         'accuracy_score_aggregated': [0.81, 0.60, 0.75, 0.82],
+         'accuracy_score_rank': [2, 4, 3, 1]}

     best_estimator_ : estimator
     Estimator that was chosen by the search, i.e. estimator
@@ -970,6 +1073,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
                  error_score='raise'):

         self.param_distributions = param_distributions
+        self.parameter_names_ = _check_param_grid_or_dist(param_distributions)
         self.n_iter = n_iter
         self.random_state = random_state
         super(RandomizedSearchCV, self).__init__(
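
A quick sketch of the attribute this constructor change exposes; the import path assumes this model_selection branch and the grid is hypothetical:

    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.svm import SVC

    search = RandomizedSearchCV(SVC(), {'C': [1, 10, 100]}, n_iter=2)
    print(search.parameter_names_)  # ['C']
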