28
28
29
29
def _rfe_single_fit (rfe , estimator , X , y , train , test , scorer ):
30
30
"""
31
- Return the score for a fit across one fold.
31
+ Return the score and n_features per step for a fit across one fold.
32
32
"""
33
33
X_train , y_train = _safe_split (estimator , X , y , train )
34
34
X_test , y_test = _safe_split (estimator , X , y , test , train )
35
- return rfe ._fit (
35
+
36
+ rfe ._fit (
36
37
X_train ,
37
38
y_train ,
38
39
lambda estimator , features : _score (
@@ -43,7 +44,9 @@ def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
43
44
scorer ,
44
45
score_params = None ,
45
46
),
46
- ).scores_
47
+ )
48
+
49
+ return rfe .step_scores_ , rfe .step_n_features_
47
50
48
51
49
52
def _estimator_has (attr ):
@@ -264,10 +267,9 @@ def fit(self, X, y, **fit_params):
264
267
return self ._fit (X , y , ** fit_params )
265
268
266
269
def _fit (self , X , y , step_score = None , ** fit_params ):
267
- # Parameter step_score controls the calculation of self.scores_
268
- # step_score is not exposed to users
269
- # and is used when implementing RFECV
270
- # self.scores_ will not be calculated when calling _fit through fit
270
+ # Parameter step_score controls the calculation of self.step_scores_
271
+ # step_score is not exposed to users and is used when implementing RFECV
272
+ # self.step_scores_ will not be calculated when calling _fit through fit
271
273
272
274
X , y = self ._validate_data (
273
275
X ,
@@ -296,7 +298,8 @@ def _fit(self, X, y, step_score=None, **fit_params):
296
298
ranking_ = np .ones (n_features , dtype = int )
297
299
298
300
if step_score :
299
- self .scores_ = []
301
+ self .step_n_features_ = []
302
+ self .step_scores_ = []
300
303
301
304
# Elimination
302
305
while np .sum (support_ ) > n_features_to_select :
@@ -328,7 +331,8 @@ def _fit(self, X, y, step_score=None, **fit_params):
328
331
# because 'estimator' must use features
329
332
# that have not been eliminated yet
330
333
if step_score :
331
- self .scores_ .append (step_score (estimator , features ))
334
+ self .step_n_features_ .append (len (features ))
335
+ self .step_scores_ .append (step_score (estimator , features ))
332
336
support_ [features [ranks ][:threshold ]] = False
333
337
ranking_ [np .logical_not (support_ )] += 1
334
338
@@ -339,7 +343,8 @@ def _fit(self, X, y, step_score=None, **fit_params):
339
343
340
344
# Compute step score when only n_features_to_select features left
341
345
if step_score :
342
- self .scores_ .append (step_score (self .estimator_ , features ))
346
+ self .step_n_features_ .append (len (features ))
347
+ self .step_scores_ .append (step_score (self .estimator_ , features ))
343
348
self .n_features_ = support_ .sum ()
344
349
self .support_ = support_
345
350
self .ranking_ = ranking_
@@ -581,6 +586,9 @@ class RFECV(RFE):
581
586
std_test_score : ndarray of shape (n_subsets_of_features,)
582
587
Standard deviation of scores over the folds.
583
588
589
+ n_features : ndarray of shape (n_subsets_of_features,)
590
+ Number of features used at each step.
591
+
584
592
.. versionadded:: 1.0
585
593
586
594
n_features_ : int
@@ -718,12 +726,6 @@ def fit(self, X, y, groups=None):
718
726
# Initialization
719
727
cv = check_cv (self .cv , y , classifier = is_classifier (self .estimator ))
720
728
scorer = check_scoring (self .estimator , scoring = self .scoring )
721
- n_features = X .shape [1 ]
722
-
723
- if 0.0 < self .step < 1.0 :
724
- step = int (max (1 , self .step * n_features ))
725
- else :
726
- step = int (self .step )
727
729
728
730
# Build an RFE object, which will evaluate and score each possible
729
731
# feature count, down to self.min_features_to_select
@@ -753,18 +755,18 @@ def fit(self, X, y, groups=None):
753
755
parallel = Parallel (n_jobs = self .n_jobs )
754
756
func = delayed (_rfe_single_fit )
755
757
756
- scores = parallel (
758
+ scores_features = parallel (
757
759
func (rfe , self .estimator , X , y , train , test , scorer )
758
760
for train , test in cv .split (X , y , groups )
759
761
)
762
+ scores , step_n_features = zip (* scores_features )
760
763
764
+ step_n_features_rev = np .array (step_n_features [0 ])[::- 1 ]
761
765
scores = np .array (scores )
762
- scores_sum = np .sum (scores , axis = 0 )
763
- scores_sum_rev = scores_sum [::- 1 ]
764
- argmax_idx = len (scores_sum ) - np .argmax (scores_sum_rev ) - 1
765
- n_features_to_select = max (
766
- n_features - (argmax_idx * step ), self .min_features_to_select
767
- )
766
+
767
+ # Reverse the order so that the lowest number of features is selected in case of a tie.
768
+ scores_sum_rev = np .sum (scores , axis = 0 )[::- 1 ]
769
+ n_features_to_select = step_n_features_rev [np .argmax (scores_sum_rev )]
768
770
769
771
# Re-execute an elimination with best_k over the whole set
770
772
rfe = RFE (
@@ -786,11 +788,10 @@ def fit(self, X, y, groups=None):
786
788
787
789
# Reverse so that entries run from the fewest to the most features, keeping the ordering consistent with previous behavior
788
790
scores_rev = scores [:, ::- 1 ]
789
- self .cv_results_ = {}
790
- self .cv_results_ ["mean_test_score" ] = np .mean (scores_rev , axis = 0 )
791
- self .cv_results_ ["std_test_score" ] = np .std (scores_rev , axis = 0 )
792
-
793
- for i in range (scores .shape [0 ]):
794
- self .cv_results_ [f"split{ i } _test_score" ] = scores_rev [i ]
795
-
791
+ self .cv_results_ = {
792
+ "mean_test_score" : np .mean (scores_rev , axis = 0 ),
793
+ "std_test_score" : np .std (scores_rev , axis = 0 ),
794
+ ** {f"split{ i } _test_score" : scores_rev [i ] for i in range (scores .shape [0 ])},
795
+ "n_features" : step_n_features_rev ,
796
+ }
796
797
return self
0 commit comments