@@ -90,21 +90,20 @@ def check_classification_toy(name):
90
90
"""Check classification on a toy dataset."""
91
91
ForestClassifier = FOREST_CLASSIFIERS [name ]
92
92
93
- for presort in True , False :
94
- clf = ForestClassifier (n_estimators = 10 , random_state = 1 , presort = presort )
95
- clf .fit (X , y )
96
- assert_array_equal (clf .predict (T ), true_result )
97
- assert_equal (10 , len (clf ))
93
+ clf = ForestClassifier (n_estimators = 10 , random_state = 1 )
94
+ clf .fit (X , y )
95
+ assert_array_equal (clf .predict (T ), true_result )
96
+ assert_equal (10 , len (clf ))
98
97
99
- clf = ForestClassifier (n_estimators = 10 , max_features = 1 ,
100
- random_state = 1 , presort = presort )
101
- clf .fit (X , y )
102
- assert_array_equal (clf .predict (T ), true_result )
103
- assert_equal (10 , len (clf ))
98
+ clf = ForestClassifier (n_estimators = 10 , max_features = 1 ,
99
+ random_state = 1 )
100
+ clf .fit (X , y )
101
+ assert_array_equal (clf .predict (T ), true_result )
102
+ assert_equal (10 , len (clf ))
104
103
105
- # also test apply
106
- leaf_indices = clf .apply (X )
107
- assert_equal (leaf_indices .shape , (len (X ), clf .n_estimators ))
104
+ # also test apply
105
+ leaf_indices = clf .apply (X )
106
+ assert_equal (leaf_indices .shape , (len (X ), clf .n_estimators ))
108
107
109
108
110
109
def test_classification_toy ():
@@ -116,20 +115,19 @@ def check_iris_criterion(name, criterion):
116
115
# Check consistency on dataset iris.
117
116
ForestClassifier = FOREST_CLASSIFIERS [name ]
118
117
119
- for presort in True , False :
120
- clf = ForestClassifier (n_estimators = 10 , criterion = criterion ,
121
- random_state = 1 , presort = presort )
122
- clf .fit (iris .data , iris .target )
123
- score = clf .score (iris .data , iris .target )
124
- assert_greater (score , 0.9 , "Failed with criterion %s and score = %f"
125
- % (criterion , score ))
118
+ clf = ForestClassifier (n_estimators = 10 , criterion = criterion ,
119
+ random_state = 1 )
120
+ clf .fit (iris .data , iris .target )
121
+ score = clf .score (iris .data , iris .target )
122
+ assert_greater (score , 0.9 , "Failed with criterion %s and score = %f"
123
+ % (criterion , score ))
126
124
127
- clf = ForestClassifier (n_estimators = 10 , criterion = criterion ,
128
- max_features = 2 , random_state = 1 , presort = presort )
129
- clf .fit (iris .data , iris .target )
130
- score = clf .score (iris .data , iris .target )
131
- assert_greater (score , 0.5 , "Failed with criterion %s and score = %f"
132
- % (criterion , score ))
125
+ clf = ForestClassifier (n_estimators = 10 , criterion = criterion ,
126
+ max_features = 2 , random_state = 1 )
127
+ clf .fit (iris .data , iris .target )
128
+ score = clf .score (iris .data , iris .target )
129
+ assert_greater (score , 0.5 , "Failed with criterion %s and score = %f"
130
+ % (criterion , score ))
133
131
134
132
135
133
def test_iris ():
@@ -141,20 +139,19 @@ def check_boston_criterion(name, criterion):
141
139
# Check consistency on dataset boston house prices.
142
140
ForestRegressor = FOREST_REGRESSORS [name ]
143
141
144
- for presort in True , False :
145
- clf = ForestRegressor (n_estimators = 5 , criterion = criterion ,
146
- random_state = 1 , presort = presort )
147
- clf .fit (boston .data , boston .target )
148
- score = clf .score (boston .data , boston .target )
149
- assert_greater (score , 0.95 , "Failed with max_features=None, criterion %s "
150
- "and score = %f" % (criterion , score ))
142
+ clf = ForestRegressor (n_estimators = 5 , criterion = criterion ,
143
+ random_state = 1 )
144
+ clf .fit (boston .data , boston .target )
145
+ score = clf .score (boston .data , boston .target )
146
+ assert_greater (score , 0.95 , "Failed with max_features=None, criterion %s "
147
+ "and score = %f" % (criterion , score ))
151
148
152
- clf = ForestRegressor (n_estimators = 5 , criterion = criterion ,
153
- max_features = 6 , random_state = 1 , presort = presort )
154
- clf .fit (boston .data , boston .target )
155
- score = clf .score (boston .data , boston .target )
156
- assert_greater (score , 0.95 , "Failed with max_features=6, criterion %s "
157
- "and score = %f" % (criterion , score ))
149
+ clf = ForestRegressor (n_estimators = 5 , criterion = criterion ,
150
+ max_features = 6 , random_state = 1 )
151
+ clf .fit (boston .data , boston .target )
152
+ score = clf .score (boston .data , boston .target )
153
+ assert_greater (score , 0.95 , "Failed with max_features=6, criterion %s "
154
+ "and score = %f" % (criterion , score ))
158
155
159
156
160
157
def test_boston ():
@@ -199,38 +196,37 @@ def test_probability():
199
196
def check_importances (X , y , name , criterion ):
200
197
ForestEstimator = FOREST_ESTIMATORS [name ]
201
198
202
- for presort in True , False :
203
- est = ForestEstimator (n_estimators = 20 , criterion = criterion ,
204
- random_state = 0 , presort = presort )
205
- est .fit (X , y )
206
- importances = est .feature_importances_
207
- n_important = np .sum (importances > 0.1 )
208
- assert_equal (importances .shape [0 ], 10 )
209
- assert_equal (n_important , 3 )
210
-
211
- X_new = est .transform (X , threshold = "mean" )
212
- assert_less (X_new .shape [1 ], X .shape [1 ])
213
-
214
- # Check with parallel
215
- importances = est .feature_importances_
216
- est .set_params (n_jobs = 2 )
217
- importances_parrallel = est .feature_importances_
218
- assert_array_almost_equal (importances , importances_parrallel )
219
-
220
- # Check with sample weights
221
- sample_weight = check_random_state (0 ).randint (1 , 10 , len (X ))
199
+ est = ForestEstimator (n_estimators = 20 , criterion = criterion ,
200
+ random_state = 0 )
201
+ est .fit (X , y )
202
+ importances = est .feature_importances_
203
+ n_important = np .sum (importances > 0.1 )
204
+ assert_equal (importances .shape [0 ], 10 )
205
+ assert_equal (n_important , 3 )
206
+
207
+ X_new = est .transform (X , threshold = "mean" )
208
+ assert_less (X_new .shape [1 ], X .shape [1 ])
209
+
210
+ # Check with parallel
211
+ importances = est .feature_importances_
212
+ est .set_params (n_jobs = 2 )
213
+ importances_parrallel = est .feature_importances_
214
+ assert_array_almost_equal (importances , importances_parrallel )
215
+
216
+ # Check with sample weights
217
+ sample_weight = check_random_state (0 ).randint (1 , 10 , len (X ))
218
+ est = ForestEstimator (n_estimators = 20 , random_state = 0 ,
219
+ criterion = criterion )
220
+ est .fit (X , y , sample_weight = sample_weight )
221
+ importances = est .feature_importances_
222
+ assert_true (np .all (importances >= 0.0 ))
223
+
224
+ for scale in [0.5 , 10 , 100 ]:
222
225
est = ForestEstimator (n_estimators = 20 , random_state = 0 ,
223
- criterion = criterion , presort = presort )
224
- est .fit (X , y , sample_weight = sample_weight )
225
- importances = est .feature_importances_
226
- assert_true (np .all (importances >= 0.0 ))
227
-
228
- for scale in [0.5 , 10 , 100 ]:
229
- est = ForestEstimator (n_estimators = 20 , random_state = 0 ,
230
- criterion = criterion , presort = presort )
231
- est .fit (X , y , sample_weight = scale * sample_weight )
232
- importances_bis = est .feature_importances_
233
- assert_less (np .abs (importances - importances_bis ).mean (), 0.001 )
226
+ criterion = criterion )
227
+ est .fit (X , y , sample_weight = scale * sample_weight )
228
+ importances_bis = est .feature_importances_
229
+ assert_less (np .abs (importances - importances_bis ).mean (), 0.001 )
234
230
235
231
236
232
def test_importances ():
@@ -325,20 +321,18 @@ def mdi_importance(X_m, X, y):
325
321
for i in range (n_features ):
326
322
true_importances [i ] = mdi_importance (i , X , y )
327
323
328
- for presort in True , False :
329
- # Estimate importances with totally randomized trees
330
- clf = ExtraTreesClassifier (n_estimators = 500 ,
331
- max_features = 1 ,
332
- criterion = "entropy" ,
333
- random_state = 0 ,
334
- presort = presort ).fit (X , y )
324
+ # Estimate importances with totally randomized trees
325
+ clf = ExtraTreesClassifier (n_estimators = 500 ,
326
+ max_features = 1 ,
327
+ criterion = "entropy" ,
328
+ random_state = 0 ).fit (X , y )
335
329
336
- importances = sum (tree .tree_ .compute_feature_importances (normalize = False )
337
- for tree in clf .estimators_ ) / clf .n_estimators
330
+ importances = sum (tree .tree_ .compute_feature_importances (normalize = False )
331
+ for tree in clf .estimators_ ) / clf .n_estimators
338
332
339
- # Check correctness
340
- assert_almost_equal (entropy (y ), sum (importances ))
341
- assert_less (np .abs (true_importances - importances ).mean (), 0.01 )
333
+ # Check correctness
334
+ assert_almost_equal (entropy (y ), sum (importances ))
335
+ assert_less (np .abs (true_importances - importances ).mean (), 0.01 )
342
336
343
337
344
338
def check_unfitted_feature_importances (name ):
@@ -488,23 +482,21 @@ def check_multioutput(name):
488
482
X_test = [[- 1 , - 1 ], [1 , 1 ], [- 1 , 1 ], [1 , - 1 ]]
489
483
y_test = [[- 1 , 0 ], [1 , 1 ], [- 1 , 2 ], [1 , 3 ]]
490
484
491
- for presort in True , False :
492
- est = FOREST_ESTIMATORS [name ](random_state = 0 , bootstrap = False ,
493
- presort = presort )
494
- y_pred = est .fit (X_train , y_train ).predict (X_test )
495
- assert_array_almost_equal (y_pred , y_test )
496
-
497
- if name in FOREST_CLASSIFIERS :
498
- with np .errstate (divide = "ignore" ):
499
- proba = est .predict_proba (X_test )
500
- assert_equal (len (proba ), 2 )
501
- assert_equal (proba [0 ].shape , (4 , 2 ))
502
- assert_equal (proba [1 ].shape , (4 , 4 ))
503
-
504
- log_proba = est .predict_log_proba (X_test )
505
- assert_equal (len (log_proba ), 2 )
506
- assert_equal (log_proba [0 ].shape , (4 , 2 ))
507
- assert_equal (log_proba [1 ].shape , (4 , 4 ))
485
+ est = FOREST_ESTIMATORS [name ](random_state = 0 , bootstrap = False )
486
+ y_pred = est .fit (X_train , y_train ).predict (X_test )
487
+ assert_array_almost_equal (y_pred , y_test )
488
+
489
+ if name in FOREST_CLASSIFIERS :
490
+ with np .errstate (divide = "ignore" ):
491
+ proba = est .predict_proba (X_test )
492
+ assert_equal (len (proba ), 2 )
493
+ assert_equal (proba [0 ].shape , (4 , 2 ))
494
+ assert_equal (proba [1 ].shape , (4 , 4 ))
495
+
496
+ log_proba = est .predict_log_proba (X_test )
497
+ assert_equal (len (log_proba ), 2 )
498
+ assert_equal (log_proba [0 ].shape , (4 , 2 ))
499
+ assert_equal (log_proba [1 ].shape , (4 , 4 ))
508
500
509
501
510
502
def test_multioutput ():
0 commit comments