@@ -206,21 +206,19 @@ def check_importances(X, y, name, criterion):
     assert_less(0 < X_new.shape[1], X.shape[1])

     # Check with sample weights
-    sample_weight = np.ones(y.shape)
-    sample_weight[y == 1] *= 100
-
+    sample_weight = check_random_state(0).randint(1, 10, len(X))
     est = ForestEstimator(n_estimators=20, random_state=0,
                           criterion=criterion)
     est.fit(X, y, sample_weight=sample_weight)
     importances = est.feature_importances_
     assert_true(np.all(importances >= 0.0))

-    for scale in [3, 10, 1000, 100000]:
+    for scale in [10, 100, 1000]:
         est = ForestEstimator(n_estimators=20, random_state=0,
                               criterion=criterion)
         est.fit(X, y, sample_weight=scale * sample_weight)
         importances_bis = est.feature_importances_
-        assert_almost_equal(importances, importances_bis)
+        assert_less(np.abs(importances - importances_bis).mean(), 0.0001)


 def test_importances():
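The hunk above rests on the idea that rescaling all sample weights by a common constant should leave the fitted trees, and hence the MDI feature importances, essentially unchanged. A minimal standalone sketch of that invariance, assuming scikit-learn is available (the synthetic dataset, estimator choice, and tolerance below are illustrative, not part of the patch):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=500, n_features=10, random_state=0)
    sample_weight = np.random.RandomState(0).randint(1, 10, len(X))

    a = RandomForestClassifier(n_estimators=20, random_state=0)
    a.fit(X, y, sample_weight=sample_weight)
    b = RandomForestClassifier(n_estimators=20, random_state=0)
    b.fit(X, y, sample_weight=100 * sample_weight)

    # Uniformly rescaled weights should yield (numerically) the same importances.
    assert np.abs(a.feature_importances_ - b.feature_importances_).mean() < 1e-4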
@@ -232,7 +230,7 @@ def test_importances():
     for name, criterion in product(FOREST_CLASSIFIERS, ["gini", "entropy"]):
         yield check_importances, X, y, name, criterion

-    for name, criterion in product(FOREST_REGRESSORS, ["mse"]):
+    for name, criterion in product(FOREST_REGRESSORS, ["mse", "friedman_mse"]):
         yield check_importances, X, y, name, criterion

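The new test line extends the same importance checks to "friedman_mse", Friedman's improvement-score variant of the MSE criterion, which the patch itself shows the forest regressors accept. A hedged usage sketch (data and sizes are illustrative):

    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor

    X, y = make_regression(n_samples=200, n_features=10, random_state=0)
    reg = RandomForestRegressor(n_estimators=20, criterion="friedman_mse",
                                random_state=0).fit(X, y)
    # Importances are non-negative and sum to 1 (up to rounding).
    print(reg.feature_importances_)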
@@ -242,10 +240,7 @@ def test_importances_asymptotic():
     # Understanding variable importances in forests of randomized trees, 2013).

     def binomial(k, n):
-        if k < 0 or k > n:
-            return 0
-        else:
-            return comb(int(n), int(k), exact=True)
+        return 0 if k < 0 or k > n else comb(int(n), int(k), exact=True)

     def entropy(samples):
         e = 0.
@@ -263,11 +258,9 @@ def mdi_importance(X_m, X, y):

         variables = list(range(p))
         variables.pop(X_m)
-        imp = 0.
+        values = [np.unique(X[:, i]) for i in range(p)]

-        values = []
-        for i in range(p):
-            values.append(np.unique(X[:, i]))
+        imp = 0.

         for k in range(p):
             # Weight of each B of size k
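For context, this helper follows the closed-form MDI expression derived by Louppe et al. (2013) for totally randomized, fully developed trees: with p inputs V = {X_1, ..., X_p},

    Imp(X_m) = sum_{k=0}^{p-1}  1 / (C(p, k) * (p - k))
                 * sum_{B subset of V \ {X_m}, |B| = k}  I(X_m; Y | B)

where C(p, k) is the binomial coefficient computed by the binomial helper above and I(X_m; Y | B) is the conditional mutual information; the 1 / (C(p, k) * (p - k)) factor is exactly the "weight of each B of size k" named in the comment.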
@@ -331,7 +324,7 @@ def mdi_importance(X_m, X, y):

     # Check correctness
     assert_almost_equal(entropy(y), sum(importances))
-    assert_less(((true_importances - importances) ** 2).sum(), 0.0005)
+    assert_less(np.abs(true_importances - importances).mean(), 0.01)


 def check_unfitted_feature_importances(name):
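A note on why both assertions are expected to hold, per the same paper: the importances of all p variables sum to the mutual information between the inputs and the output,

    sum_m Imp(X_m) = I(X_1, ..., X_p; Y) = H(Y) - H(Y | X_1, ..., X_p) = H(Y),

the last step assuming, as in this test's setup, that y is a deterministic function of the inputs (so the conditional entropy vanishes). That is what assert_almost_equal(entropy(y), sum(importances)) verifies, while the changed line bounds the mean absolute deviation of the empirical importances from the theoretical ones, replacing the earlier sum-of-squares bound.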