FIX unit tests: remove the `presort` loops and keyword argument from the forest tests

jmschrei · jmschrei · commit 48e826fee472 · 2015-09-21T00:55:52.000-07:00
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
@@ -90,21 +90,20 @@ def check_classification_toy(name):
     """Check classification on a toy dataset."""
     ForestClassifier = FOREST_CLASSIFIERS[name]
 
-    for presort in True, False:
-        clf = ForestClassifier(n_estimators=10, random_state=1, presort=presort)
-        clf.fit(X, y)
-        assert_array_equal(clf.predict(T), true_result)
-        assert_equal(10, len(clf))
+    clf = ForestClassifier(n_estimators=10, random_state=1)
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(T), true_result)
+    assert_equal(10, len(clf))
 
-        clf = ForestClassifier(n_estimators=10, max_features=1, 
-                               random_state=1, presort=presort)
-        clf.fit(X, y)
-        assert_array_equal(clf.predict(T), true_result)
-        assert_equal(10, len(clf))
+    clf = ForestClassifier(n_estimators=10, max_features=1, 
+                           random_state=1)
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(T), true_result)
+    assert_equal(10, len(clf))
 
-        # also test apply
-        leaf_indices = clf.apply(X)
-        assert_equal(leaf_indices.shape, (len(X), clf.n_estimators))
+    # also test apply
+    leaf_indices = clf.apply(X)
+    assert_equal(leaf_indices.shape, (len(X), clf.n_estimators))
 
 
 def test_classification_toy():
@@ -116,20 +115,19 @@ def check_iris_criterion(name, criterion):
     # Check consistency on dataset iris.
     ForestClassifier = FOREST_CLASSIFIERS[name]
 
-    for presort in True, False:
-        clf = ForestClassifier(n_estimators=10, criterion=criterion,
-                               random_state=1, presort=presort)
-        clf.fit(iris.data, iris.target)
-        score = clf.score(iris.data, iris.target)
-        assert_greater(score, 0.9, "Failed with criterion %s and score = %f"
-                                   % (criterion, score))
+    clf = ForestClassifier(n_estimators=10, criterion=criterion,
+                           random_state=1)
+    clf.fit(iris.data, iris.target)
+    score = clf.score(iris.data, iris.target)
+    assert_greater(score, 0.9, "Failed with criterion %s and score = %f"
+                               % (criterion, score))
 
-        clf = ForestClassifier(n_estimators=10, criterion=criterion,
-                               max_features=2, random_state=1, presort=presort)
-        clf.fit(iris.data, iris.target)
-        score = clf.score(iris.data, iris.target)
-        assert_greater(score, 0.5, "Failed with criterion %s and score = %f"
-                                   % (criterion, score))
+    clf = ForestClassifier(n_estimators=10, criterion=criterion,
+                           max_features=2, random_state=1)
+    clf.fit(iris.data, iris.target)
+    score = clf.score(iris.data, iris.target)
+    assert_greater(score, 0.5, "Failed with criterion %s and score = %f"
+                               % (criterion, score))
 
 
 def test_iris():
@@ -141,20 +139,19 @@ def check_boston_criterion(name, criterion):
     # Check consistency on dataset boston house prices.
     ForestRegressor = FOREST_REGRESSORS[name]
 
-    for presort in True, False:
-        clf = ForestRegressor(n_estimators=5, criterion=criterion, 
-                              random_state=1, presort=presort)
-        clf.fit(boston.data, boston.target)
-        score = clf.score(boston.data, boston.target)
-        assert_greater(score, 0.95, "Failed with max_features=None, criterion %s "
-                                    "and score = %f" % (criterion, score))
+    clf = ForestRegressor(n_estimators=5, criterion=criterion, 
+                          random_state=1)
+    clf.fit(boston.data, boston.target)
+    score = clf.score(boston.data, boston.target)
+    assert_greater(score, 0.95, "Failed with max_features=None, criterion %s "
+                                "and score = %f" % (criterion, score))
 
-        clf = ForestRegressor(n_estimators=5, criterion=criterion,
-                              max_features=6, random_state=1, presort=presort)
-        clf.fit(boston.data, boston.target)
-        score = clf.score(boston.data, boston.target)
-        assert_greater(score, 0.95, "Failed with max_features=6, criterion %s "
-                                    "and score = %f" % (criterion, score))
+    clf = ForestRegressor(n_estimators=5, criterion=criterion,
+                          max_features=6, random_state=1)
+    clf.fit(boston.data, boston.target)
+    score = clf.score(boston.data, boston.target)
+    assert_greater(score, 0.95, "Failed with max_features=6, criterion %s "
+                                "and score = %f" % (criterion, score))
 
 
 def test_boston():
@@ -199,38 +196,37 @@ def test_probability():
 def check_importances(X, y, name, criterion):
     ForestEstimator = FOREST_ESTIMATORS[name]
 
-    for presort in True, False:
-        est = ForestEstimator(n_estimators=20, criterion=criterion,
-                              random_state=0, presort=presort)
-        est.fit(X, y)
-        importances = est.feature_importances_
-        n_important = np.sum(importances > 0.1)
-        assert_equal(importances.shape[0], 10)
-        assert_equal(n_important, 3)
-
-        X_new = est.transform(X, threshold="mean")
-        assert_less(X_new.shape[1], X.shape[1])
-
-        # Check with parallel
-        importances = est.feature_importances_
-        est.set_params(n_jobs=2)
-        importances_parrallel = est.feature_importances_
-        assert_array_almost_equal(importances, importances_parrallel)
-
-        # Check with sample weights
-        sample_weight = check_random_state(0).randint(1, 10, len(X))
+    est = ForestEstimator(n_estimators=20, criterion=criterion,
+                          random_state=0)
+    est.fit(X, y)
+    importances = est.feature_importances_
+    n_important = np.sum(importances > 0.1)
+    assert_equal(importances.shape[0], 10)
+    assert_equal(n_important, 3)
+
+    X_new = est.transform(X, threshold="mean")
+    assert_less(X_new.shape[1], X.shape[1])
+
+    # Check with parallel
+    importances = est.feature_importances_
+    est.set_params(n_jobs=2)
+    importances_parrallel = est.feature_importances_
+    assert_array_almost_equal(importances, importances_parrallel)
+
+    # Check with sample weights
+    sample_weight = check_random_state(0).randint(1, 10, len(X))
+    est = ForestEstimator(n_estimators=20, random_state=0,
+                          criterion=criterion)
+    est.fit(X, y, sample_weight=sample_weight)
+    importances = est.feature_importances_
+    assert_true(np.all(importances >= 0.0))
+
+    for scale in [0.5, 10, 100]:
         est = ForestEstimator(n_estimators=20, random_state=0,
-                              criterion=criterion, presort=presort)
-        est.fit(X, y, sample_weight=sample_weight)
-        importances = est.feature_importances_
-        assert_true(np.all(importances >= 0.0))
-
-        for scale in [0.5, 10, 100]:
-            est = ForestEstimator(n_estimators=20, random_state=0,
-                                  criterion=criterion, presort=presort)
-            est.fit(X, y, sample_weight=scale * sample_weight)
-            importances_bis = est.feature_importances_
-            assert_less(np.abs(importances - importances_bis).mean(), 0.001)
+                              criterion=criterion)
+        est.fit(X, y, sample_weight=scale * sample_weight)
+        importances_bis = est.feature_importances_
+        assert_less(np.abs(importances - importances_bis).mean(), 0.001)
 
 
 def test_importances():
@@ -325,20 +321,18 @@ def mdi_importance(X_m, X, y):
     for i in range(n_features):
         true_importances[i] = mdi_importance(i, X, y)
 
-    for presort in True, False:
-        # Estimate importances with totally randomized trees
-        clf = ExtraTreesClassifier(n_estimators=500,
-                                   max_features=1,
-                                   criterion="entropy",
-                                   random_state=0,
-                                   presort=presort).fit(X, y)
+    # Estimate importances with totally randomized trees
+    clf = ExtraTreesClassifier(n_estimators=500,
+                               max_features=1,
+                               criterion="entropy",
+                               random_state=0).fit(X, y)
 
-        importances = sum(tree.tree_.compute_feature_importances(normalize=False)
-                          for tree in clf.estimators_) / clf.n_estimators
+    importances = sum(tree.tree_.compute_feature_importances(normalize=False)
+                      for tree in clf.estimators_) / clf.n_estimators
 
-        # Check correctness
-        assert_almost_equal(entropy(y), sum(importances))
-        assert_less(np.abs(true_importances - importances).mean(), 0.01)
+    # Check correctness
+    assert_almost_equal(entropy(y), sum(importances))
+    assert_less(np.abs(true_importances - importances).mean(), 0.01)
 
 
 def check_unfitted_feature_importances(name):
@@ -488,23 +482,21 @@ def check_multioutput(name):
     X_test = [[-1, -1], [1, 1], [-1, 1], [1, -1]]
     y_test = [[-1, 0], [1, 1], [-1, 2], [1, 3]]
 
-    for presort in True, False:
-        est = FOREST_ESTIMATORS[name](random_state=0, bootstrap=False,
-                                      presort=presort)
-        y_pred = est.fit(X_train, y_train).predict(X_test)
-        assert_array_almost_equal(y_pred, y_test)
-
-        if name in FOREST_CLASSIFIERS:
-            with np.errstate(divide="ignore"):
-                proba = est.predict_proba(X_test)
-                assert_equal(len(proba), 2)
-                assert_equal(proba[0].shape, (4, 2))
-                assert_equal(proba[1].shape, (4, 4))
-
-                log_proba = est.predict_log_proba(X_test)
-                assert_equal(len(log_proba), 2)
-                assert_equal(log_proba[0].shape, (4, 2))
-                assert_equal(log_proba[1].shape, (4, 4))
+    est = FOREST_ESTIMATORS[name](random_state=0, bootstrap=False)
+    y_pred = est.fit(X_train, y_train).predict(X_test)
+    assert_array_almost_equal(y_pred, y_test)
+
+    if name in FOREST_CLASSIFIERS:
+        with np.errstate(divide="ignore"):
+            proba = est.predict_proba(X_test)
+            assert_equal(len(proba), 2)
+            assert_equal(proba[0].shape, (4, 2))
+            assert_equal(proba[1].shape, (4, 4))
+
+            log_proba = est.predict_log_proba(X_test)
+            assert_equal(len(log_proba), 2)
+            assert_equal(log_proba[0].shape, (4, 2))
+            assert_equal(log_proba[1].shape, (4, 4))
 
 
 def test_multioutput():