TST stronger tests for arbitrary classes. make explicit what works and what doesn't.

amueller · amueller · commit 9ae30945e8eb · 2013-02-23T15:47:42.000+01:00
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
@@ -532,33 +532,50 @@ def test_classifiers_train():
 def test_classifiers_classes():
     # test if classifiers can cope with non-consecutive classes
     classifiers = all_estimators(type_filter='classifier')
-    X, y = make_blobs(random_state=12345)
-    X, y = shuffle(X, y, random_state=7)
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X, y = shuffle(X, y, random_state=1)
     X = StandardScaler().fit_transform(X)
-    y = 2 * y + 1
-    classes = np.unique(y)
-    # TODO: make work with next line :)
-    # y = y.astype(np.str)
+    y_names = iris.target_names[y]
+    y_str_numbers = (2 * y + 1).astype(np.str)
     for name, Clf in classifiers:
         if name in dont_test:
             continue
         if Clf in [MultinomialNB, BernoulliNB]:
             # TODO also test these!
             continue
+        if name in ["LabelPropagation", "LabelSpreading"]:
+            # TODO some complication with -1 label
+            y_ = y
+        elif name in ["RandomForestClassifier", "ExtraTreesClassifier"]:
+            # TODO not so easy because of multi-output
+            y_ = y_str_numbers
+        else:
+            y_ = y_names
 
+        classes = np.unique(y_)
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):
             clf = Clf()
         # fit
-        clf.fit(X, y)
+        try:
+            clf.fit(X, y_)
+        except Exception as e:
+            print(e)
+
         y_pred = clf.predict(X)
         # training set performance
-        assert_array_equal(np.unique(y), np.unique(y_pred))
-        assert_greater(accuracy_score(y, y_pred), 0.78,
-                       "accuracy of %s not greater than 0.78" % str(Clf))
-        assert_array_equal(
-            clf.classes_, classes,
-            "Unexpected classes_ attribute for %r" % clf)
+        assert_array_equal(np.unique(y_), np.unique(y_pred))
+        accuracy = accuracy_score(y_, y_pred)
+        assert_greater(accuracy, 0.78,
+                       "accuracy %f of %s not greater than 0.78"
+                       % (accuracy, name))
+        #assert_array_equal(
+            #clf.classes_, classes,
+            #"Unexpected classes_ attribute for %r" % clf)
+        if np.any(clf.classes_ != classes):
+            print("Unexpected classes_ attribute for %r: expected %s, got %s" %
+                  (clf, classes, clf.classes_))
 
 
 def test_regressors_int():