From 29406f3ab1590ffa6aaac934018e46944d15b404 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 30 Jun 2017 14:44:28 +0800 Subject: [PATCH 1/2] change dataset of ClassifierChain test --- sklearn/tests/test_multioutput.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 00085a32af94f..7b0d3a94aab24 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -470,22 +470,23 @@ def test_classifier_chain_vs_independent_models(): # Verify that an ensemble of classifier chains (each of length # N) can achieve a higher Jaccard similarity score than N independent # models - yeast = fetch_mldata('yeast') - X = yeast['data'] - Y = yeast['target'].transpose().toarray() - X_train = X[:2000, :] - X_test = X[2000:, :] - Y_train = Y[:2000, :] - Y_test = Y[2000:, :] + X, y = make_classification(n_samples=1000, + n_features=100, + n_classes=16, + n_informative=10, + random_state=0) + Y_multi = np.array([[int(yyy) for yyy in format(yy, '#06b')[2:]] + for yy in y]) + X_train = X[:600, :] + X_test = X[600:, :] + Y_train = Y_multi[:600, :] + Y_test = Y_multi[600:, :] ovr = OneVsRestClassifier(LogisticRegression()) ovr.fit(X_train, Y_train) Y_pred_ovr = ovr.predict(X_test) - chain = ClassifierChain(LogisticRegression(), - order=np.array([0, 2, 4, 6, 8, 10, - 12, 1, 3, 5, 7, 9, - 11, 13])) + chain = ClassifierChain(LogisticRegression()) chain.fit(X_train, Y_train) Y_pred_chain = chain.predict(X_test) From ab983fcb9a34a6eab2a47b529b9695952064501a Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 1 Jul 2017 20:00:23 +0800 Subject: [PATCH 2/2] random state --- sklearn/tests/test_multioutput.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 7b0d3a94aab24..0c58d04c27581 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -356,7 +356,8 @@ def generate_multilabel_dataset_with_correlations(): X, y = make_classification(n_samples=1000, n_features=100, n_classes=16, - n_informative=10) + n_informative=10, + random_state=0) Y_multi = np.array([[int(yyy) for yyy in format(yy, '#06b')[2:]] for yy in y]) @@ -470,17 +471,11 @@ def test_classifier_chain_vs_independent_models(): # Verify that an ensemble of classifier chains (each of length # N) can achieve a higher Jaccard similarity score than N independent # models - X, y = make_classification(n_samples=1000, - n_features=100, - n_classes=16, - n_informative=10, - random_state=0) - Y_multi = np.array([[int(yyy) for yyy in format(yy, '#06b')[2:]] - for yy in y]) + X, Y = generate_multilabel_dataset_with_correlations() X_train = X[:600, :] X_test = X[600:, :] - Y_train = Y_multi[:600, :] - Y_test = Y_multi[600:, :] + Y_train = Y[:600, :] + Y_test = Y[600:, :] ovr = OneVsRestClassifier(LogisticRegression()) ovr.fit(X_train, Y_train)