From 23109f071df421b864c766101b881c2a884109a3 Mon Sep 17 00:00:00 2001
From: Michael Eickenberg
Date: Mon, 19 Oct 2015 15:00:07 +0200
Subject: [PATCH 01/17] WIP adding common test for sample weights

---
 sklearn/tests/test_common.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 5a28b31b33c2f..90fdcd09dab59 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -13,6 +13,8 @@ import pkgutil
 
 from sklearn.externals.six import PY3
+from sklearn.externals.six.moves import zip
+from sklearn.externals.funcsigs import signature
 from sklearn.utils.testing import assert_false, clean_warning_registry
 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
@@ -219,3 +221,15 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
+            yield check_transformer_n_iter, name, estimator
+
+
+def test_sample_weight_consistency():
+    estimators = all_estimators()
+
+    for name, Estimator in estimators:
+        if not 'sample_weight' in signature(Estimator.fit).keys():
+            continue
+        print (name)
+

From 7a0b7527e72b361fe965b611bde8334f1544c187 Mon Sep 17 00:00:00 2001
From: Michael Eickenberg
Date: Mon, 19 Oct 2015 17:12:48 +0200
Subject: [PATCH 02/17] WIP testing structure in place, many tests failing.
 Proceeding to check whether test should apply to all of the estimators

---
 sklearn/tests/test_common.py | 62 ++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 90fdcd09dab59..f4dd11755642c 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -11,16 +11,23 @@
 import warnings
 import sys
 import pkgutil
+import numpy as np
 
+from sklearn import datasets
+from sklearn.base import is_classifier, is_regressor
+from sklearn.cross_validation import train_test_split
 from sklearn.externals.six import PY3
 from sklearn.externals.six.moves import zip
 from sklearn.externals.funcsigs import signature
+from sklearn.utils import check_random_state
 from sklearn.utils.testing import assert_false, clean_warning_registry
 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_in
 from sklearn.utils.testing import ignore_warnings
 
+from numpy.testing import assert_array_almost_equal
+
 import sklearn
 from sklearn.cluster.bicluster import BiclusterMixin
 from sklearn.decomposition import ProjectedGradientNMF
@@ -224,12 +231,61 @@ def test_get_params_invariance():
             yield check_transformer_n_iter, name, estimator
 
 
-def test_sample_weight_consistency():
+def test_sample_weight_consistency(random_state=42):
     estimators = all_estimators()
 
+    n_samples, n_features = 20, 5
+    rng = check_random_state(random_state)
+
+    sample_weight = rng.randint(1, 4, (n_samples,))
+
+    X_clf, y_clf = datasets.make_classification(
+        n_samples=n_samples, n_features=n_features,
+        random_state=random_state)
+    X_reg, y_reg = datasets.make_regression(
+        n_samples=n_samples, n_features=n_features,
+        n_informative=2, random_state=random_state)
+
+    def aug(data, sample_weight):
+        # raise all samples to multiplicity of the corresponding sample_weight
+        aug_data = []
+        for samples, weight in zip(zip(*data), sample_weight):
+            for _ in range(weight):
+                aug_data.append(samples)
+        aug_data = map(np.array, zip(*aug_data))
+        return aug_data
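# ------------------------------------------------------------------------
# Editor's illustration (not part of the patch): for integer weights, `aug`
# treats a weight of k as k copies of the sample, which is the equivalence
# the test below relies on. A standalone NumPy sketch of the same idea
# (np.repeat is assumed as an equivalent here; the patch does not use it):
#
#     import numpy as np
#     X = np.array([[0., 1.], [2., 3.], [4., 5.]])
#     y = np.array([0, 1, 1])
#     w = np.array([2, 1, 3])
#     X_aug, y_aug = np.repeat(X, w, axis=0), np.repeat(y, w)
#     assert X_aug.shape[0] == w.sum()  # one row per unit of weight
# ------------------------------------------------------------------------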
+ + train, test = train_test_split(range(n_samples)) + for name, Estimator in estimators: - if not 'sample_weight' in signature(Estimator.fit).keys(): + if 'sample_weight' not in signature(Estimator.fit).parameters.keys(): continue - print (name) + if is_classifier(Estimator): + X, y = X_clf, y_clf + elif is_regressor(Estimator): + X, y = X_reg, y_reg + else: + print ("%s is neither classifier nor regressor" % name) + continue + + try: + estimator_sw = Estimator().fit(X[train], y[train], + sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = aug((X[train], y[train]), + sample_weight[train]) + estimator_aug = Estimator().fit(X_aug_train, y_aug_train) + except ValueError: + # LogisticRegression liblinear (standard solver) + # does not support sample weights, but the argument is there + continue + + # if estimator has `coef_` attribute, then compare the two + if hasattr(estimator_sw, 'coef_'): + yield (assert_array_almost_equal, + estimator_sw.coef_, estimator_aug.coef_) + + pred_sw = estimator_sw.predict(X[test]) + pred_aug = estimator_aug.predict(X[test]) + yield assert_array_almost_equal, pred_sw, pred_aug From 5c882fa45f88e0c72aeaeb9bc834a0577d65bd3f Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 16:47:30 +0200 Subject: [PATCH 03/17] Added random_state to the estimator, and another test for weights 0 and 1 --- sklearn/tests/test_common.py | 78 ++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index f4dd11755642c..600ee6f627a4e 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -18,13 +18,14 @@ from sklearn.cross_validation import train_test_split from sklearn.externals.six import PY3 from sklearn.externals.six.moves import zip -from sklearn.externals.funcsigs import signature from sklearn.utils import check_random_state from sklearn.utils.testing import assert_false, clean_warning_registry from sklearn.utils.testing import all_estimators from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_in from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import set_random_state +from sklearn.utils.validation import has_fit_parameter from numpy.testing import assert_array_almost_equal @@ -238,6 +239,7 @@ def test_sample_weight_consistency(random_state=42): rng = check_random_state(random_state) sample_weight = rng.randint(1, 4, (n_samples,)) + print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -258,7 +260,7 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - if 'sample_weight' not in signature(Estimator.fit).parameters.keys(): + if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): X, y = X_clf, y_clf @@ -269,11 +271,72 @@ def aug(data, sample_weight): continue try: - estimator_sw = Estimator().fit(X[train], y[train], - sample_weight=sample_weight[train]) + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), sample_weight[train]) - estimator_aug = Estimator().fit(X_aug_train, y_aug_train) + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) + + except 
ValueError: + # LogisticRegression liblinear (standard solver) + # does not support sample weights, but the argument is there + continue + + # if estimator has `coef_` attribute, then compare the two + if hasattr(estimator_sw, 'coef_'): + yield (assert_array_almost_equal, + estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + + pred_sw = estimator_sw.predict(X[test]) + pred_aug = estimator_aug.predict(X[test]) + + yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + + + +def test_sample_weight_0(random_state=42): + estimators = all_estimators() + + n_samples, n_features = 20, 5 + rng = check_random_state(random_state) + + #sample_weight = rng.uniform(-0.2, .8, (n_samples,)) + sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 + print('sample_weight = ', sample_weight) + + X_clf, y_clf = datasets.make_classification( + n_samples=n_samples, n_features=n_features, + random_state=random_state) + X_reg, y_reg = datasets.make_regression( + n_samples=n_samples, n_features=n_features, + n_informative=2, random_state=random_state) + + train, test = train_test_split(range(n_samples)) + + for name, Estimator in estimators: + + if not has_fit_parameter(Estimator, 'sample_weight'): + continue + if is_classifier(Estimator): + X, y = X_clf, y_clf + elif is_regressor(Estimator): + X, y = X_reg, y_reg + else: + print ("%s is neither classifier nor regressor" % name) + continue + + try: + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ + y[train][sample_weight[train]==1] + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) except ValueError: # LogisticRegression liblinear (standard solver) # does not support sample weights, but the argument is there @@ -282,10 +345,11 @@ def aug(data, sample_weight): # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_) + estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug + yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + From 10da04991d1b316e55fc038f7d2677bd44898404 Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 16:51:29 +0200 Subject: [PATCH 04/17] Some prints removed --- sklearn/tests/test_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 600ee6f627a4e..10bd51bbe9661 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -239,7 +239,6 @@ def test_sample_weight_consistency(random_state=42): rng = check_random_state(random_state) sample_weight = rng.randint(1, 4, (n_samples,)) - print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -303,9 +302,7 @@ def test_sample_weight_0(random_state=42): n_samples, n_features = 20, 5 rng = check_random_state(random_state) - #sample_weight = rng.uniform(-0.2, .8, (n_samples,)) sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 - print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( 
n_samples=n_samples, n_features=n_features, From d7894242bbb803d1b6d95643c164ab1ed4565896 Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 19:03:10 +0200 Subject: [PATCH 05/17] Removed try: and added list of excluded estimators. Added parameters for SGD estimators to change number of iterations or precision. --- sklearn/tests/test_common.py | 74 +++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 10bd51bbe9661..0c5e5cbc2f934 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -233,6 +233,11 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): + exclude = ['LogisticRegression', 'LinearSVC', + 'MultinomialNB', # Requires positive samples + 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC + ] + SGD_estimators = ['SGDClassifier', 'SGDRegressor', 'Perceptron'] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -259,6 +264,11 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: + #print ("%s being analysed" % name) + + if name in exclude: + print ("%s is being excluded" % name) + continue if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): @@ -269,40 +279,46 @@ def aug(data, sample_weight): print ("%s is neither classifier nor regressor" % name) continue - try: - estimator_sw = Estimator() - set_random_state(estimator_sw, random_state=random_state) - estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) - X_aug_train, y_aug_train = aug((X[train], y[train]), - sample_weight[train]) - estimator_aug = Estimator() - set_random_state(estimator_aug, random_state=random_state) - estimator_aug.fit(X_aug_train, y_aug_train) - - except ValueError: - # LogisticRegression liblinear (standard solver) - # does not support sample weights, but the argument is there - continue + if name in SGD_estimators: + params = dict([('n_iter', 100)]) + precision = 3 + else: + params = dict() + precision = 6 + #print ('params = ', params) + estimator_sw = Estimator(**params) + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = aug((X[train], y[train]), + sample_weight[train]) + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' + #print ("%s finsihed" % name) def test_sample_weight_0(random_state=42): + exclude = ['LogisticRegression', 'LinearSVC', + 'MultinomialNB', # Requires positive samples + 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC + ] estimators = all_estimators() n_samples, n_features = 20, 5 rng = check_random_state(random_state) - sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 + sample_weight = 
(rng.permutation(n_samples) < (n_samples / 2.)) * 1 X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -315,6 +331,9 @@ def test_sample_weight_0(random_state=42): for name, Estimator in estimators: + if name in exclude: + print ("%s is being excluded" % name) + continue if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): @@ -325,19 +344,14 @@ def test_sample_weight_0(random_state=42): print ("%s is neither classifier nor regressor" % name) continue - try: - estimator_sw = Estimator() - set_random_state(estimator_sw, random_state=random_state) - estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) - X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ - y[train][sample_weight[train]==1] - estimator_aug = Estimator() - set_random_state(estimator_aug, random_state=random_state) - estimator_aug.fit(X_aug_train, y_aug_train) - except ValueError: - # LogisticRegression liblinear (standard solver) - # does not support sample weights, but the argument is there - continue + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ + y[train][sample_weight[train]==1] + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): From 0b2921a350e616b1cc19596aef66c468adc9710d Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 11:28:51 +0200 Subject: [PATCH 06/17] Exclusion list completed --- sklearn/tests/test_common.py | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 0c5e5cbc2f934..4687d171f7144 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -233,11 +233,19 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): - exclude = ['LogisticRegression', 'LinearSVC', + exclude = [ + 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', + 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', + 'LinearSVC', 'MultinomialNB', # Requires positive samples - 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC - ] - SGD_estimators = ['SGDClassifier', 'SGDRegressor', 'Perceptron'] + 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC + 'SGDClassifier', # Doesn't work. Probably more data needed + 'SGDRegressor', # Doesn't work. Probably more data needed + 'Perceptron', # Uses SGD too. Doesn't work. 
Probably more data needed + 'RidgeClassifierCV', 'RidgeCV', + 'RandomForestClassifier', 'RandomForestRegressor', + ] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -264,7 +272,6 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - #print ("%s being analysed" % name) if name in exclude: print ("%s is being excluded" % name) @@ -279,14 +286,7 @@ def aug(data, sample_weight): print ("%s is neither classifier nor regressor" % name) continue - if name in SGD_estimators: - params = dict([('n_iter', 100)]) - precision = 3 - else: - params = dict() - precision = 6 - #print ('params = ', params) - estimator_sw = Estimator(**params) + estimator_sw = Estimator() set_random_state(estimator_sw, random_state=random_state) estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), @@ -295,6 +295,7 @@ def aug(data, sample_weight): set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) + precision = 6 # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, @@ -304,15 +305,22 @@ def aug(data, sample_weight): pred_aug = estimator_aug.predict(X[test]) yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' - #print ("%s finsihed" % name) def test_sample_weight_0(random_state=42): - exclude = ['LogisticRegression', 'LinearSVC', + exclude = [ + 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', + 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples - 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC - ] + 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC + 'SGDClassifier', # Doesn't work. Probably more data needed + 'SGDRegressor', # Doesn't work. Probably more data needed + 'Perceptron', # Uses SGD too. Doesn't work. 
Probably more data needed + 'RidgeClassifierCV', 'RidgeCV', + 'RandomForestClassifier', 'RandomForestRegressor', + ] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -353,14 +361,15 @@ def test_sample_weight_0(random_state=42): set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) + precision = 6 # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' From f802d846f7c29abac2f9cb9ae451164aee9adc68 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 14:12:50 +0200 Subject: [PATCH 07/17] fixed typo in test_get_params_invariance --- sklearn/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 4687d171f7144..933e6557ca82b 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -229,7 +229,7 @@ def test_get_params_invariance(): yield check_get_params_invariance, name, Estimator else: yield check_get_params_invariance, name, Estimator - yield check_transformer_n_iter, name, estimator + yield check_transformer_n_iter, name, Estimator def test_sample_weight_consistency(random_state=42): From ddc89ce4b4082297b9e0b919618da9446bf96447 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 14:19:18 +0200 Subject: [PATCH 08/17] Added ExtraTreesRegressor and AdaBoostClassifier --- sklearn/tests/test_common.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 933e6557ca82b..86b378c10730c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -234,10 +234,10 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): exclude = [ - 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', + 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', 'ExtraTreesRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', - 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC 'SGDClassifier', # Doesn't work. 
Probably more data needed @@ -310,8 +310,9 @@ def aug(data, sample_weight): def test_sample_weight_0(random_state=42): exclude = [ - 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', + 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', 'ExtraTreesRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC From 984516ee709f80c7205e24098b9407b78acb7eae Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 15:08:24 +0200 Subject: [PATCH 09/17] Casting more explicit --- sklearn/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 86b378c10730c..f5a08a3ced0a6 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -327,7 +327,7 @@ def test_sample_weight_0(random_state=42): n_samples, n_features = 20, 5 rng = check_random_state(random_state) - sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)) * 1 + sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.int) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, From e4e0848764bd884fb77801b983afb50859acf495 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 17:24:10 +0200 Subject: [PATCH 10/17] Added test for aug --- sklearn/tests/test_common.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index f5a08a3ced0a6..f15d29b06493c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -27,7 +27,7 @@ from sklearn.utils.testing import set_random_state from sklearn.utils.validation import has_fit_parameter -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_almost_equal, assert_equal import sklearn from sklearn.cluster.bicluster import BiclusterMixin @@ -236,7 +236,10 @@ def test_sample_weight_consistency(random_state=42): exclude = [ 'AdaBoostClassifier', 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', 'ExtraTreesRegressor', + 'GradientBoostingClassifier', 'GradientBoostingRegressor', + 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'ExtraTreesClassifier', 'ExtraTreesRegressor', + 'DecisionTreeClassifier','DecisionTreeRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC @@ -291,6 +294,8 @@ def aug(data, sample_weight): estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), sample_weight[train]) + assert_equal(X_aug_train.shape[0], np.sum(sample_weight[train])) + estimator_aug = Estimator() set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) @@ -310,16 +315,19 @@ def aug(data, sample_weight): def test_sample_weight_0(random_state=42): exclude = [ - 'AdaBoostClassifier', 'AdaBoostRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', 'ExtraTreesRegressor', + 'GradientBoostingClassifier', 'GradientBoostingRegressor', + 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'ExtraTreesClassifier', 
'ExtraTreesRegressor',
         'DecisionTreeClassifier','DecisionTreeRegressor',
         'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC',
         'MultinomialNB',  # Requires positive samples
         'CalibratedClassifierCV',  # This is a meta-estimator using LinearSVC
         'SGDClassifier',  # Doesn't work. Probably more data needed
         'SGDRegressor',  # Doesn't work. Probably more data needed
         'Perceptron',  # Uses SGD too. Doesn't work. Probably more data needed
         'RidgeClassifierCV', 'RidgeCV',
         'RandomForestClassifier', 'RandomForestRegressor',
     ]
     estimators = all_estimators()

From 020b0e446d99cc11e044a28a593a12e9a6906676 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Thu, 22 Oct 2015 18:41:07 +0200
Subject: [PATCH 11/17] Changed sample_weight to float

---
 sklearn/tests/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index f15d29b06493c..1512e03ab684b 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -335,7 +335,7 @@ def test_sample_weight_0(random_state=42):
     n_samples, n_features = 20, 5
     rng = check_random_state(random_state)
 
-    sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.int)
+    sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.float)
 
     X_clf, y_clf = datasets.make_classification(

From da2a736e79dd52a6209bb552abea3ac000a1c987 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 11:15:55 +0200
Subject: [PATCH 12/17] Fix typo in test_get_params_invariance

---
 sklearn/tests/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 1512e03ab684b..4775dedba5baf 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -229,7 +229,7 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
-            yield check_transformer_n_iter, name, Estimator
+            yield check_transformer_n_iter, name, Estimator()

From c3ca2f02be39c749290af4d3d45b7a0159b51869 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 14:08:26 +0200
Subject: [PATCH 13/17] Changed n_iter_ to n_iter in check_transformer_n_iter

---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3c141e5eb84c2..62d6aa0e78543 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1468,10 +1468,10 @@ def check_transformer_n_iter(name, estimator):
     # These return a n_iter per component.
if name in CROSS_DECOMPOSITION:
-        for iter_ in estimator.n_iter_:
+        for iter_ in estimator.n_iter:
             assert_greater(iter_, 1)
     else:
-        assert_greater(estimator.n_iter_, 1)
+        assert_greater(estimator.n_iter, 1)
 
 
 def check_get_params_invariance(name, estimator):

From 92a81e59bf003a11693f403c832a813888c98f80 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 17:14:22 +0200
Subject: [PATCH 14/17] Commented out check_transformer_n_iter in
 test_get_params_invariance because it fails

---
 sklearn/tests/test_common.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 4775dedba5baf..3569d496c6423 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -229,7 +229,7 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
-            yield check_transformer_n_iter, name, Estimator()
+            #yield check_transformer_n_iter, name, Estimator()
 
 
 def test_sample_weight_consistency(random_state=42):
@@ -275,7 +275,7 @@ def aug(data, sample_weight):
     train, test = train_test_split(range(n_samples))
 
     for name, Estimator in estimators:
-        
+
         if name in exclude:
             print ("%s is being excluded" % name)
             continue
@@ -312,7 +312,6 @@ def aug(data, sample_weight):
     yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal'
 
 
-
 def test_sample_weight_0(random_state=42):

From eedfada97c7d5afe2c1e5d9aa410ad068fe2a61b Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 18:22:21 +0200
Subject: [PATCH 15/17] Undo the n_iter_ to n_iter rename in
 check_transformer_n_iter

---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 62d6aa0e78543..3c141e5eb84c2 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1468,10 +1468,10 @@ def check_transformer_n_iter(name, estimator):
     # These return a n_iter per component.
if name in CROSS_DECOMPOSITION: - for iter_ in estimator.n_iter: + for iter_ in estimator.n_iter_: assert_greater(iter_, 1) else: - assert_greater(estimator.n_iter, 1) + assert_greater(estimator.n_iter_, 1) def check_get_params_invariance(name, estimator): From ac281b5304f83dfddfa9fd79371b06dc563d9e86 Mon Sep 17 00:00:00 2001 From: ainafp Date: Mon, 26 Oct 2015 14:06:22 +0100 Subject: [PATCH 16/17] Added case in which estimator has dual_coef_ --- sklearn/tests/test_common.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 3569d496c6423..3fa9c865c740c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -275,7 +275,7 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - + print ("%s is being analysed" % name) if name in exclude: print ("%s is being excluded" % name) continue @@ -302,7 +302,10 @@ def aug(data, sample_weight): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'dual_coef_'): + yield (assert_array_almost_equal, + estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') + elif hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') @@ -346,7 +349,7 @@ def test_sample_weight_0(random_state=42): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - + print ("%s is being analysed" % name) if name in exclude: print ("%s is being excluded" % name) continue @@ -371,7 +374,10 @@ def test_sample_weight_0(random_state=42): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'dual_coef_'): + yield (assert_array_almost_equal, + estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') + elif hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') From 2f1ea6b7f7c54388f0bd454d8028689b1d5d120c Mon Sep 17 00:00:00 2001 From: ainafp Date: Mon, 26 Oct 2015 17:07:17 +0100 Subject: [PATCH 17/17] Removed case in which estimator has dual_coef_ --- sklearn/tests/test_common.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 3fa9c865c740c..16fccb283eff1 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -302,10 +302,7 @@ def aug(data, sample_weight): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'dual_coef_'): - yield (assert_array_almost_equal, - estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') - elif hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') @@ -374,10 +371,7 @@ def test_sample_weight_0(random_state=42): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'dual_coef_'): - yield (assert_array_almost_equal, - estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') - elif hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'coef_'): 
yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal')
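
Taken together, the two generator tests in this series assert a pair of invariants for any estimator whose fit accepts sample_weight: an integer weight of k should behave like k copies of the sample, and a weight of 0 should behave like leaving the sample out. A minimal standalone sketch of both checks, using Ridge purely as an example estimator for which the equivalences hold exactly (this snippet is illustrative and not part of the patch series):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge

    X, y = make_regression(n_samples=20, n_features=5, n_informative=2,
                           random_state=42)
    rng = np.random.RandomState(42)

    # Invariant 1: integer weight k == sample repeated k times.
    w_int = rng.randint(1, 4, X.shape[0])
    coef_w = Ridge().fit(X, y, sample_weight=w_int).coef_
    coef_rep = Ridge().fit(np.repeat(X, w_int, axis=0),
                           np.repeat(y, w_int)).coef_
    np.testing.assert_array_almost_equal(coef_w, coef_rep, 6)

    # Invariant 2: weight 0 == sample left out of the fit.
    w01 = (rng.permutation(20) < 10).astype(float)
    coef_0 = Ridge().fit(X, y, sample_weight=w01).coef_
    coef_drop = Ridge().fit(X[w01 == 1], y[w01 == 1]).coef_
    np.testing.assert_array_almost_equal(coef_0, coef_drop, 6)

For estimators fit by randomized procedures (the SGD, Bagging, and forest entries in the exclude lists above), the equivalence holds only approximately or in expectation, which is why the tests skip them rather than assert it.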