From 23109f071df421b864c766101b881c2a884109a3 Mon Sep 17 00:00:00 2001
From: Michael Eickenberg
Date: Mon, 19 Oct 2015 15:00:07 +0200
Subject: [PATCH 01/17] WIP adding common test for sample weights

---
 sklearn/tests/test_common.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 5a28b31b33c2f..90fdcd09dab59 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -13,6 +13,8 @@ import pkgutil
 
 from sklearn.externals.six import PY3
+from sklearn.externals.six.moves import zip
+from sklearn.externals.funcsigs import signature
 from sklearn.utils.testing import assert_false, clean_warning_registry
 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
@@ -219,3 +221,15 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
+            yield check_transformer_n_iter, name, estimator
+
+
+def test_sample_weight_consistency():
+    estimators = all_estimators()
+
+    for name, Estimator in estimators:
+        if not 'sample_weight' in signature(Estimator.fit).keys():
+            continue
+        print (name)
+

From 7a0b7527e72b361fe965b611bde8334f1544c187 Mon Sep 17 00:00:00 2001
From: Michael Eickenberg
Date: Mon, 19 Oct 2015 17:12:48 +0200
Subject: [PATCH 02/17] WIP testing structure in place, many tests failing.
 Proceeding to check whether test should apply to all of the estimators

---
 sklearn/tests/test_common.py | 62 ++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 90fdcd09dab59..f4dd11755642c 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -11,16 +11,23 @@
 import warnings
 import sys
 import pkgutil
+import numpy as np
 
+from sklearn import datasets
+from sklearn.base import is_classifier, is_regressor
+from sklearn.cross_validation import train_test_split
 from sklearn.externals.six import PY3
 from sklearn.externals.six.moves import zip
 from sklearn.externals.funcsigs import signature
+from sklearn.utils import check_random_state
 from sklearn.utils.testing import assert_false, clean_warning_registry
 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_in
 from sklearn.utils.testing import ignore_warnings
 
+from numpy.testing import assert_array_almost_equal
+
 import sklearn
 from sklearn.cluster.bicluster import BiclusterMixin
 from sklearn.decomposition import ProjectedGradientNMF
@@ -224,12 +231,61 @@ def test_get_params_invariance():
             yield check_transformer_n_iter, name, estimator
 
 
-def test_sample_weight_consistency():
+def test_sample_weight_consistency(random_state=42):
     estimators = all_estimators()
 
+    n_samples, n_features = 20, 5
+    rng = check_random_state(random_state)
+
+    sample_weight = rng.randint(1, 4, (n_samples,))
+
+    X_clf, y_clf = datasets.make_classification(
+        n_samples=n_samples, n_features=n_features,
+        random_state=random_state)
+    X_reg, y_reg = datasets.make_regression(
+        n_samples=n_samples, n_features=n_features,
+        n_informative=2, random_state=random_state)
+
+    def aug(data, sample_weight):
+        # raise all samples to multiplicity of the corresponding sample_weight
+        aug_data = []
+        for samples, weight in zip(zip(*data), sample_weight):
+            for _ in range(weight):
+                aug_data.append(samples)
+        aug_data = map(np.array, zip(*aug_data))
+        return aug_data
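# ------------------------------------------------------------------------
# Editor's illustration (not part of the patch): for integer weights, `aug`
# treats a weight of k as k copies of the sample, which is the equivalence
# the test below relies on. A standalone NumPy sketch of the same idea
# (np.repeat is assumed as an equivalent here; the patch does not use it):
#
#     import numpy as np
#     X = np.array([[0., 1.], [2., 3.], [4., 5.]])
#     y = np.array([0, 1, 1])
#     w = np.array([2, 1, 3])
#     X_aug, y_aug = np.repeat(X, w, axis=0), np.repeat(y, w)
#     assert X_aug.shape[0] == w.sum()  # one row per unit of weight
# ------------------------------------------------------------------------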
+ + train, test = train_test_split(range(n_samples)) + for name, Estimator in estimators: - if not 'sample_weight' in signature(Estimator.fit).keys(): + if 'sample_weight' not in signature(Estimator.fit).parameters.keys(): continue - print (name) + if is_classifier(Estimator): + X, y = X_clf, y_clf + elif is_regressor(Estimator): + X, y = X_reg, y_reg + else: + print ("%s is neither classifier nor regressor" % name) + continue + + try: + estimator_sw = Estimator().fit(X[train], y[train], + sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = aug((X[train], y[train]), + sample_weight[train]) + estimator_aug = Estimator().fit(X_aug_train, y_aug_train) + except ValueError: + # LogisticRegression liblinear (standard solver) + # does not support sample weights, but the argument is there + continue + + # if estimator has `coef_` attribute, then compare the two + if hasattr(estimator_sw, 'coef_'): + yield (assert_array_almost_equal, + estimator_sw.coef_, estimator_aug.coef_) + + pred_sw = estimator_sw.predict(X[test]) + pred_aug = estimator_aug.predict(X[test]) + yield assert_array_almost_equal, pred_sw, pred_aug From 5c882fa45f88e0c72aeaeb9bc834a0577d65bd3f Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 16:47:30 +0200 Subject: [PATCH 03/17] Added random_state to the estimator, and another test for weights 0 and 1 --- sklearn/tests/test_common.py | 78 ++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index f4dd11755642c..600ee6f627a4e 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -18,13 +18,14 @@ from sklearn.cross_validation import train_test_split from sklearn.externals.six import PY3 from sklearn.externals.six.moves import zip -from sklearn.externals.funcsigs import signature from sklearn.utils import check_random_state from sklearn.utils.testing import assert_false, clean_warning_registry from sklearn.utils.testing import all_estimators from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_in from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import set_random_state +from sklearn.utils.validation import has_fit_parameter from numpy.testing import assert_array_almost_equal @@ -238,6 +239,7 @@ def test_sample_weight_consistency(random_state=42): rng = check_random_state(random_state) sample_weight = rng.randint(1, 4, (n_samples,)) + print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -258,7 +260,7 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - if 'sample_weight' not in signature(Estimator.fit).parameters.keys(): + if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): X, y = X_clf, y_clf @@ -269,11 +271,72 @@ def aug(data, sample_weight): continue try: - estimator_sw = Estimator().fit(X[train], y[train], - sample_weight=sample_weight[train]) + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), sample_weight[train]) - estimator_aug = Estimator().fit(X_aug_train, y_aug_train) + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) + + except 
ValueError: + # LogisticRegression liblinear (standard solver) + # does not support sample weights, but the argument is there + continue + + # if estimator has `coef_` attribute, then compare the two + if hasattr(estimator_sw, 'coef_'): + yield (assert_array_almost_equal, + estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + + pred_sw = estimator_sw.predict(X[test]) + pred_aug = estimator_aug.predict(X[test]) + + yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + + + +def test_sample_weight_0(random_state=42): + estimators = all_estimators() + + n_samples, n_features = 20, 5 + rng = check_random_state(random_state) + + #sample_weight = rng.uniform(-0.2, .8, (n_samples,)) + sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 + print('sample_weight = ', sample_weight) + + X_clf, y_clf = datasets.make_classification( + n_samples=n_samples, n_features=n_features, + random_state=random_state) + X_reg, y_reg = datasets.make_regression( + n_samples=n_samples, n_features=n_features, + n_informative=2, random_state=random_state) + + train, test = train_test_split(range(n_samples)) + + for name, Estimator in estimators: + + if not has_fit_parameter(Estimator, 'sample_weight'): + continue + if is_classifier(Estimator): + X, y = X_clf, y_clf + elif is_regressor(Estimator): + X, y = X_reg, y_reg + else: + print ("%s is neither classifier nor regressor" % name) + continue + + try: + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ + y[train][sample_weight[train]==1] + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) except ValueError: # LogisticRegression liblinear (standard solver) # does not support sample weights, but the argument is there @@ -282,10 +345,11 @@ def aug(data, sample_weight): # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_) + estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug + yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + From 10da04991d1b316e55fc038f7d2677bd44898404 Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 16:51:29 +0200 Subject: [PATCH 04/17] Some prints removed --- sklearn/tests/test_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 600ee6f627a4e..10bd51bbe9661 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -239,7 +239,6 @@ def test_sample_weight_consistency(random_state=42): rng = check_random_state(random_state) sample_weight = rng.randint(1, 4, (n_samples,)) - print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -303,9 +302,7 @@ def test_sample_weight_0(random_state=42): n_samples, n_features = 20, 5 rng = check_random_state(random_state) - #sample_weight = rng.uniform(-0.2, .8, (n_samples,)) sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 - print('sample_weight = ', sample_weight) X_clf, y_clf = datasets.make_classification( 
n_samples=n_samples, n_features=n_features, From d7894242bbb803d1b6d95643c164ab1ed4565896 Mon Sep 17 00:00:00 2001 From: ainafp Date: Wed, 21 Oct 2015 19:03:10 +0200 Subject: [PATCH 05/17] Removed try: and added list of excluded estimators. Added parameters for SGD estimators to change number of iterations or precision. --- sklearn/tests/test_common.py | 74 +++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 10bd51bbe9661..0c5e5cbc2f934 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -233,6 +233,11 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): + exclude = ['LogisticRegression', 'LinearSVC', + 'MultinomialNB', # Requires positive samples + 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC + ] + SGD_estimators = ['SGDClassifier', 'SGDRegressor', 'Perceptron'] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -259,6 +264,11 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: + #print ("%s being analysed" % name) + + if name in exclude: + print ("%s is being excluded" % name) + continue if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): @@ -269,40 +279,46 @@ def aug(data, sample_weight): print ("%s is neither classifier nor regressor" % name) continue - try: - estimator_sw = Estimator() - set_random_state(estimator_sw, random_state=random_state) - estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) - X_aug_train, y_aug_train = aug((X[train], y[train]), - sample_weight[train]) - estimator_aug = Estimator() - set_random_state(estimator_aug, random_state=random_state) - estimator_aug.fit(X_aug_train, y_aug_train) - - except ValueError: - # LogisticRegression liblinear (standard solver) - # does not support sample weights, but the argument is there - continue + if name in SGD_estimators: + params = dict([('n_iter', 100)]) + precision = 3 + else: + params = dict() + precision = 6 + #print ('params = ', params) + estimator_sw = Estimator(**params) + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = aug((X[train], y[train]), + sample_weight[train]) + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' + #print ("%s finsihed" % name) def test_sample_weight_0(random_state=42): + exclude = ['LogisticRegression', 'LinearSVC', + 'MultinomialNB', # Requires positive samples + 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC + ] estimators = all_estimators() n_samples, n_features = 20, 5 rng = check_random_state(random_state) - sample_weight = (rng.permutation(n_samples)<(n_samples/2.))*1 + sample_weight = 
(rng.permutation(n_samples) < (n_samples / 2.)) * 1 X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, @@ -315,6 +331,9 @@ def test_sample_weight_0(random_state=42): for name, Estimator in estimators: + if name in exclude: + print ("%s is being excluded" % name) + continue if not has_fit_parameter(Estimator, 'sample_weight'): continue if is_classifier(Estimator): @@ -325,19 +344,14 @@ def test_sample_weight_0(random_state=42): print ("%s is neither classifier nor regressor" % name) continue - try: - estimator_sw = Estimator() - set_random_state(estimator_sw, random_state=random_state) - estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) - X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ - y[train][sample_weight[train]==1] - estimator_aug = Estimator() - set_random_state(estimator_aug, random_state=random_state) - estimator_aug.fit(X_aug_train, y_aug_train) - except ValueError: - # LogisticRegression liblinear (standard solver) - # does not support sample weights, but the argument is there - continue + estimator_sw = Estimator() + set_random_state(estimator_sw, random_state=random_state) + estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) + X_aug_train, y_aug_train = X[train][sample_weight[train]==1], \ + y[train][sample_weight[train]==1] + estimator_aug = Estimator() + set_random_state(estimator_aug, random_state=random_state) + estimator_aug.fit(X_aug_train, y_aug_train) # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): From 0b2921a350e616b1cc19596aef66c468adc9710d Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 11:28:51 +0200 Subject: [PATCH 06/17] Exclusion list completed --- sklearn/tests/test_common.py | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 0c5e5cbc2f934..4687d171f7144 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -233,11 +233,19 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): - exclude = ['LogisticRegression', 'LinearSVC', + exclude = [ + 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', + 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', + 'LinearSVC', 'MultinomialNB', # Requires positive samples - 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC - ] - SGD_estimators = ['SGDClassifier', 'SGDRegressor', 'Perceptron'] + 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC + 'SGDClassifier', # Doesn't work. Probably more data needed + 'SGDRegressor', # Doesn't work. Probably more data needed + 'Perceptron', # Uses SGD too. Doesn't work. 
Probably more data needed + 'RidgeClassifierCV', 'RidgeCV', + 'RandomForestClassifier', 'RandomForestRegressor', + ] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -264,7 +272,6 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - #print ("%s being analysed" % name) if name in exclude: print ("%s is being excluded" % name) @@ -279,14 +286,7 @@ def aug(data, sample_weight): print ("%s is neither classifier nor regressor" % name) continue - if name in SGD_estimators: - params = dict([('n_iter', 100)]) - precision = 3 - else: - params = dict() - precision = 6 - #print ('params = ', params) - estimator_sw = Estimator(**params) + estimator_sw = Estimator() set_random_state(estimator_sw, random_state=random_state) estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), @@ -295,6 +295,7 @@ def aug(data, sample_weight): set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) + precision = 6 # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, @@ -304,15 +305,22 @@ def aug(data, sample_weight): pred_aug = estimator_aug.predict(X[test]) yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' - #print ("%s finsihed" % name) def test_sample_weight_0(random_state=42): - exclude = ['LogisticRegression', 'LinearSVC', + exclude = [ + 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', + 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples - 'CalibratedClassifierCV' # This is a meta-estimator using LinearSVC - ] + 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC + 'SGDClassifier', # Doesn't work. Probably more data needed + 'SGDRegressor', # Doesn't work. Probably more data needed + 'Perceptron', # Uses SGD too. Doesn't work. 
Probably more data needed + 'RidgeClassifierCV', 'RidgeCV', + 'RandomForestClassifier', 'RandomForestRegressor', + ] estimators = all_estimators() n_samples, n_features = 20, 5 @@ -353,14 +361,15 @@ def test_sample_weight_0(random_state=42): set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) + precision = 6 # if estimator has `coef_` attribute, then compare the two if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, - estimator_sw.coef_, estimator_aug.coef_, 6, name+' coef_ not equal') + estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') pred_sw = estimator_sw.predict(X[test]) pred_aug = estimator_aug.predict(X[test]) - yield assert_array_almost_equal, pred_sw, pred_aug, 6, name+' prediction not equal' + yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal' From f802d846f7c29abac2f9cb9ae451164aee9adc68 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 14:12:50 +0200 Subject: [PATCH 07/17] fixed typo in test_get_params_invariance --- sklearn/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 4687d171f7144..933e6557ca82b 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -229,7 +229,7 @@ def test_get_params_invariance(): yield check_get_params_invariance, name, Estimator else: yield check_get_params_invariance, name, Estimator - yield check_transformer_n_iter, name, estimator + yield check_transformer_n_iter, name, Estimator def test_sample_weight_consistency(random_state=42): From ddc89ce4b4082297b9e0b919618da9446bf96447 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 14:19:18 +0200 Subject: [PATCH 08/17] Added ExtraTreesRegressor and AdaBoostClassifier --- sklearn/tests/test_common.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 933e6557ca82b..86b378c10730c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -234,10 +234,10 @@ def test_get_params_invariance(): def test_sample_weight_consistency(random_state=42): exclude = [ - 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', + 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', 'ExtraTreesRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', - 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC 'SGDClassifier', # Doesn't work. 
Probably more data needed @@ -310,8 +310,9 @@ def aug(data, sample_weight): def test_sample_weight_0(random_state=42): exclude = [ - 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', + 'BaggingClassifier', 'BaggingRegressor', + 'GradientBoostingRegressor', 'ExtraTreesRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC From 984516ee709f80c7205e24098b9407b78acb7eae Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 15:08:24 +0200 Subject: [PATCH 09/17] Casting more explicit --- sklearn/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 86b378c10730c..f5a08a3ced0a6 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -327,7 +327,7 @@ def test_sample_weight_0(random_state=42): n_samples, n_features = 20, 5 rng = check_random_state(random_state) - sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)) * 1 + sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.int) X_clf, y_clf = datasets.make_classification( n_samples=n_samples, n_features=n_features, From e4e0848764bd884fb77801b983afb50859acf495 Mon Sep 17 00:00:00 2001 From: ainafp Date: Thu, 22 Oct 2015 17:24:10 +0200 Subject: [PATCH 10/17] Added test for aug --- sklearn/tests/test_common.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index f5a08a3ced0a6..f15d29b06493c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -27,7 +27,7 @@ from sklearn.utils.testing import set_random_state from sklearn.utils.validation import has_fit_parameter -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_almost_equal, assert_equal import sklearn from sklearn.cluster.bicluster import BiclusterMixin @@ -236,7 +236,10 @@ def test_sample_weight_consistency(random_state=42): exclude = [ 'AdaBoostClassifier', 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', 'ExtraTreesRegressor', + 'GradientBoostingClassifier', 'GradientBoostingRegressor', + 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'ExtraTreesClassifier', 'ExtraTreesRegressor', + 'DecisionTreeClassifier','DecisionTreeRegressor', 'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC', 'MultinomialNB', # Requires positive samples 'CalibratedClassifierCV', # This is a meta-estimator using LinearSVC @@ -291,6 +294,8 @@ def aug(data, sample_weight): estimator_sw.fit(X[train], y[train], sample_weight=sample_weight[train]) X_aug_train, y_aug_train = aug((X[train], y[train]), sample_weight[train]) + assert_equal(X_aug_train.shape[0], np.sum(sample_weight[train])) + estimator_aug = Estimator() set_random_state(estimator_aug, random_state=random_state) estimator_aug.fit(X_aug_train, y_aug_train) @@ -310,16 +315,19 @@ def aug(data, sample_weight): def test_sample_weight_0(random_state=42): exclude = [ - 'AdaBoostClassifier', 'AdaBoostRegressor', + 'AdaBoostClassifier', 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', - 'GradientBoostingRegressor', 'ExtraTreesRegressor', + 'GradientBoostingClassifier', 'GradientBoostingRegressor', + 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'ExtraTreesClassifier', 
'ExtraTreesRegressor',
         'DecisionTreeClassifier','DecisionTreeRegressor',
         'LogisticRegression', 'LogisticRegressionCV', 'LinearSVC',
         'MultinomialNB',  # Requires positive samples
         'CalibratedClassifierCV',  # This is a meta-estimator using LinearSVC
         'SGDClassifier',  # Doesn't work. Probably more data needed
         'SGDRegressor',  # Doesn't work. Probably more data needed
         'Perceptron',  # Uses SGD too. Doesn't work. Probably more data needed
         'RidgeClassifierCV', 'RidgeCV',
         'RandomForestClassifier', 'RandomForestRegressor',
     ]
     estimators = all_estimators()

From 020b0e446d99cc11e044a28a593a12e9a6906676 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Thu, 22 Oct 2015 18:41:07 +0200
Subject: [PATCH 11/17] Changed sample_weight to float

---
 sklearn/tests/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index f15d29b06493c..1512e03ab684b 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -335,7 +335,7 @@ def test_sample_weight_0(random_state=42):
     n_samples, n_features = 20, 5
     rng = check_random_state(random_state)
 
-    sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.int)
+    sample_weight = (rng.permutation(n_samples) < (n_samples / 2.)).astype(np.float)
 
     X_clf, y_clf = datasets.make_classification(

From da2a736e79dd52a6209bb552abea3ac000a1c987 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 11:15:55 +0200
Subject: [PATCH 12/17] Fix typo in test_get_params_invariance

---
 sklearn/tests/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 1512e03ab684b..4775dedba5baf 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -229,7 +229,7 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
-            yield check_transformer_n_iter, name, Estimator
+            yield check_transformer_n_iter, name, Estimator()

From c3ca2f02be39c749290af4d3d45b7a0159b51869 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 14:08:26 +0200
Subject: [PATCH 13/17] Changed n_iter_ to n_iter in check_transformer_n_iter

---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3c141e5eb84c2..62d6aa0e78543 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1468,10 +1468,10 @@ def check_transformer_n_iter(name, estimator):
     # These return a n_iter per component.
if name in CROSS_DECOMPOSITION:
-        for iter_ in estimator.n_iter_:
+        for iter_ in estimator.n_iter:
             assert_greater(iter_, 1)
     else:
-        assert_greater(estimator.n_iter_, 1)
+        assert_greater(estimator.n_iter, 1)
 
 
 def check_get_params_invariance(name, estimator):

From 92a81e59bf003a11693f403c832a813888c98f80 Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 17:14:22 +0200
Subject: [PATCH 14/17] Commented out check_transformer_n_iter in
 test_get_params_invariance because it fails

---
 sklearn/tests/test_common.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 4775dedba5baf..3569d496c6423 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -229,7 +229,7 @@ def test_get_params_invariance():
             yield check_get_params_invariance, name, Estimator
         else:
             yield check_get_params_invariance, name, Estimator
-            yield check_transformer_n_iter, name, Estimator()
+            #yield check_transformer_n_iter, name, Estimator()
 
 
 def test_sample_weight_consistency(random_state=42):
@@ -275,7 +275,7 @@ def aug(data, sample_weight):
     train, test = train_test_split(range(n_samples))
 
     for name, Estimator in estimators:
-        
+
         if name in exclude:
             print ("%s is being excluded" % name)
             continue
@@ -312,7 +312,6 @@ def aug(data, sample_weight):
     yield assert_array_almost_equal, pred_sw, pred_aug, precision, name+' prediction not equal'
 
 
-
 def test_sample_weight_0(random_state=42):

From eedfada97c7d5afe2c1e5d9aa410ad068fe2a61b Mon Sep 17 00:00:00 2001
From: ainafp
Date: Fri, 23 Oct 2015 18:22:21 +0200
Subject: [PATCH 15/17] Undo the n_iter_ to n_iter rename in
 check_transformer_n_iter

---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 62d6aa0e78543..3c141e5eb84c2 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1468,10 +1468,10 @@ def check_transformer_n_iter(name, estimator):
     # These return a n_iter per component.
if name in CROSS_DECOMPOSITION: - for iter_ in estimator.n_iter: + for iter_ in estimator.n_iter_: assert_greater(iter_, 1) else: - assert_greater(estimator.n_iter, 1) + assert_greater(estimator.n_iter_, 1) def check_get_params_invariance(name, estimator): From ac281b5304f83dfddfa9fd79371b06dc563d9e86 Mon Sep 17 00:00:00 2001 From: ainafp Date: Mon, 26 Oct 2015 14:06:22 +0100 Subject: [PATCH 16/17] Added case in which estimator has dual_coef_ --- sklearn/tests/test_common.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 3569d496c6423..3fa9c865c740c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -275,7 +275,7 @@ def aug(data, sample_weight): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - + print ("%s is being analysed" % name) if name in exclude: print ("%s is being excluded" % name) continue @@ -302,7 +302,10 @@ def aug(data, sample_weight): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'dual_coef_'): + yield (assert_array_almost_equal, + estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') + elif hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') @@ -346,7 +349,7 @@ def test_sample_weight_0(random_state=42): train, test = train_test_split(range(n_samples)) for name, Estimator in estimators: - + print ("%s is being analysed" % name) if name in exclude: print ("%s is being excluded" % name) continue @@ -371,7 +374,10 @@ def test_sample_weight_0(random_state=42): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'dual_coef_'): + yield (assert_array_almost_equal, + estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') + elif hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') From 2f1ea6b7f7c54388f0bd454d8028689b1d5d120c Mon Sep 17 00:00:00 2001 From: ainafp Date: Mon, 26 Oct 2015 17:07:17 +0100 Subject: [PATCH 17/17] Removed case in which estimator has dual_coef_ --- sklearn/tests/test_common.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 3fa9c865c740c..16fccb283eff1 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -302,10 +302,7 @@ def aug(data, sample_weight): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'dual_coef_'): - yield (assert_array_almost_equal, - estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') - elif hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'coef_'): yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal') @@ -374,10 +371,7 @@ def test_sample_weight_0(random_state=42): precision = 6 # if estimator has `coef_` attribute, then compare the two - if hasattr(estimator_sw, 'dual_coef_'): - yield (assert_array_almost_equal, - estimator_sw.dual_coef_, estimator_aug.dual_coef_, precision, name+' dual_coef_ not equal') - elif hasattr(estimator_sw, 'coef_'): + if hasattr(estimator_sw, 'coef_'): 
yield (assert_array_almost_equal, estimator_sw.coef_, estimator_aug.coef_, precision, name+' coef_ not equal')
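
Taken together, the two generator tests in this series assert a pair of invariants for any estimator whose fit accepts sample_weight: an integer weight of k should behave like k copies of the sample, and a weight of 0 should behave like leaving the sample out. A minimal standalone sketch of both checks, using Ridge purely as an example estimator for which the equivalences hold exactly (this snippet is illustrative and not part of the patch series):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge

    X, y = make_regression(n_samples=20, n_features=5, n_informative=2,
                           random_state=42)
    rng = np.random.RandomState(42)

    # Invariant 1: integer weight k == sample repeated k times.
    w_int = rng.randint(1, 4, X.shape[0])
    coef_w = Ridge().fit(X, y, sample_weight=w_int).coef_
    coef_rep = Ridge().fit(np.repeat(X, w_int, axis=0),
                           np.repeat(y, w_int)).coef_
    np.testing.assert_array_almost_equal(coef_w, coef_rep, 6)

    # Invariant 2: weight 0 == sample left out of the fit.
    w01 = (rng.permutation(20) < 10).astype(float)
    coef_0 = Ridge().fit(X, y, sample_weight=w01).coef_
    coef_drop = Ridge().fit(X[w01 == 1], y[w01 == 1]).coef_
    np.testing.assert_array_almost_equal(coef_0, coef_drop, 6)

For estimators fit by randomized procedures (the SGD, Bagging, and forest entries in the exclude lists above), the equivalence holds only approximately or in expectation, which is why the tests skip them rather than assert it.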