From 542cd7be863e6b06968cee122ab58def22382517 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Mon, 1 Apr 2019 06:43:40 -0400
Subject: [PATCH 1/5] WIP

---
 sklearn/base.py                   |   3 +-
 sklearn/dummy.py                  |   6 +-
 sklearn/ensemble/base.py          |   3 +
 sklearn/utils/estimator_checks.py | 175 +++++++++++++++----------
 4 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 167baaf2b7ebd..4934e4ddeff69 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -25,7 +25,8 @@
     'stateless': False,
     'multilabel': False,
     '_skip_test': False,
-    'multioutput_only': False}
+    'multioutput_only': False,
+    'supports_sample_weight': False}
 
 
 def clone(estimator, safe=True):
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index b83712e37f1eb..3a803a98ae466 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -318,7 +318,8 @@ def predict_log_proba(self, X):
         return [np.log(p) for p in proba]
 
     def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}
 
     def score(self, X, y, sample_weight=None):
         """Returns the mean accuracy on the given test data and labels.
@@ -510,7 +511,8 @@ def predict(self, X, return_std=False):
         return (y, y_std) if return_std else y
 
     def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}
 
     def score(self, X, y, sample_weight=None):
         """Returns the coefficient of determination R^2 of the prediction.
diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py
index 7ac1dd4f72613..0ecf212b6e51f 100644
--- a/sklearn/ensemble/base.py
+++ b/sklearn/ensemble/base.py
@@ -147,6 +147,9 @@ def __iter__(self):
         """Returns iterator over estimators in the ensemble."""
         return iter(self.estimators_)
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 def _partition_estimators(n_estimators, n_jobs):
     """Private function used to partition estimators between jobs."""
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 73c98ea4685be..6c693f1afba3e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -78,38 +78,40 @@ def _safe_tags(estimator, key=None):
 
 def _yield_checks(name, estimator):
     tags = _safe_tags(estimator)
-    yield check_estimators_dtypes
-    yield check_fit_score_takes_y
-    yield check_sample_weights_pandas_series
-    yield check_sample_weights_list
-    yield check_sample_weights_invariance
-    yield check_estimators_fit_returns_self
-    yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
-
-    # Check that all estimator yield informative messages when
-    # trained on empty datasets
-    if not tags["no_validation"]:
-        yield check_complex_data
-        yield check_dtype_object
-        yield check_estimators_empty_data_messages
-
-    if name not in CROSS_DECOMPOSITION:
-        # cross-decomposition's "transform" returns X and Y
-        yield check_pipeline_consistency
-
-    if not tags["allow_nan"] and not tags["no_validation"]:
-        # Test that all estimators check their input for NaN's and infs
-        yield check_estimators_nan_inf
-
-    yield check_estimators_overwrite_params
-    if hasattr(estimator, 'sparsify'):
-        yield check_sparsify_coefficients
-
-    yield check_estimator_sparse_data
-
-    # Test that estimators can be pickled, and once pickled
-    # give the same answer as before.
-    yield check_estimators_pickle
+    # yield check_estimators_dtypes
+    # yield check_fit_score_takes_y
+    if tags['supports_sample_weight']:
+        print(name)
+        yield check_sample_weights_pandas_series
+        yield check_sample_weights_list
+        yield check_sample_weights_invariance
+    # yield check_estimators_fit_returns_self
+    # yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
+
+    # # Check that all estimator yield informative messages when
+    # # trained on empty datasets
+    # if not tags["no_validation"]:
+    #     yield check_complex_data
+    #     yield check_dtype_object
+    #     yield check_estimators_empty_data_messages
+
+    # if name not in CROSS_DECOMPOSITION:
+    #     # cross-decomposition's "transform" returns X and Y
+    #     yield check_pipeline_consistency
+
+    # if not tags["allow_nan"] and not tags["no_validation"]:
+    #     # Test that all estimators check their input for NaN's and infs
+    #     yield check_estimators_nan_inf
+
+    # yield check_estimators_overwrite_params
+    # if hasattr(estimator, 'sparsify'):
+    #     yield check_sparsify_coefficients
+
+    # yield check_estimator_sparse_data
+
+    # # Test that estimators can be pickled, and once pickled
+    # # give the same answer as before.
+    # yield check_estimators_pickle
 
 
 def _yield_classifier_checks(name, classifier):
@@ -240,31 +242,31 @@ def _yield_all_checks(name, estimator):
     for check in _yield_checks(name, estimator):
         yield check
-    if is_classifier(estimator):
-        for check in _yield_classifier_checks(name, estimator):
-            yield check
-    if is_regressor(estimator):
-        for check in _yield_regressor_checks(name, estimator):
-            yield check
-    if hasattr(estimator, 'transform'):
-        for check in _yield_transformer_checks(name, estimator):
-            yield check
-    if isinstance(estimator, ClusterMixin):
-        for check in _yield_clustering_checks(name, estimator):
-            yield check
-    if is_outlier_detector(estimator):
-        for check in _yield_outliers_checks(name, estimator):
-            yield check
-    yield check_fit2d_predict1d
-    yield check_methods_subset_invariance
-    yield check_fit2d_1sample
-    yield check_fit2d_1feature
-    yield check_fit1d
-    yield check_get_params_invariance
-    yield check_set_params
-    yield check_dict_unchanged
-    yield check_dont_overwrite_parameters
-    yield check_fit_idempotent
+    # if is_classifier(estimator):
+    #     for check in _yield_classifier_checks(name, estimator):
+    #         yield check
+    # if is_regressor(estimator):
+    #     for check in _yield_regressor_checks(name, estimator):
+    #         yield check
+    # if hasattr(estimator, 'transform'):
+    #     for check in _yield_transformer_checks(name, estimator):
+    #         yield check
+    # if isinstance(estimator, ClusterMixin):
+    #     for check in _yield_clustering_checks(name, estimator):
+    #         yield check
+    # if is_outlier_detector(estimator):
+    #     for check in _yield_outliers_checks(name, estimator):
+    #         yield check
+    # yield check_fit2d_predict1d
+    # yield check_methods_subset_invariance
+    # yield check_fit2d_1sample
+    # yield check_fit2d_1feature
+    # yield check_fit1d
+    # yield check_get_params_invariance
+    # yield check_set_params
+    # yield check_dict_unchanged
+    # yield check_dont_overwrite_parameters
+    # yield check_fit_idempotent
 
 
 def check_estimator(Estimator):
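
Note (illustration, not part of the patch): the tag added to _DEFAULT_TAGS in
sklearn/base.py defaults to False and is read back through _safe_tags and
_get_tags, so only estimators that opt in get the three
check_sample_weights_* checks above. A minimal sketch with a hypothetical
estimator, assuming the _get_tags/_more_tags machinery of this branch:

    from sklearn.base import BaseEstimator

    class WeightedEstimator(BaseEstimator):
        """Hypothetical estimator opting in to the sample weight checks."""

        def fit(self, X, y, sample_weight=None):
            return self

        def _more_tags(self):
            # Overrides the False default added to _DEFAULT_TAGS above.
            return {'supports_sample_weight': True}

    # _get_tags() merges _DEFAULT_TAGS with every _more_tags() in the MRO,
    # so the three sample weight checks would now be yielded for this class.
    assert WeightedEstimator()._get_tags()['supports_sample_weight']
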
@@ -546,50 +548,47 @@ def check_sample_weights_pandas_series(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type pandas.Series in the 'fit' function.
     estimator = clone(estimator_orig)
-    if has_fit_parameter(estimator, "sample_weight"):
+    try:
+        import pandas as pd
+        X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
+                      [2, 1], [2, 2], [2, 3], [2, 4]])
+        X = pd.DataFrame(pairwise_estimator_convert_X(X, estimator_orig))
+        y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2])
+        weights = pd.Series([1] * 8)
+        if _safe_tags(estimator, "multioutput_only"):
+            y = pd.DataFrame(y)
         try:
-            import pandas as pd
-            X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
-                          [2, 1], [2, 2], [2, 3], [2, 4]])
-            X = pd.DataFrame(pairwise_estimator_convert_X(X, estimator_orig))
-            y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2])
-            weights = pd.Series([1] * 8)
-            if _safe_tags(estimator, "multioutput_only"):
-                y = pd.DataFrame(y)
-            try:
-                estimator.fit(X, y, sample_weight=weights)
-            except ValueError:
-                raise ValueError("Estimator {0} raises error if "
-                                 "'sample_weight' parameter is of "
-                                 "type pandas.Series".format(name))
-        except ImportError:
-            raise SkipTest("pandas is not installed: not testing for "
-                           "input of type pandas.Series to class weight.")
+            estimator.fit(X, y, sample_weight=weights)
+        except ValueError:
+            raise ValueError("Estimator {0} raises error if "
+                             "'sample_weight' parameter is of "
+                             "type pandas.Series".format(name))
+    except ImportError:
+        raise SkipTest("pandas is not installed: not testing for "
+                       "input of type pandas.Series to class weight.")
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_sample_weights_list(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type list in the 'fit' function.
-    if has_fit_parameter(estimator_orig, "sample_weight"):
-        estimator = clone(estimator_orig)
-        rnd = np.random.RandomState(0)
-        X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
-                                         estimator_orig)
-        y = np.arange(10) % 3
-        y = multioutput_estimator_convert_y_2d(estimator, y)
-        sample_weight = [3] * 10
-        # Test that estimators don't raise any exception
-        estimator.fit(X, y, sample_weight=sample_weight)
+    estimator = clone(estimator_orig)
+    rnd = np.random.RandomState(0)
+    X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
+                                     estimator_orig)
+    y = np.arange(10) % 3
+    y = multioutput_estimator_convert_y_2d(estimator, y)
+    sample_weight = [3] * 10
+    # Test that estimators don't raise any exception
+    estimator.fit(X, y, sample_weight=sample_weight)
 
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_sample_weights_invariance(name, estimator_orig):
     # check that the estimators yield same results for
     # unit weights and no weights
-    if (has_fit_parameter(estimator_orig, "sample_weight") and
-            not (hasattr(estimator_orig, "_pairwise")
-                 and estimator_orig._pairwise)):
+    if not (hasattr(estimator_orig, "_pairwise")
+            and estimator_orig._pairwise):
         # We skip pairwise because the data is not pairwise
         estimator1 = clone(estimator_orig)

From 3d98f7dd3f73fd76d3e8ad8e0a0194cce2924d96 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Tue, 2 Apr 2019 14:10:47 -0400
Subject: [PATCH 2/5] done ensemble + svm

---
 sklearn/ensemble/bagging.py                      | 14 ++++++++++++--
 sklearn/ensemble/gradient_boosting.py            | 16 +++++-----------
 sklearn/ensemble/tests/test_gradient_boosting.py | 12 ------------
 sklearn/ensemble/weight_boosting.py              |  2 +-
 sklearn/impute.py                                |  4 ++++
 sklearn/svm/base.py                              |  3 +++
 sklearn/svm/classes.py                           |  6 ++++++
 sklearn/tree/tree.py                             |  3 +++
 8 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index f21fbd81b9a8c..bd6ceda41f210 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -66,8 +66,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,
     max_samples = ensemble._max_samples
     bootstrap = ensemble.bootstrap
     bootstrap_features = ensemble.bootstrap_features
-    support_sample_weight = has_fit_parameter(ensemble.base_estimator_,
-                                              "sample_weight")
+    support_sample_weight = ensemble._get_tags()['supports_sample_weight']
 
     if not support_sample_weight and sample_weight is not None:
         raise ValueError("The base estimator doesn't support sample weight")
@@ -427,6 +426,17 @@ def estimators_samples_(self):
         return [sample_indices
                 for _, sample_indices in self._get_estimators_indices()]
 
+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use a decision tree,
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}
+
 
 class BaggingClassifier(BaseBagging, ClassifierMixin):
     """A Bagging classifier.
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 253cc60ea8ef2..2d2b2d0b55f7a 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -1478,21 +1478,15 @@ def fit(self, X, y, sample_weight=None, monitor=None):
             raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),
                                        dtype=np.float64)
         else:
-            # XXX clean this once we have a support_sample_weight tag
             if sample_weight_is_none:
                 self.init_.fit(X, y)
             else:
-                msg = ("The initial estimator {} does not support sample "
-                       "weights.".format(self.init_.__class__.__name__))
-                try:
-                    self.init_.fit(X, y, sample_weight=sample_weight)
-                except TypeError:  # regular estimator without SW support
+                if not self.init_._get_tags()['supports_sample_weight']:
+                    msg = ("The initial estimator {} does not "
+                           "support sample weights."
+                           .format(self.init_.__class__.__name__))
                     raise ValueError(msg)
-                except ValueError as e:
-                    if 'not enough values to unpack' in str(e):  # pipeline
-                        raise ValueError(msg) from e
-                    else:  # regular estimator whose input checking failed
-                        raise
+                self.init_.fit(X, y, sample_weight=sample_weight)
 
             raw_predictions = \
                 self.loss_.get_init_raw_predictions(X, self.init_)
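
Note (illustration, not part of the patch): with the gradient boosting change
above, an unsupported init estimator is rejected up front via its tag instead
of by probing fit and parsing exceptions. A sketch of the intended behaviour;
GaussianProcessRegressor is only used here as an example of an estimator
whose fit takes no sample_weight, so its tag keeps the False default:

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.gaussian_process import GaussianProcessRegressor

    rng = np.random.RandomState(0)
    X = rng.rand(20, 3)
    y = X.sum(axis=1)

    gb = GradientBoostingRegressor(init=GaussianProcessRegressor())
    gb.fit(X, y)  # fine: sample_weight is None, init is fitted normally
    gb.fit(X, y, sample_weight=np.ones(20))  # ValueError from the new check
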
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 821124b73cb1c..a615981401a2c 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1395,18 +1395,6 @@ def test_gradient_boosting_with_init_pipeline():
                        'weights'):
         gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
 
-    # Passing sample_weight to a pipeline raises a ValueError. This test makes
-    # sure we make the distinction between ValueError raised by a pipeline that
-    # was passed sample_weight, and a ValueError raised by a regular estimator
-    # whose input checking failed.
-    with pytest.raises(
-            ValueError,
-            match='nu <= 0 or nu > 1'):
-        # Note that NuSVR properly supports sample_weight
-        init = NuSVR(gamma='auto', nu=1.5)
-        gb = GradientBoostingRegressor(init=init)
-        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
-
 
 @pytest.mark.parametrize('estimator, missing_method', [
     (GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'),
diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py
index 6e13b7bd80ae2..e259db04b6451 100644
--- a/sklearn/ensemble/weight_boosting.py
+++ b/sklearn/ensemble/weight_boosting.py
@@ -440,7 +440,7 @@ def _validate_estimator(self):
                 "probabilities with a predict_proba method.\n"
                 "Please change the base estimator or set "
                 "algorithm='SAMME' instead.")
-        if not has_fit_parameter(self.base_estimator_, "sample_weight"):
+        if not self.base_estimator_._get_tags()['supports_sample_weight']:
             raise ValueError("%s doesn't support sample_weight."
                              % self.base_estimator_.__class__.__name__)
 
diff --git a/sklearn/impute.py b/sklearn/impute.py
index ea4e8663d0313..3263d28af3451 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -1139,11 +1139,15 @@ def _get_missing_features_info(self, X):
             # The imputer mask will be constructed with the same sparse format
             # as X.
+            print(mask)
             sparse_constructor = (sparse.csr_matrix if X.format == 'csr'
                                   else sparse.csc_matrix)
             imputer_mask = sparse_constructor(
                 (mask, X.indices.copy(), X.indptr.copy()),
                 shape=X.shape, dtype=bool)
+            print(imputer_mask.todense())
+            # imputer_mask.eliminate_zeros()
+            print(imputer_mask.todense())
 
             missing_values_mask = imputer_mask.copy()
             missing_values_mask.eliminate_zeros()
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index effb0dcd12504..f2ce7b4b01dcc 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -495,6 +495,9 @@ def coef_(self):
     def _get_coef(self):
         return safe_sparse_dot(self._dual_coef_, self.support_vectors_)
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class BaseSVC(BaseLibSVM, ClassifierMixin, metaclass=ABCMeta):
     """ABC for LibSVM-based classifiers."""
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index e73ca2e559bb6..85401d69d0d63 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -244,6 +244,9 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class LinearSVR(LinearModel, RegressorMixin):
     """Linear Support Vector Regression.
@@ -425,6 +428,9 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class SVC(BaseSVC):
     """C-Support Vector Classification.
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index a07e6a0ca5d9a..9a2438d863706 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -526,6 +526,9 @@ def feature_importances_(self):
 
         return self.tree_.compute_feature_importances()
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 # =============================================================================
 # Public estimators

From 5f1ed2b9a13640a80c23e28c7de9958aef7b5d97 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Tue, 2 Apr 2019 14:13:22 -0400
Subject: [PATCH 3/5] reverted unwanted changes

---
 sklearn/impute.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/impute.py b/sklearn/impute.py
index 3263d28af3451..ea4e8663d0313 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -1139,15 +1139,11 @@ def _get_missing_features_info(self, X):
             # The imputer mask will be constructed with the same sparse format
             # as X.
-            print(mask)
             sparse_constructor = (sparse.csr_matrix if X.format == 'csr'
                                   else sparse.csc_matrix)
             imputer_mask = sparse_constructor(
                 (mask, X.indices.copy(), X.indptr.copy()),
                 shape=X.shape, dtype=bool)
-            print(imputer_mask.todense())
-            # imputer_mask.eliminate_zeros()
-            print(imputer_mask.todense())
 
             missing_values_mask = imputer_mask.copy()
             missing_values_mask.eliminate_zeros()

From bc61c5f475171afb15c75b45ab471af210a69e20 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 3 Apr 2019 14:29:44 -0400
Subject: [PATCH 4/5] more on this

---
 sklearn/calibration.py                      | 11 +++++++++++
 sklearn/cluster/dbscan_.py                  |  3 +++
 sklearn/cluster/k_means_.py                 |  2 ++
 sklearn/compose/_target.py                  | 12 +++++++++++-
 sklearn/ensemble/bagging.py                 |  2 +-
 sklearn/ensemble/voting_classifier.py       | 17 +++++++++++++----
 sklearn/ensemble/weight_boosting.py         |  1 -
 sklearn/isotonic.py                         |  3 ++-
 sklearn/kernel_ridge.py                     |  3 +++
 sklearn/linear_model/base.py                |  3 +++
 sklearn/linear_model/bayes.py               |  3 +++
 sklearn/linear_model/huber.py               |  3 +++
 sklearn/linear_model/logistic.py            |  6 ++++++
 sklearn/linear_model/perceptron.py          |  3 +++
 sklearn/linear_model/ransac.py              | 18 +++++++++++++-----
 sklearn/linear_model/ridge.py               |  6 ++++++
 sklearn/linear_model/stochastic_gradient.py |  6 ++++++
 sklearn/multioutput.py                      | 11 +++++++----
 sklearn/naive_bayes.py                      |  3 +++
 sklearn/neighbors/kde.py                    |  3 +++
 sklearn/utils/estimator_checks.py           | 10 +++++++++-
 21 files changed, 111 insertions(+), 18 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index f84cbb328370f..fd04eea0d847a 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -199,6 +199,17 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearSVC
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}
+
     def predict_proba(self, X):
         """Posterior probabilities of classification
diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py
index f21beb3f91453..afcd98e85a60c 100644
--- a/sklearn/cluster/dbscan_.py
+++ b/sklearn/cluster/dbscan_.py
@@ -367,3 +367,6 @@ def fit_predict(self, X, y=None, sample_weight=None):
         """
         self.fit(X, sample_weight=sample_weight)
         return self.labels_
+
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
\ No newline at end of file
diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index 042e6990b5df1..7aae4551b258c 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -1103,6 +1103,8 @@ def score(self, X, y=None, sample_weight=None):
         return -_labels_inertia(X, sample_weight, x_squared_norms,
                                 self.cluster_centers_)[1]
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
 
 def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
                      old_center_buffer, compute_squared_diff,
diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py
index 5213605defd30..7c0bd9a72cbaf 100644
--- a/sklearn/compose/_target.py
+++ b/sklearn/compose/_target.py
@@ -235,4 +235,14 @@ def predict(self, X):
         return pred_trans
 
     def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        if self.regressor is None:
+            # regressor can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.regressor._get_tags()['supports_sample_weight'])
+
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': supports_sample_weight}
+
diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index bd6ceda41f210..9c00a54f78f5c 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -20,7 +20,7 @@
 from ..utils.metaestimators import if_delegate_has_method
 from ..utils.multiclass import check_classification_targets
 from ..utils.random import sample_without_replacement
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted
 
 
 __all__ = ["BaggingClassifier",
diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py
index 63e0ee94a97b5..2d41ba5677173 100644
--- a/sklearn/ensemble/voting_classifier.py
+++ b/sklearn/ensemble/voting_classifier.py
@@ -18,7 +18,7 @@
 from ..base import clone
 from ..preprocessing import LabelEncoder
 from ..utils._joblib import Parallel, delayed
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted
 from ..utils.metaestimators import _BaseComposition
 from ..utils import Bunch
 
@@ -176,10 +176,11 @@ def fit(self, X, y, sample_weight=None):
                              % (len(self.weights), len(self.estimators)))
 
         if sample_weight is not None:
-            for name, step in self.estimators:
-                if not has_fit_parameter(step, 'sample_weight'):
+            for name, est in self.estimators:
+                if not est._get_tags()['supports_sample_weight']:
                     raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
+                                     ' support sample weights.' %
+                                     est.__class__.__name__)
 
         names, clfs = zip(*self.estimators)
         self._validate_names(names)
@@ -343,3 +344,11 @@ def get_params(self, deep=True):
     def _predict(self, X):
         """Collect results from clf.predict calls. """
""" return np.asarray([clf.predict(X) for clf in self.estimators_]).T + + def _more_tags(self): + supports_sample_weight = all( + est._get_tags()['supports_sample_weight'] + for est in self.estimators + ) + + return {'supports_sample_weight': supports_sample_weight} diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index e259db04b6451..bf563254d48bd 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -37,7 +37,6 @@ from ..utils.extmath import stable_cumsum from ..metrics import accuracy_score, r2_score from ..utils.validation import check_is_fitted -from ..utils.validation import has_fit_parameter from ..utils.validation import _num_samples __all__ = [ diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index 3b8f74a946699..216d18c845665 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -407,4 +407,5 @@ def __setstate__(self, state): self._build_f(self._necessary_X_, self._necessary_y_) def _more_tags(self): - return {'X_types': ['1darray']} + return {'X_types': ['1darray'], + 'supports_sample_weight': True} diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index aeb5fd45f413f..bd657d52da8fd 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -192,3 +192,6 @@ def predict(self, X): check_is_fitted(self, ["X_fit_", "dual_coef_"]) K = self._get_kernel(X, self.X_fit_) return np.dot(K, self.dual_coef_) + + def _more_tags(self): + return {'supports_sample_weight': True} diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 54083fee1e904..c210d3ae3e1dc 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -508,6 +508,9 @@ def rmatvec(b): self._set_intercept(X_offset, y_offset, X_scale) return self + def _more_tags(self): + return {'supports_sample_weight': True} + def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, check_input=True): diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py index 1ff59aa313b99..0d75a50667c79 100644 --- a/sklearn/linear_model/bayes.py +++ b/sklearn/linear_model/bayes.py @@ -359,6 +359,9 @@ def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals, return score + def _more_tags(self): + return {'supports_sample_weight': True} + ############################################################################### # ARD (Automatic Relevance Determination) regression diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py index 285913684832f..7b7443f81397e 100644 --- a/sklearn/linear_model/huber.py +++ b/sklearn/linear_model/huber.py @@ -305,3 +305,6 @@ def fit(self, X, y, sample_weight=None): y - safe_sparse_dot(X, self.coef_) - self.intercept_) self.outliers_ = residual > self.scale_ * self.epsilon return self + + def _more_tags(self): + return {'supports_sample_weight': True} \ No newline at end of file diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index be664d5b5c087..af3e4240558ed 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1680,6 +1680,9 @@ def predict_log_proba(self, X): """ return np.log(self.predict_proba(X)) + def _more_tags(self): + return {'supports_sample_weight': True} + class LogisticRegressionCV(LogisticRegression, BaseEstimator, LinearClassifierMixin): @@ -2260,3 +2263,6 @@ def score(self, X, y, sample_weight=None): scoring = get_scorer(scoring) return scoring(self, X, y, sample_weight=sample_weight) + + def _more_tags(self): + return 
diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py
index 2bf7899069864..393c414d6d60d 100644
--- a/sklearn/linear_model/perceptron.py
+++ b/sklearn/linear_model/perceptron.py
@@ -153,3 +153,6 @@ def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
             validation_fraction=validation_fraction,
             n_iter_no_change=n_iter_no_change, power_t=0.5,
             warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)
+
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index 0205b75df55cf..4e3bdf3997cb9 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -13,7 +13,6 @@
 from ..utils.random import sample_without_replacement
 from ..utils.validation import check_is_fitted
 from .base import LinearRegression
-from ..utils.validation import has_fit_parameter
 from ..exceptions import ConvergenceWarning
 
 _EPSILON = np.spacing(1)
@@ -316,11 +315,9 @@ def fit(self, X, y, sample_weight=None):
             except ValueError:
                 pass
 
-        estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
-                                                            "sample_weight")
+        supports_sample_weight = self._get_tags()['supports_sample_weight']
         estimator_name = type(base_estimator).__name__
-        if (sample_weight is not None and not
-                estimator_fit_has_sample_weight):
+        if sample_weight is not None and not supports_sample_weight:
             raise ValueError("%s does not support sample_weight. Samples"
                              " weights are only used for the calibration"
                              " itself." % estimator_name)
@@ -492,3 +489,14 @@ def score(self, X, y):
         check_is_fitted(self, 'estimator_')
 
         return self.estimator_.score(X, y)
+
+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index e1fc9b42438e4..35bc43892d3e0 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -572,6 +572,9 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class Ridge(_BaseRidge, RegressorMixin):
     """Linear least squares with l2 regularization.
@@ -1223,6 +1226,9 @@ def fit(self, X, y, sample_weight=None):
 
         return self
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class RidgeCV(_BaseRidgeCV, RegressorMixin):
     """Ridge regression with built-in cross-validation.
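
Note (illustration, not part of the patch): RANSACRegressor follows the same
delegate-or-default pattern as Bagging and CalibratedClassifierCV above: a
None base_estimator means the LinearRegression default, which supports
sample_weight, otherwise the wrapped estimator's own tag is consulted:

    from sklearn.linear_model import RANSACRegressor
    from sklearn.tree import DecisionTreeRegressor

    RANSACRegressor()._get_tags()['supports_sample_weight']  # True (default)
    RANSACRegressor(DecisionTreeRegressor())._get_tags()[
        'supports_sample_weight']  # True, via the tree.py change in PATCH 2/5
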
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 3e33e59588117..b71eeaee4cade 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -1048,6 +1048,9 @@ def predict_log_proba(self):
     def _predict_log_proba(self, X):
         return np.log(self.predict_proba(X))
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class BaseSGDRegressor(BaseSGD, RegressorMixin):
 
@@ -1526,3 +1529,6 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
             validation_fraction=validation_fraction,
             n_iter_no_change=n_iter_no_change,
             warm_start=warm_start, average=average)
+
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index 0da22e5e570d7..7d93d1facc9ed 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -23,7 +23,7 @@
 from .utils import check_array, check_X_y, check_random_state
 from .utils.fixes import parallel_helper
 from .utils.metaestimators import if_delegate_has_method
-from .utils.validation import check_is_fitted, has_fit_parameter
+from .utils.validation import check_is_fitted
 from .utils.multiclass import check_classification_targets
 from .utils._joblib import Parallel, delayed
 
@@ -106,7 +106,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
                              "multi-output regression but has only one.")
 
         if (sample_weight is not None and
-                not has_fit_parameter(self.estimator, 'sample_weight')):
+                not self.estimator._get_tags()['supports_sample_weight']):
             raise ValueError("Underlying estimator does not support"
                              " sample weights.")
 
@@ -159,7 +159,7 @@ def fit(self, X, y, sample_weight=None):
                              "multi-output regression but has only one.")
 
         if (sample_weight is not None and
-                not has_fit_parameter(self.estimator, 'sample_weight')):
+                not self.estimator._get_tags()['supports_sample_weight']):
             raise ValueError("Underlying estimator does not support"
                              " sample weights.")
 
@@ -197,7 +197,10 @@ def predict(self, X):
         return np.asarray(y).T
 
     def _more_tags(self):
-        return {'multioutput_only': True}
+        supports_sample_weight = (
+            self.estimator._get_tags()['supports_sample_weight'])
+        return {'multioutput_only': True,
+                'supports_sample_weight': supports_sample_weight}
 
 
 class MultiOutputRegressor(MultiOutputEstimator, RegressorMixin):
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 5d18327c8a261..1f549d8c07804 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -102,6 +102,9 @@ def predict_proba(self, X):
         """
         return np.exp(self.predict_log_proba(X))
 
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
+
 
 class GaussianNB(BaseNB):
     """
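
Note (illustration, not part of the patch): MultiOutputRegressor above
simply inherits the tag from its wrapped estimator, which is what the
fit/partial_fit guards rely on:

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.linear_model import Ridge
    from sklearn.gaussian_process import GaussianProcessRegressor

    MultiOutputRegressor(Ridge())._get_tags()['supports_sample_weight']
    # True; with GaussianProcessRegressor (no sample_weight in fit) the same
    # expression is False, and passing sample_weight to the wrapper raises
    # the ValueError shown above.
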
diff --git a/sklearn/neighbors/kde.py b/sklearn/neighbors/kde.py
index be5002e579423..8c7539ae8b77c 100644
--- a/sklearn/neighbors/kde.py
+++ b/sklearn/neighbors/kde.py
@@ -244,3 +244,6 @@ def sample(self, n_samples=1, random_state=None):
         correction = (gammainc(0.5 * dim, 0.5 * s_sq) ** (1. / dim)
                       * self.bandwidth / np.sqrt(s_sq))
         return data[i] + X * correction[:, np.newaxis]
+
+    def _more_tags(self):
+        return {'supports_sample_weight': True}
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 6c693f1afba3e..acf6472bcb663 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -81,7 +81,6 @@ def _yield_checks(name, estimator):
     # yield check_estimators_dtypes
     # yield check_fit_score_takes_y
     if tags['supports_sample_weight']:
-        print(name)
         yield check_sample_weights_pandas_series
         yield check_sample_weights_list
         yield check_sample_weights_invariance
@@ -267,6 +266,7 @@ def _yield_all_checks(name, estimator):
     # yield check_dict_unchanged
     # yield check_dont_overwrite_parameters
     # yield check_fit_idempotent
+    yield check_supports_sample_weight_tag
 
 
 def check_estimator(Estimator):
@@ -2467,3 +2467,11 @@ def check_fit_idempotent(name, estimator_orig):
         if hasattr(estimator, method):
             new_result = getattr(estimator, method)(X_test)
             assert_allclose_dense_sparse(result[method], new_result)
+
+
+def check_supports_sample_weight_tag(name, estimator_orig):
+    # Make sure that the supports_sample_weight tag is correct
+
+    estimator = clone(estimator_orig)
+
+    assert (has_fit_parameter(estimator, 'sample_weight') ==
+            estimator._get_tags()['supports_sample_weight'])

From 7318f48f400b38be870eb512ed67660ff26a6eea Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 3 Apr 2019 15:04:06 -0400
Subject: [PATCH 5/5] uncommented rest of the checks

---
 sklearn/utils/estimator_checks.py | 96 +++++++++++++++----------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index acf6472bcb663..7841f09b81c6d 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -78,39 +78,39 @@ def _safe_tags(estimator, key=None):
 
 def _yield_checks(name, estimator):
     tags = _safe_tags(estimator)
-    # yield check_estimators_dtypes
-    # yield check_fit_score_takes_y
+    yield check_estimators_dtypes
+    yield check_fit_score_takes_y
     if tags['supports_sample_weight']:
         yield check_sample_weights_pandas_series
         yield check_sample_weights_list
         yield check_sample_weights_invariance
-    # yield check_estimators_fit_returns_self
-    # yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
+    yield check_estimators_fit_returns_self
+    yield partial(check_estimators_fit_returns_self, readonly_memmap=True)
 
-    # # Check that all estimator yield informative messages when
-    # # trained on empty datasets
-    # if not tags["no_validation"]:
-    #     yield check_complex_data
-    #     yield check_dtype_object
-    #     yield check_estimators_empty_data_messages
+    # Check that all estimator yield informative messages when
+    # trained on empty datasets
+    if not tags["no_validation"]:
+        yield check_complex_data
+        yield check_dtype_object
+        yield check_estimators_empty_data_messages
 
-    # if name not in CROSS_DECOMPOSITION:
-    #     # cross-decomposition's "transform" returns X and Y
-    #     yield check_pipeline_consistency
+    if name not in CROSS_DECOMPOSITION:
+        # cross-decomposition's "transform" returns X and Y
+        yield check_pipeline_consistency
 
-    # if not tags["allow_nan"] and not tags["no_validation"]:
-    #     # Test that all estimators check their input for NaN's and infs
-    #     yield check_estimators_nan_inf
+    if not tags["allow_nan"] and not tags["no_validation"]:
+        # Test that all estimators check their input for NaN's and infs
+        yield check_estimators_nan_inf
 
-    # yield check_estimators_overwrite_params
-    # if hasattr(estimator, 'sparsify'):
-    #     yield check_sparsify_coefficients
+    yield check_estimators_overwrite_params
+    if hasattr(estimator, 'sparsify'):
+        yield check_sparsify_coefficients
 
-    # yield check_estimator_sparse_data
+    yield check_estimator_sparse_data
 
-    # # Test that estimators can be pickled, and once pickled
-    # # give the same answer as before.
-    # yield check_estimators_pickle
+    # Test that estimators can be pickled, and once pickled
+    # give the same answer as before.
+    yield check_estimators_pickle
 
 
 def _yield_classifier_checks(name, classifier):
@@ -241,31 +241,31 @@ def _yield_all_checks(name, estimator):
     for check in _yield_checks(name, estimator):
         yield check
-    # if is_classifier(estimator):
-    #     for check in _yield_classifier_checks(name, estimator):
-    #         yield check
-    # if is_regressor(estimator):
-    #     for check in _yield_regressor_checks(name, estimator):
-    #         yield check
-    # if hasattr(estimator, 'transform'):
-    #     for check in _yield_transformer_checks(name, estimator):
-    #         yield check
-    # if isinstance(estimator, ClusterMixin):
-    #     for check in _yield_clustering_checks(name, estimator):
-    #         yield check
-    # if is_outlier_detector(estimator):
-    #     for check in _yield_outliers_checks(name, estimator):
-    #         yield check
-    # yield check_fit2d_predict1d
-    # yield check_methods_subset_invariance
-    # yield check_fit2d_1sample
-    # yield check_fit2d_1feature
-    # yield check_fit1d
-    # yield check_get_params_invariance
-    # yield check_set_params
-    # yield check_dict_unchanged
-    # yield check_dont_overwrite_parameters
-    # yield check_fit_idempotent
+    if is_classifier(estimator):
+        for check in _yield_classifier_checks(name, estimator):
+            yield check
+    if is_regressor(estimator):
+        for check in _yield_regressor_checks(name, estimator):
+            yield check
+    if hasattr(estimator, 'transform'):
+        for check in _yield_transformer_checks(name, estimator):
+            yield check
+    if isinstance(estimator, ClusterMixin):
+        for check in _yield_clustering_checks(name, estimator):
+            yield check
+    if is_outlier_detector(estimator):
+        for check in _yield_outliers_checks(name, estimator):
+            yield check
+    yield check_fit2d_predict1d
+    yield check_methods_subset_invariance
+    yield check_fit2d_1sample
+    yield check_fit2d_1feature
+    yield check_fit1d
+    yield check_get_params_invariance
+    yield check_set_params
+    yield check_dict_unchanged
+    yield check_dont_overwrite_parameters
+    yield check_fit_idempotent
     yield check_supports_sample_weight_tag
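
Note (illustration, not part of the series): after PATCH 5/5 the full check
battery is restored, with the three check_sample_weights_* checks gated on
the tag and check_supports_sample_weight_tag run for every estimator. A
sketch of how the series is exercised end to end:

    from sklearn.linear_model import Ridge
    from sklearn.utils.estimator_checks import check_estimator

    # Runs the sample weight checks because Ridge._more_tags (PATCH 4/5)
    # sets the tag; check_supports_sample_weight_tag then asserts that the
    # tag agrees with has_fit_parameter(estimator, 'sample_weight').
    check_estimator(Ridge)
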