From ba18d23712d0889fb6b0ac96904b4ea6efa10cd5 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Aug 2021 16:47:35 +0200 Subject: [PATCH 1/3] Check feature_names_in_ for sklearn.model_selection --- sklearn/model_selection/_search.py | 31 ++++++++++++++++--- .../_search_successive_halving.py | 28 ++++++++++++++--- sklearn/tests/test_common.py | 7 +++-- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 864567dabad11..e07d716187c8f 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -858,6 +858,9 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): refit_end_time = time.time() self.refit_time_ = refit_end_time - refit_start_time + if hasattr(self.best_estimator_, "feature_names_in_"): + self.feature_names_in_ = self.best_estimator_.feature_names_in_ + # Store the only scorer not as a dict for single metric evaluation self.scorer_ = scorers @@ -1246,11 +1249,21 @@ class GridSearchCV(BaseSearchCV): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. + + .. 
versionadded:: 1.0 + Notes ----- The parameters selected are those that maximize the score of the left out @@ -1595,11 +1608,21 @@ class RandomizedSearchCV(BaseSearchCV): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. + + .. versionadded:: 1.0 + Notes ----- The parameters selected are those that maximize the score of the held-out diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index ec7692af7f509..349dd185450c6 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -626,11 +626,21 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. 
Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. + + .. versionadded:: 1.0 + See Also -------- :class:`HalvingRandomSearchCV`: @@ -954,11 +964,21 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes such an + attribute when fit. + + .. 
versionadded:: 1.0 + See Also -------- :class:`HalvingGridSearchCV`: diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index ea32a6de37126..201707cc1431a 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -327,12 +327,15 @@ def test_check_n_features_in_after_fitting(estimator): "compose", "feature_extraction", "kernel_approximation", - "model_selection", "multioutput", } _estimators_to_test = list( - chain(_tested_estimators(), [make_pipeline(LogisticRegression(C=1))]) + chain( + _tested_estimators(), + [make_pipeline(LogisticRegression(C=1))], + list(_generate_search_cv_instances()), + ) ) From 1ea3dd9ea5b8d48c83be08b25f21b53fdfa05ed1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Aug 2021 17:08:05 +0200 Subject: [PATCH 2/3] Simplify phrasing --- .../model_selection/_search_successive_halving.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 349dd185450c6..cf4f65bfd26a0 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -628,16 +628,14 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and exposes such an attribute when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and exposes such an attribute when fit. .. 
versionadded:: 1.0 @@ -966,16 +964,14 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and exposes such an attribute when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and exposes such an attribute when fit. .. versionadded:: 1.0 From fdf189b575c26b21d2a3d96c4219ead064de86af Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Aug 2021 22:26:04 +0200 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Thomas J. Fan --- sklearn/model_selection/_search.py | 16 ++++++++-------- .../_search_successive_halving.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index e07d716187c8f..db6e12cc1b9f3 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -1251,16 +1251,16 @@ class GridSearchCV(BaseSearchCV): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. 
Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `feature_names_in_` when fit. .. versionadded:: 1.0 @@ -1610,16 +1610,16 @@ class RandomizedSearchCV(BaseSearchCV): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and that `best_estimator_` exposes such an - attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `feature_names_in_` when fit. .. versionadded:: 1.0 diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index cf4f65bfd26a0..a040c0f4e74c3 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -628,14 +628,16 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and exposes such an attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. 
Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and exposes such an attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `feature_names_in_` when fit. .. versionadded:: 1.0 @@ -964,14 +966,16 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): n_features_in_ : int Number of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and exposes such an attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if `best_estimator_` is defined (see the documentation for the `refit` - parameter for more details) and exposes such an attribute when fit. + parameter for more details) and that `best_estimator_` exposes + `feature_names_in_` when fit. .. versionadded:: 1.0