diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 864567dabad11..db6e12cc1b9f3 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -858,6 +858,9 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): refit_end_time = time.time() self.refit_time_ = refit_end_time - refit_start_time + if hasattr(self.best_estimator_, "feature_names_in_"): + self.feature_names_in_ = self.best_estimator_.feature_names_in_ + # Store the only scorer not as a dict for single metric evaluation self.scorer_ = scorers @@ -1246,11 +1249,21 @@ class GridSearchCV(BaseSearchCV): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `feature_names_in_` when fit. + + .. versionadded:: 1.0 + Notes ----- The parameters selected are those that maximize the score of the left out @@ -1595,11 +1608,21 @@ class RandomizedSearchCV(BaseSearchCV): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `n_features_in_` when fit. .. 
versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `feature_names_in_` when fit. + + .. versionadded:: 1.0 + Notes ----- The parameters selected are those that maximize the score of the held-out diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index ec7692af7f509..a040c0f4e74c3 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -626,11 +626,21 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `feature_names_in_` when fit. + + .. versionadded:: 1.0 + See Also -------- :class:`HalvingRandomSearchCV`: @@ -954,11 +964,21 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): the underlying estimator is a classifier. n_features_in_ : int - Number of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Number of features seen during :term:`fit`. 
Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `n_features_in_` when fit. .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and if `best_estimator_` exposes + `feature_names_in_` when fit. + + .. versionadded:: 1.0 + See Also -------- :class:`HalvingGridSearchCV`: diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index e9090780f9eb1..f245f940cc943 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -325,11 +325,14 @@ def test_check_n_features_in_after_fitting(estimator): COLUMN_NAME_MODULES_TO_IGNORE = { "compose", - "model_selection", } _estimators_to_test = list( - chain(_tested_estimators(), [make_pipeline(LogisticRegression(C=1))]) + chain( + _tested_estimators(), + [make_pipeline(LogisticRegression(C=1))], + list(_generate_search_cv_instances()), + ) )