From e996b4fdbadd38070a5f88fb06999be9b042fd43 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 15 Jan 2023 16:32:40 +0100 Subject: [PATCH 1/5] Fix errors in get_feature_names_out --- doc/whats_new/v1.3.rst | 6 +++++- sklearn/feature_extraction/_dict_vectorizer.py | 2 ++ sklearn/impute/_base.py | 1 + sklearn/preprocessing/_discretization.py | 1 + sklearn/preprocessing/_polynomial.py | 1 + sklearn/tests/test_common.py | 4 ---- 6 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index e6911d90a7d77..c6ced865bc189 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -41,8 +41,10 @@ Changes impacting all modules raises a `NotFittedError` if the instance is not fitted. This ensures the error is consistent in all estimators with the `get_feature_names_out` method. + - :class:`impute.MissingIndicator` - :class:`kernel_approximation.AdditiveChi2Sampler` - :class:`preprocessing.Binarizer` + - :class:`preprocessing.KBinsDiscretizer` - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` - :class:`preprocessing.Normalizer` @@ -50,14 +52,16 @@ Changes impacting all modules - :class:`preprocessing.PowerTransformer` - :class:`preprocessing.QuantileTransformer` - :class:`preprocessing.RobustScaler` + - :class:`preprocessing.SplineTransformer` - :class:`preprocessing.StandardScaler` + - :class:`feature_extraction.DictVectorizer` - :class:`feature_extraction.text.TfidfTransformer` The `NotFittedError` displays an informative message asking to fit the instance with the appropriate arguments. :pr:`25294` by :user:`John Pangas <jpangas>` and :pr:`25291` by - :user:`Rahil Parikh <rprkh>`. + :user:`Rahil Parikh <rprkh>` and :pr:`25294` by :user:`Alex Buzenet <albuzenet>`. 
Changelog --------- diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 4bd1694270a55..f2381cf2de85d 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -12,6 +12,7 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array +from ..utils.validation import check_is_fitted class DictVectorizer(TransformerMixin, BaseEstimator): @@ -384,6 +385,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. """ + check_is_fitted(self, "n_features_in_") if any(not isinstance(name, str) for name in self.feature_names_): feature_names = [str(name) for name in self.feature_names_] else: diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index ab92e839718df..cc57a64d34a3b 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -1053,6 +1053,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. """ + check_is_fitted(self, "n_features_in_") input_features = _check_feature_names_in(self, input_features) prefix = self.__class__.__name__.lower() return np.asarray( diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 01a1509b5d770..abc5de750969e 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -448,6 +448,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. 
""" + check_is_fitted(self, "n_features_in_") input_features = _check_feature_names_in(self, input_features) if hasattr(self, "_encoder"): return self._encoder.get_feature_names_out(input_features) diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index 3e1dd4f6602b3..ad60dfc7c37b7 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -673,6 +673,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. """ + check_is_fitted(self, "n_features_in_") n_splines = self.bsplines_[0].c.shape[0] input_features = _check_feature_names_in(self, input_features) feature_names = [] diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index c7377cc1d0227..89b25496f5224 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -463,14 +463,11 @@ def test_transformers_get_feature_names_out(transformer): ] WHITELISTED_FAILING_ESTIMATORS = [ - "DictVectorizer", "GaussianRandomProjection", "GenericUnivariateSelect", "IterativeImputer", "IsotonicRegression", - "KBinsDiscretizer", "KNNImputer", - "MissingIndicator", "RFE", "RFECV", "SelectFdr", @@ -482,7 +479,6 @@ def test_transformers_get_feature_names_out(transformer): "SequentialFeatureSelector", "SimpleImputer", "SparseRandomProjection", - "SplineTransformer", "StackingClassifier", "StackingRegressor", "VarianceThreshold", From 3094b0a95fbc9dbaed02f5e777f16ec3d0e938c1 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 15 Jan 2023 16:46:10 +0100 Subject: [PATCH 2/5] update change-log --- doc/whats_new/v1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index c6ced865bc189..0443c82ca3779 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -61,7 +61,7 @@ Changes impacting all modules with the appropriate arguments. 
:pr:`25294` by :user:`John Pangas <jpangas>` and :pr:`25291` by - :user:`Rahil Parikh <rprkh>` and :pr:`25294` by :user:`Alex Buzenet <albuzenet>`. + :user:`Rahil Parikh <rprkh>` and :pr:`25402` by :user:`Alex Buzenet <albuzenet>`. Changelog --------- From 1701044fae4b128be714d3aeed47462b04ced0ae Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 17 Jan 2023 00:04:55 +0100 Subject: [PATCH 3/5] Fix DictVectorizer --- sklearn/feature_extraction/_dict_vectorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index f2381cf2de85d..b51ccceaac9d1 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -385,7 +385,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. """ - check_is_fitted(self, "n_features_in_") + check_is_fitted(self, "feature_names_") if any(not isinstance(name, str) for name in self.feature_names_): feature_names = [str(name) for name in self.feature_names_] else: From 915f63b6821712c86e3552a8e37532045dd29d88 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 18 Jan 2023 16:16:43 +0100 Subject: [PATCH 4/5] update changelog --- doc/whats_new/v1.3.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index b85c31c7adb2c..93863eeffbbf8 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -42,10 +42,6 @@ Changes impacting all modules consistent in all estimators with the `get_feature_names_out` method. 
- :class:`impute.MissingIndicator` - - :class:`ensemble.StackingClassifier` - - :class:`ensemble.StackingRegressor` - - :class:`ensemble.VotingClassifier` - - :class:`ensemble.VotingRegressor` - :class:`feature_extraction.DictVectorizer` - :class:`feature_extraction.text.TfidfTransformer` - :class:`kernel_approximation.AdditiveChi2Sampler` From cbafeb948bcf50b09bef0ef24013e3a051d50691 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 18 Jan 2023 17:11:18 +0100 Subject: [PATCH 5/5] fix error in SimpleImputer --- sklearn/impute/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 37498ae1cb074..95ba89bc35915 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -709,6 +709,7 @@ def get_feature_names_out(self, input_features=None): feature_names_out : ndarray of str objects Transformed feature names. """ + check_is_fitted(self, "n_features_in_") input_features = _check_feature_names_in(self, input_features) non_missing_mask = np.logical_not(_get_mask(self.statistics_, np.nan)) names = input_features[non_missing_mask]