From 6d4c2ecaf7fc67dccb7e983c8f529c8246ef960c Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 19 Aug 2021 22:11:36 -0400 Subject: [PATCH 1/6] ENH Adds feature_names_in to pipeline and multiclass --- sklearn/base.py | 2 +- sklearn/multiclass.py | 25 +++++++++++++++++++++++++ sklearn/pipeline.py | 11 +++++++++++ sklearn/tests/test_common.py | 10 ++++++---- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index a585b2b06c394..31ac305ae5cbe 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -571,7 +571,7 @@ def _validate_data( X, y = check_X_y(X, y, **check_params) out = X, y - if not no_val_X and check_params.get("ensure_2d", True): + if not no_val_X: self._check_n_features(X, reset=reset) return out diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 99d5c5d07985d..6da62d93ef990 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -269,6 +269,12 @@ class OneVsRestClassifier( .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (n_features_in_,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. versionadded:: 1.0 + Examples -------- >>> import numpy as np @@ -342,6 +348,8 @@ def fit(self, X, y): if hasattr(self.estimators_[0], "n_features_in_"): self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ return self @@ -664,6 +672,12 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (n_features_in_,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. 
versionadded:: 1.0 + Examples -------- >>> from sklearn.datasets import load_iris @@ -795,6 +809,8 @@ def partial_fit(self, X, y, classes=None): if hasattr(self.estimators_[0], "n_features_in_"): self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ return self @@ -842,6 +858,7 @@ def decision_function(self, X): scikit-learn conventions for binary classification. """ check_is_fitted(self) + self._check_feature_names(X, reset=False) indices = self.pairwise_indices_ if indices is None: @@ -936,6 +953,12 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (n_features_in_,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. versionadded:: 1.0 + Examples -------- >>> from sklearn.multiclass import OutputCodeClassifier @@ -1032,6 +1055,8 @@ def fit(self, X, y): if hasattr(self.estimators_[0], "n_features_in_"): self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ return self diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 0814632721ba4..1e2a524e2962d 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -94,6 +94,12 @@ class Pipeline(_BaseComposition): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (n_features_in_,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. versionadded:: 1.0 + See Also -------- make_pipeline : Convenience function for simplified pipeline construction. 
@@ -657,6 +663,11 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ + @property + def feature_names_in_(self): + # delegate to first step (which will call _check_is_fitted) + return self.steps[0][1].feature_names_in_ + def _sk_visual_block_(self): _, estimators = zip(*self.steps) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 008bdee7e646b..f742fffaba6b6 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -12,7 +12,7 @@ import re import pkgutil from inspect import isgenerator, signature -from itertools import product +from itertools import product, chain from functools import partial import pytest @@ -329,16 +329,18 @@ def test_check_n_features_in_after_fitting(estimator): "feature_extraction", "kernel_approximation", "model_selection", - "multiclass", "multioutput", - "pipeline", "semi_supervised", } +_estimators_to_test = list( + chain(_tested_estimators(), [make_pipeline(LogisticRegression(C=1))]) +) + column_name_estimators = [ est - for est in _tested_estimators() + for est in _estimators_to_test if est.__module__.split(".")[1] not in COLUMN_NAME_MODULES_TO_IGNORE ] From ab1419d82ba72f406aa6dd6f34223cbd3b566d9f Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 19 Aug 2021 22:19:43 -0400 Subject: [PATCH 2/6] REV Reduce diff --- sklearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/base.py b/sklearn/base.py index 31ac305ae5cbe..a585b2b06c394 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -571,7 +571,7 @@ def _validate_data( X, y = check_X_y(X, y, **check_params) out = X, y - if not no_val_X: + if not no_val_X and check_params.get("ensure_2d", True): self._check_n_features(X, reset=reset) return out From e13adf0a97597760b9bab296bf4432f2ac3ae54f Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Thu, 19 Aug 2021 23:45:35 -0400 Subject: [PATCH 3/6] DOC Fix sphinx error --- sklearn/multiclass.py | 6 +++--- sklearn/pipeline.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 6da62d93ef990..a90e7c950adc8 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -269,7 +269,7 @@ class OneVsRestClassifier( .. versionadded:: 0.24 - feature_names_in_ : ndarray of shape (n_features_in_,) + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if the underlying estimator exposes such an attribute when fit. @@ -672,7 +672,7 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 0.24 - feature_names_in_ : ndarray of shape (n_features_in_,) + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if the underlying estimator exposes such an attribute when fit. @@ -953,7 +953,7 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 0.24 - feature_names_in_ : ndarray of shape (n_features_in_,) + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if the underlying estimator exposes such an attribute when fit. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1e2a524e2962d..3f8caada92c60 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -94,7 +94,7 @@ class Pipeline(_BaseComposition): .. versionadded:: 0.24 - feature_names_in_ : ndarray of shape (n_features_in_,) + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if the underlying estimator exposes such an attribute when fit. From f45825c39a47928210043b8192e8f35259fd5eea Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Fri, 20 Aug 2021 11:40:34 -0400 Subject: [PATCH 4/6] CLN Removes delegation code --- sklearn/multiclass.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index a90e7c950adc8..1f71090524a42 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -673,8 +673,8 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) - Names of features seen during :term:`fit`. Only defined if the - underlying estimator exposes such an attribute when fit. + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are strings. .. versionadded:: 1.0 @@ -809,8 +809,6 @@ def partial_fit(self, X, y, classes=None): if hasattr(self.estimators_[0], "n_features_in_"): self.n_features_in_ = self.estimators_[0].n_features_in_ - if hasattr(self.estimators_[0], "feature_names_in_"): - self.feature_names_in_ = self.estimators_[0].feature_names_in_ return self From 48e3463ad3d4e2b6cfde22476d31dea0f29c47c3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 20 Aug 2021 12:04:34 -0400 Subject: [PATCH 5/6] DOC Adds ALL to docstring --- sklearn/multiclass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 1f71090524a42..8b1045af2d4ff 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -674,7 +674,7 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` - has feature names that are strings. + has feature names that are all strings. ..
versionadded:: 1.0 From c7ccfc93d1d10fccaa97354314f188e191a9927e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 23 Aug 2021 14:34:02 +0200 Subject: [PATCH 6/6] Avoid duplicate call to _check_partial_fit_first_call in partial_fit --- sklearn/multiclass.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 8b1045af2d4ff..a746d75164c06 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -776,7 +776,8 @@ def partial_fit(self, X, y, classes=None): ------- self """ - if _check_partial_fit_first_call(self, classes): + first_call = _check_partial_fit_first_call(self, classes) + if first_call: self.estimators_ = [ clone(self.estimator) for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2) @@ -794,7 +795,7 @@ def partial_fit(self, X, y, classes=None): y, accept_sparse=["csr", "csc"], force_all_finite=False, - reset=_check_partial_fit_first_call(self, classes), + reset=first_call, ) check_classification_targets(y) combinations = itertools.combinations(range(self.n_classes_), 2)