
ENH Adds n_features_in_ to ensemble module #19326


Merged
merged 20 commits, Feb 22, 2021

Commits (20)
14778ba
ENH Adds n_features_in_ checking in bagging
lorentzenchr Feb 1, 2021
186e7a2
ENH Adds n_features_in_ checking in weighted boosting
lorentzenchr Feb 1, 2021
cb12f64
ENH Adds n_features_in_ checking in HGBT
lorentzenchr Feb 3, 2021
5366110
ENH Adds n_features_in_ checking in GradientBoosting
lorentzenchr Feb 3, 2021
f11e457
ENH Adds n_features_in_ checking in forests
lorentzenchr Feb 3, 2021
65edf9f
ENH Adds n_features_in_ checking in IsolationForest
lorentzenchr Feb 3, 2021
4f40b0f
DEP n_features_ in forests
lorentzenchr Feb 4, 2021
679443c
DEP n_features_ in bagging
lorentzenchr Feb 4, 2021
a04fbad
DEP n_features_ in IsolationForest
lorentzenchr Feb 4, 2021
9568948
DEP n_features_ in GradientBoosting
lorentzenchr Feb 4, 2021
d89cb0b
TST add test for deprecated attribute n_features_
lorentzenchr Feb 4, 2021
4e1bac9
TST remove ensemble from N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE
lorentzenchr Feb 4, 2021
905d85c
TST add ExtraTreesClassifier etc to deprecation test of n_features_
lorentzenchr Feb 4, 2021
803461d
CLN remove ensemble from N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE
lorentzenchr Feb 4, 2021
f30d144
CLN mark as code in docstrings
lorentzenchr Feb 5, 2021
43be0d8
Merge branch 'main' into n_features_ensemble
lorentzenchr Feb 5, 2021
44e21c1
Add comment from code review
lorentzenchr Feb 6, 2021
bc43b6a
Merge remote-tracking branch 'origin/main' into pr/lorentzenchr/19326
glemaitre Feb 11, 2021
b1eb01c
CLN version 1.0. to 1.0
lorentzenchr Feb 11, 2021
bed7034
Merge remote-tracking branch 'origin/main' into pr/lorentzenchr/19326
glemaitre Feb 11, 2021
62 changes: 31 additions & 31 deletions sklearn/ensemble/_bagging.py
@@ -16,7 +16,7 @@
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
-from ..utils import check_random_state, check_array, column_or_1d
+from ..utils import check_random_state, column_or_1d, deprecated
from ..utils import indices_to_mask
from ..utils.metaestimators import if_delegate_has_method
from ..utils.multiclass import check_classification_targets
@@ -287,7 +287,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
sample_weight = _check_sample_weight(sample_weight, X, dtype=None)

# Remap output
-        n_samples, self.n_features_ = X.shape
+        n_samples = X.shape[0]
self._n_samples = n_samples
y = self._validate_y(y)

@@ -313,11 +313,11 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
if isinstance(self.max_features, numbers.Integral):
max_features = self.max_features
elif isinstance(self.max_features, float):
-            max_features = self.max_features * self.n_features_
+            max_features = self.max_features * self.n_features_in_
else:
raise ValueError("max_features must be int or float")

-        if not (0 < max_features <= self.n_features_):
+        if not (0 < max_features <= self.n_features_in_):
raise ValueError("max_features must be in (0, n_features]")

max_features = max(1, int(max_features))
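As a quick aside on the float branch above, a worked arithmetic example (values are made up for illustration):

```python
# Hypothetical values: max_features=0.5 (a float) with n_features_in_=11.
max_features = 0.5 * 11                   # 5.5, a fractional feature count
max_features = max(1, int(max_features))  # int(5.5) -> 5 features per estimator
```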
@@ -408,7 +408,7 @@ def _get_estimators_indices(self):
# to those in `_parallel_build_estimators()`
feature_indices, sample_indices = _generate_bagging_indices(
seed, self.bootstrap_features, self.bootstrap,
-            self.n_features_, self._n_samples, self._max_features,
+            self.n_features_in_, self._n_samples, self._max_features,
self._max_samples)

yield feature_indices, sample_indices
@@ -429,6 +429,16 @@ def estimators_samples_(self):
return [sample_indices
for _, sample_indices in self._get_estimators_indices()]

+    # TODO: Remove in 1.2
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute n_features_ was deprecated in version 1.0 and will be "
+        "removed in 1.2. Use 'n_features_in_' instead."
+    )
+    @property
+    def n_features_(self):
+        return self.n_features_in_


class BaggingClassifier(ClassifierMixin, BaseBagging):
"""A Bagging classifier.
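For context, a minimal usage sketch (not part of the diff) of how the deprecated property added above behaves once the estimator is fitted; the dataset is illustrative, and scikit-learn's `deprecated` decorator is assumed to emit a `FutureWarning`:

```python
import warnings

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier

X, y = make_classification(n_samples=50, n_features=4, random_state=0)
clf = BaggingClassifier(random_state=0).fit(X, y)

print(clf.n_features_in_)  # 4, the new canonical attribute

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(clf.n_features_)  # still 4, but routed through the deprecated property
assert any(issubclass(w.category, FutureWarning) for w in caught)
```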
@@ -523,6 +533,10 @@ class BaggingClassifier(ClassifierMixin, BaseBagging):
n_features_ : int
The number of features when :meth:`fit` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

estimators_ : list of estimators
The collection of fitted base estimators.

@@ -702,17 +716,11 @@ def predict_proba(self, X):
"""
check_is_fitted(self)
# Check data
-        X = check_array(
+        X = self._validate_data(
             X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
         )

-        if self.n_features_ != X.shape[1]:
-            raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1}."
-                             "".format(self.n_features_, X.shape[1]))

# Parallel loop
n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
self.n_jobs)
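A brief sketch (not from the diff) of what `reset=False` buys here: `_validate_data` checks `X.shape[1]` against the fitted `n_features_in_` and raises a consistent `ValueError` on mismatch, which is why the hand-rolled shape check is deleted above. The data and exact message wording are illustrative:

```python
import numpy as np

from sklearn.ensemble import BaggingClassifier

rng = np.random.RandomState(0)
X, y = rng.rand(20, 3), np.array([0, 1] * 10)
clf = BaggingClassifier(random_state=0).fit(X, y)

try:
    clf.predict_proba(rng.rand(5, 4))  # one feature too many
except ValueError as exc:
    # e.g. "X has 4 features, but BaggingClassifier is expecting 3 features as input."
    print(exc)
```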
@@ -753,17 +761,11 @@ def predict_log_proba(self, X):
check_is_fitted(self)
if hasattr(self.base_estimator_, "predict_log_proba"):
# Check data
-            X = check_array(
+            X = self._validate_data(
                 X, accept_sparse=['csr', 'csc'], dtype=None,
-                force_all_finite=False
+                force_all_finite=False, reset=False
             )

-            if self.n_features_ != X.shape[1]:
-                raise ValueError("Number of features of the model must "
-                                 "match the input. Model n_features is {0} "
-                                 "and input n_features is {1} "
-                                 "".format(self.n_features_, X.shape[1]))

# Parallel loop
n_jobs, n_estimators, starts = _partition_estimators(
self.n_estimators, self.n_jobs)
@@ -811,17 +813,11 @@ def decision_function(self, X):
check_is_fitted(self)

# Check data
-        X = check_array(
+        X = self._validate_data(
             X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
         )

-        if self.n_features_ != X.shape[1]:
-            raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1} "
-                             "".format(self.n_features_, X.shape[1]))

# Parallel loop
n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
self.n_jobs)
@@ -929,6 +925,10 @@ class BaggingRegressor(RegressorMixin, BaseBagging):
n_features_ : int
The number of features when :meth:`fit` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

estimators_ : list of estimators
The collection of fitted sub-estimators.

@@ -1024,9 +1024,9 @@ def predict(self, X):
"""
check_is_fitted(self)
# Check data
-        X = check_array(
+        X = self._validate_data(
             X, accept_sparse=['csr', 'csc'], dtype=None,
-            force_all_finite=False
+            force_all_finite=False, reset=False
         )

# Parallel loop
42 changes: 35 additions & 7 deletions sklearn/ensemble/_forest.py
@@ -57,7 +57,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
ExtraTreeClassifier, ExtraTreeRegressor)
from ..tree._tree import DTYPE, DOUBLE
-from ..utils import check_random_state, check_array, compute_sample_weight
+from ..utils import check_random_state, compute_sample_weight, deprecated
from ..exceptions import DataConversionWarning
from ._base import BaseEnsemble, _partition_estimators
from ..utils.fixes import delayed
@@ -312,9 +312,6 @@ def fit(self, X, y, sample_weight=None):
# ensemble sorts the indices.
X.sort_indices()

-        # Remap output
-        self.n_features_ = X.shape[1]

y = np.atleast_1d(y)
if y.ndim == 2 and y.shape[1] == 1:
warn("A column-vector y was passed when a 1d array was"
@@ -446,7 +443,8 @@ def _compute_oob_predictions(self, X, y):
(n_samples, 1, n_outputs)
The OOB predictions.
"""
-        X = check_array(X, dtype=DTYPE, accept_sparse='csr')
+        X = self._validate_data(X, dtype=DTYPE, accept_sparse='csr',
+                                reset=False)

n_samples = y.shape[0]
n_outputs = self.n_outputs_
@@ -530,12 +528,22 @@ def feature_importances_(self):
for tree in self.estimators_ if tree.tree_.node_count > 1)

if not all_importances:
-            return np.zeros(self.n_features_, dtype=np.float64)
+            return np.zeros(self.n_features_in_, dtype=np.float64)

all_importances = np.mean(all_importances,
axis=0, dtype=np.float64)
return all_importances / np.sum(all_importances)

+    # TODO: Remove in 1.2
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute n_features_ was deprecated in version 1.0 and will be "
+        "removed in 1.2. Use 'n_features_in_' instead."
+    )
+    @property
+    def n_features_(self):
+        return self.n_features_in_


def _accumulate_prediction(predict, X, out, lock):
"""
@@ -1163,6 +1171,10 @@ class labels (multi-output problem).
n_features_ : int
The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

n_outputs_ : int
The number of outputs when ``fit`` is performed.

@@ -1463,6 +1475,10 @@ class RandomForestRegressor(ForestRegressor):
n_features_ : int
The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

n_outputs_ : int
The number of outputs when ``fit`` is performed.

@@ -1783,6 +1799,10 @@ class labels (multi-output problem).
n_features_ : int
The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

n_outputs_ : int
The number of outputs when ``fit`` is performed.

@@ -2068,6 +2088,10 @@ class ExtraTreesRegressor(ForestRegressor):
n_features_ : int
The number of features.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

n_outputs_ : int
The number of outputs.

@@ -2292,6 +2316,10 @@ class RandomTreesEmbedding(BaseForest):
n_features_ : int
The number of features when ``fit`` is performed.

+        .. deprecated:: 1.0
+            Attribute `n_features_` was deprecated in version 1.0 and will be
+            removed in 1.2. Use `n_features_in_` instead.

Review comment (Member): 1.0. -> 1.0

n_outputs_ : int
The number of outputs when ``fit`` is performed.

@@ -2421,7 +2449,7 @@ def fit_transform(self, X, y=None, sample_weight=None):
X_transformed : sparse matrix of shape (n_samples, n_out)
Transformed dataset.
"""
-        X = check_array(X, accept_sparse=['csc'])
+        X = self._validate_data(X, accept_sparse=['csc'])
Review comment (Member): Hmm, what is the reason that the common tests were not failing for this transformer, since we did not introduce _validate_data here before?

Review comment (Member Author): test_check_n_features_in_after_fitting is applied to all estimators except those from modules listed in N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE. Every module where we add n_features_in_ has to be removed from that list. This is done in this PR for ensemble. Or do you think of another test?

Review comment (Member): I was thinking about check_n_features_in(name, estimator_orig), but I can check on the side. Sorry, we merged a new PR that added some conflicts in test_bagging again.

if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
# ensemble sorts the indices.
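To make the review thread above concrete, a simplified sketch of the ignore-list gating being discussed; this is not the actual scikit-learn test code, and every name except `N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE` and `"ensemble"` is a hypothetical placeholder:

```python
# Modules listed here are skipped by the n_features_in_ common test; this
# PR removes "ensemble" from the real list, so its estimators are now checked.
N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
    # "ensemble",  # removed by this PR
    "some_module",  # hypothetical remaining entry
}

def should_skip_n_features_in_check(estimator) -> bool:
    # e.g. sklearn.ensemble._bagging -> "ensemble"
    module = type(estimator).__module__.split(".")[1]
    return module in N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE
```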