scikit-learn · qinhanmin2014 · Feb 14, 2018 · Feb 14, 2018
diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
@@ -614,10 +614,9 @@ that contain the missing values::
 
     >>> import numpy as np
     >>> from sklearn.preprocessing import Imputer
-    >>> imp = Imputer(missing_values='NaN', strategy='mean')
+    >>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
     >>> imp.fit([[1, 2], [np.nan, 3], [7, 6]])
-    Imputer(axis=None, copy=True, missing_values='NaN', strategy='mean',
-        verbose=0)
+    Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)
     >>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
     >>> print(imp.transform(X))                           # doctest: +ELLIPSIS
     [[ 4.          2.        ]
@@ -628,9 +627,9 @@ The :class:`Imputer` class also supports sparse matrices::
 
     >>> import scipy.sparse as sp
     >>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]])
-    >>> imp = Imputer(missing_values=0, strategy='mean')
+    >>> imp = Imputer(missing_values=0, strategy='mean', axis=0)
     >>> imp.fit(X)
-    Imputer(axis=None, copy=True, missing_values=0, strategy='mean', verbose=0)
+    Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0)
     >>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
     >>> print(imp.transform(X_test))                      # doctest: +ELLIPSIS
     [[ 4.          2.        ]

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -382,12 +382,6 @@ Outlier Detection models
   ``raw_values`` parameter is deprecated as the shifted Mahalanobis distance
   will be always returned in 0.22. :issue:`9015` by `Nicolas Goix`_.
 
-Preprocessing
-
-- Deprecate ``axis`` parameter in :func:`preprocessing.Imputer`.
-  :issue:`10558` by :user:`Baze Petrushev <petrushev>` and
-  :user:`Hanmin Qin <qinhanmin2014>`.
-
 Misc
 
 - Changed warning type from UserWarning to ConvergenceWarning for failing

diff --git a/examples/plot_missing_values.py b/examples/plot_missing_values.py
@@ -65,7 +65,8 @@
 X_missing[np.where(missing_samples)[0], missing_features] = 0
 y_missing = y_full.copy()
 estimator = Pipeline([("imputer", Imputer(missing_values=0,
-                                          strategy="mean")),
+                                          strategy="mean",
+                                          axis=0)),
                       ("forest", RandomForestRegressor(random_state=0,
                                                        n_estimators=100))])
 score = cross_val_score(estimator, X_missing, y_missing).mean()

diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
@@ -82,19 +82,12 @@ class Imputer(BaseEstimator, TransformerMixin):
         - If "most_frequent", then replace missing using the most frequent
           value along the axis.
 
-    axis : integer, optional (default=None)
+    axis : integer, optional (default=0)
         The axis along which to impute.
 
         - If `axis=0`, then impute along columns.
         - If `axis=1`, then impute along rows.
 
-        .. deprecated:: 0.20
-           Parameter ``axis`` has been deprecated in 0.20 and will be removed
-           in 0.22. Future (and default) behavior is equivalent to ``axis=0``
-           (impute along columns). Row-wise imputation can be performed with
-           FunctionTransformer (e.g.,
-           ``FunctionTransformer(lambda X: Imputer().fit_transform(X.T).T)``).
-
     verbose : integer, optional (default=0)
         Controls the verbosity of the imputer.
 
@@ -122,7 +115,7 @@ class Imputer(BaseEstimator, TransformerMixin):
       contain missing values).
     """
     def __init__(self, missing_values="NaN", strategy="mean",
-                 axis=None, verbose=0, copy=True):
+                 axis=0, verbose=0, copy=True):
         self.missing_values = missing_values
         self.strategy = strategy
         self.axis = axis
@@ -149,37 +142,27 @@ def fit(self, X, y=None):
                              " got strategy={1}".format(allowed_strategies,
                                                         self.strategy))
 
-        if self.axis is None:
-            self._axis = 0
-        else:
-            warnings.warn("Parameter 'axis' has been deprecated in 0.20 and "
-                          "will be removed in 0.22. Future (and default) "
-                          "behavior is equivalent to 'axis=0' (impute along "
-                          "columns). Row-wise imputation can be performed "
-                          "with FunctionTransformer.", DeprecationWarning)
-            self._axis = self.axis
-
-        if self._axis not in [0, 1]:
+        if self.axis not in [0, 1]:
             raise ValueError("Can only impute missing values on axis 0 and 1, "
-                             " got axis={0}".format(self._axis))
+                             " got axis={0}".format(self.axis))
 
         # Since two different arrays can be provided in fit(X) and
         # transform(X), the imputation data will be computed in transform()
         # when the imputation is done per sample (i.e., when axis=1).
-        if self._axis == 0:
+        if self.axis == 0:
             X = check_array(X, accept_sparse='csc', dtype=np.float64,
                             force_all_finite=False)
 
             if sparse.issparse(X):
                 self.statistics_ = self._sparse_fit(X,
                                                     self.strategy,
                                                     self.missing_values,
-                                                    self._axis)
+                                                    self.axis)
             else:
                 self.statistics_ = self._dense_fit(X,
                                                    self.strategy,
                                                    self.missing_values,
-                                                   self._axis)
+                                                   self.axis)
 
         return self
 
@@ -322,7 +305,7 @@ def transform(self, X):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             The input data to complete.
         """
-        if self._axis == 0:
+        if self.axis == 0:
             check_is_fitted(self, 'statistics_')
             X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
                             force_all_finite=False, copy=self.copy)
@@ -342,27 +325,27 @@ def transform(self, X):
                 statistics = self._sparse_fit(X,
                                               self.strategy,
                                               self.missing_values,
-                                              self._axis)
+                                              self.axis)
 
             else:
                 statistics = self._dense_fit(X,
                                              self.strategy,
                                              self.missing_values,
-                                             self._axis)
+                                             self.axis)
 
         # Delete the invalid rows/columns
         invalid_mask = np.isnan(statistics)
         valid_mask = np.logical_not(invalid_mask)
         valid_statistics = statistics[valid_mask]
         valid_statistics_indexes = np.where(valid_mask)[0]
-        missing = np.arange(X.shape[not self._axis])[invalid_mask]
+        missing = np.arange(X.shape[not self.axis])[invalid_mask]
 
-        if self._axis == 0 and invalid_mask.any():
+        if self.axis == 0 and invalid_mask.any():
             if self.verbose:
                 warnings.warn("Deleting features without "
                               "observed values: %s" % missing)
             X = X[:, valid_statistics_indexes]
-        elif self._axis == 1 and invalid_mask.any():
+        elif self.axis == 1 and invalid_mask.any():
             raise ValueError("Some rows only contain "
                              "missing values: %s" % missing)
 
@@ -379,10 +362,10 @@ def transform(self, X):
                 X = X.toarray()
 
             mask = _get_mask(X, self.missing_values)
-            n_missing = np.sum(mask, axis=self._axis)
+            n_missing = np.sum(mask, axis=self.axis)
             values = np.repeat(valid_statistics, n_missing)
 
-            if self._axis == 0:
+            if self.axis == 0:
                 coordinates = np.where(mask.transpose())[::-1]
             else:
                 coordinates = mask

diff --git a/sklearn/preprocessing/tests/test_imputation.py b/sklearn/preprocessing/tests/test_imputation.py
@@ -7,8 +7,6 @@
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_false
-from sklearn.utils.testing import assert_warns_message
-from sklearn.utils.testing import ignore_warnings
 
 from sklearn.preprocessing.imputation import Imputer
 from sklearn.pipeline import Pipeline
@@ -17,7 +15,6 @@
 from sklearn.random_projection import sparse_random_matrix
 
 
-@ignore_warnings(category=DeprecationWarning)  # To be removed in 0.22
 def _check_statistics(X, X_true,
                       strategy, statistics, missing_values):
     """Utility function for testing imputation for a given strategy.
@@ -301,7 +298,6 @@ def test_imputation_pickle():
         )
 
 
-@ignore_warnings(category=DeprecationWarning)  # To be removed in 0.22
 def test_imputation_copy():
     # Test imputation with copy
     X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)
@@ -368,15 +364,3 @@ def test_imputation_copy():
 
     # Note: If X is sparse and if missing_values=0, then a (dense) copy of X is
     # made, even if copy=False.
-
-
-def test_deprecated_imputer_axis():
-    depr_message = ("Parameter 'axis' has been deprecated in 0.20 and will "
-                    "be removed in 0.22. Future (and default) behavior is "
-                    "equivalent to 'axis=0' (impute along columns). Row-wise "
-                    "imputation can be performed with FunctionTransformer.")
-    X = sparse_random_matrix(5, 5, density=0.75, random_state=0)
-    imputer = Imputer(missing_values=0, axis=0)
-    assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)
-    imputer = Imputer(missing_values=0, axis=1)
-    assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)