From c900dd564bb2e178f8fdd36acbf91f9ea7a8e90d Mon Sep 17 00:00:00 2001
From: petrushev <b.petrushev@gmail.com>
Date: Fri, 1 Sep 2017 15:30:17 +0200
Subject: [PATCH 1/3] Deprecate ``Imputer.axis`` argument

---
 doc/whats_new.rst                   |  5 +++++
 sklearn/preprocessing/imputation.py | 32 +++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 0ca707ce2cbbf..4e728afcb29e3 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -47,6 +47,11 @@ Model evaluation and meta-estimators
 
 - A scorer based on :func:`metrics.brier_score_loss` is also available.
   :issue:`9521` by :user:`Hanmin Qin <qinhanmin2014>`.
+- The ``axis`` parameter in
+  :class:`preprocessing.Imputer <preprocessing.Imputer>` is deprecated. Its
+  removal is planned for 0.22 release. :issue:`9672` by
+  :user:`Baze Petrushev <petrushev>`.
+
 
 Bug fixes
 .........
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index 12d5425fbf604..048404c11edbb 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -82,12 +82,16 @@ class Imputer(BaseEstimator, TransformerMixin):
         - If "most_frequent", then replace missing using the most frequent
           value along the axis.
 
-    axis : integer, optional (default=0)
+    axis : integer, optional (default=None)
         The axis along which to impute.
 
         - If `axis=0`, then impute along columns.
         - If `axis=1`, then impute along rows.
 
+        .. deprecated:: 0.20
+           ``axis`` will be removed from ``Imputer``, and it will only impute
+           along columns (axis=0) in 0.22.
+
     verbose : integer, optional (default=0)
         Controls the verbosity of the imputer.
 
@@ -115,13 +119,18 @@ class Imputer(BaseEstimator, TransformerMixin):
       contain missing values).
     """
     def __init__(self, missing_values="NaN", strategy="mean",
-                 axis=0, verbose=0, copy=True):
+                 axis=None, verbose=0, copy=True):
         self.missing_values = missing_values
         self.strategy = strategy
-        self.axis = axis
         self.verbose = verbose
         self.copy = copy
 
+        self.axis = axis
+        if axis is not None:
+            warnings.warn("'axis' will be removed from Imputer, and it will "
+                          "only impute along columns (axis=0) in 0.22",
+                          DeprecationWarning)
+
     def fit(self, X, y=None):
         """Fit the imputer on X.
 
@@ -143,14 +152,14 @@ def fit(self, X, y=None):
                              " got strategy={1}".format(allowed_strategies,
                                                         self.strategy))
 
-        if self.axis not in [0, 1]:
+        if self.axis not in [None, 0, 1]:
             raise ValueError("Can only impute missing values on axis 0 and 1, "
                              " got axis={0}".format(self.axis))
 
         # Since two different arrays can be provided in fit(X) and
         # transform(X), the imputation data will be computed in transform()
         # when the imputation is done per sample (i.e., when axis=1).
-        if self.axis == 0:
+        if self.axis == 0 or self.axis is None:
             X = check_array(X, accept_sparse='csc', dtype=np.float64,
                             force_all_finite=False)
 
@@ -169,8 +178,12 @@ def fit(self, X, y=None):
 
     def _sparse_fit(self, X, strategy, missing_values, axis):
         """Fit the transformer on sparse data."""
+        if axis is None:
+            axis = 0
+
         # Imputation is done "by column", so if we want to do it
         # by row we only need to convert the matrix to csr format.
+
         if axis == 1:
             X = X.tocsr()
         else:
@@ -249,6 +262,9 @@ def _sparse_fit(self, X, strategy, missing_values, axis):
 
     def _dense_fit(self, X, strategy, missing_values, axis):
         """Fit the transformer on dense data."""
+        if axis is None:
+            axis = 0
+
         X = check_array(X, force_all_finite=False)
         mask = _get_mask(X, missing_values)
         masked_X = ma.masked_array(X, mask=mask)
@@ -306,7 +322,7 @@ def transform(self, X):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             The input data to complete.
         """
-        if self.axis == 0:
+        if self.axis is None or self.axis == 0:
             check_is_fitted(self, 'statistics_')
             X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
                             force_all_finite=False, copy=self.copy)
@@ -341,7 +357,7 @@ def transform(self, X):
         valid_statistics_indexes = np.where(valid_mask)[0]
         missing = np.arange(X.shape[not self.axis])[invalid_mask]
 
-        if self.axis == 0 and invalid_mask.any():
+        if (self.axis is None or self.axis == 0) and invalid_mask.any():
             if self.verbose:
                 warnings.warn("Deleting features without "
                               "observed values: %s" % missing)
@@ -366,7 +382,7 @@ def transform(self, X):
             n_missing = np.sum(mask, axis=self.axis)
             values = np.repeat(valid_statistics, n_missing)
 
-            if self.axis == 0:
+            if self.axis is None or self.axis == 0:
                 coordinates = np.where(mask.transpose())[::-1]
             else:
                 coordinates = mask

From 7d40c5f94383e7a5ee6cd18bf2c156163572d1ca Mon Sep 17 00:00:00 2001
From: petrushev <b.petrushev@gmail.com>
Date: Sun, 3 Sep 2017 21:39:09 +0200
Subject: [PATCH 2/3] Add proxy property `Imputer.axis_` that is used in
 fit/transform and equals 0 when axis is None.

---
 doc/whats_new.rst                   |  2 +-
 sklearn/preprocessing/imputation.py | 50 +++++++++++++----------------
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 4e728afcb29e3..120573c1d09c5 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -47,12 +47,12 @@ Model evaluation and meta-estimators
 
 - A scorer based on :func:`metrics.brier_score_loss` is also available.
   :issue:`9521` by :user:`Hanmin Qin <qinhanmin2014>`.
+
 - The ``axis`` parameter in
   :class:`preprocessing.Imputer <preprocessing.Imputer>` is deprecated. Its
   removal is planned for 0.22 release. :issue:`9672` by
   :user:`Baze Petrushev <petrushev>`.
 
-
 Bug fixes
 .........
 
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index 048404c11edbb..da776067a929b 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -90,7 +90,7 @@ class Imputer(BaseEstimator, TransformerMixin):
 
         .. deprecated:: 0.20
            ``axis`` will be removed from ``Imputer``, and it will only impute
-           along columns (axis=0) in 0.22.
+           along columns (i.e., ``axis=0``) in 0.22.
 
     verbose : integer, optional (default=0)
         Controls the verbosity of the imputer.
@@ -122,15 +122,10 @@ def __init__(self, missing_values="NaN", strategy="mean",
                  axis=None, verbose=0, copy=True):
         self.missing_values = missing_values
         self.strategy = strategy
+        self.axis = axis
         self.verbose = verbose
         self.copy = copy
 
-        self.axis = axis
-        if axis is not None:
-            warnings.warn("'axis' will be removed from Imputer, and it will "
-                          "only impute along columns (axis=0) in 0.22",
-                          DeprecationWarning)
-
     def fit(self, X, y=None):
         """Fit the imputer on X.
 
@@ -152,14 +147,22 @@ def fit(self, X, y=None):
                              " got strategy={1}".format(allowed_strategies,
                                                         self.strategy))
 
-        if self.axis not in [None, 0, 1]:
+        if self.axis is None:
+            self.axis_ = 0
+        else:
+            warnings.warn("'axis' will be removed from Imputer, and it will "
+                          "only impute along columns (axis=0) in 0.22",
+                          DeprecationWarning)
+            self.axis_ = self.axis
+
+        if self.axis_ not in [0, 1]:
             raise ValueError("Can only impute missing values on axis 0 and 1, "
-                             " got axis={0}".format(self.axis))
+                             " got axis={0}".format(self.axis_))
 
         # Since two different arrays can be provided in fit(X) and
         # transform(X), the imputation data will be computed in transform()
         # when the imputation is done per sample (i.e., when axis=1).
-        if self.axis == 0 or self.axis is None:
+        if self.axis_ == 0:
             X = check_array(X, accept_sparse='csc', dtype=np.float64,
                             force_all_finite=False)
 
@@ -167,23 +170,19 @@ def fit(self, X, y=None):
                 self.statistics_ = self._sparse_fit(X,
                                                     self.strategy,
                                                     self.missing_values,
-                                                    self.axis)
+                                                    self.axis_)
             else:
                 self.statistics_ = self._dense_fit(X,
                                                    self.strategy,
                                                    self.missing_values,
-                                                   self.axis)
+                                                   self.axis_)
 
         return self
 
     def _sparse_fit(self, X, strategy, missing_values, axis):
         """Fit the transformer on sparse data."""
-        if axis is None:
-            axis = 0
-
         # Imputation is done "by column", so if we want to do it
         # by row we only need to convert the matrix to csr format.
-
         if axis == 1:
             X = X.tocsr()
         else:
@@ -262,9 +261,6 @@ def _sparse_fit(self, X, strategy, missing_values, axis):
 
     def _dense_fit(self, X, strategy, missing_values, axis):
         """Fit the transformer on dense data."""
-        if axis is None:
-            axis = 0
-
         X = check_array(X, force_all_finite=False)
         mask = _get_mask(X, missing_values)
         masked_X = ma.masked_array(X, mask=mask)
@@ -322,7 +318,7 @@ def transform(self, X):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             The input data to complete.
         """
-        if self.axis is None or self.axis == 0:
+        if self.axis_ == 0:
             check_is_fitted(self, 'statistics_')
             X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
                             force_all_finite=False, copy=self.copy)
@@ -342,27 +338,27 @@ def transform(self, X):
                 statistics = self._sparse_fit(X,
                                               self.strategy,
                                               self.missing_values,
-                                              self.axis)
+                                              self.axis_)
 
             else:
                 statistics = self._dense_fit(X,
                                              self.strategy,
                                              self.missing_values,
-                                             self.axis)
+                                             self.axis_)
 
         # Delete the invalid rows/columns
         invalid_mask = np.isnan(statistics)
         valid_mask = np.logical_not(invalid_mask)
         valid_statistics = statistics[valid_mask]
         valid_statistics_indexes = np.where(valid_mask)[0]
-        missing = np.arange(X.shape[not self.axis])[invalid_mask]
+        missing = np.arange(X.shape[not self.axis_])[invalid_mask]
 
-        if (self.axis is None or self.axis == 0) and invalid_mask.any():
+        if self.axis_ == 0 and invalid_mask.any():
             if self.verbose:
                 warnings.warn("Deleting features without "
                               "observed values: %s" % missing)
             X = X[:, valid_statistics_indexes]
-        elif self.axis == 1 and invalid_mask.any():
+        elif self.axis_ == 1 and invalid_mask.any():
             raise ValueError("Some rows only contain "
                              "missing values: %s" % missing)
 
@@ -379,10 +375,10 @@ def transform(self, X):
                 X = X.toarray()
 
             mask = _get_mask(X, self.missing_values)
-            n_missing = np.sum(mask, axis=self.axis)
+            n_missing = np.sum(mask, axis=self.axis_)
             values = np.repeat(valid_statistics, n_missing)
 
-            if self.axis is None or self.axis == 0:
+            if self.axis_ == 0:
                 coordinates = np.where(mask.transpose())[::-1]
             else:
                 coordinates = mask

From 7f88b5fe3c4d969d329111d1f50ac344f5148606 Mon Sep 17 00:00:00 2001
From: petrushev <b.petrushev@gmail.com>
Date: Tue, 5 Sep 2017 22:40:26 +0200
Subject: [PATCH 3/3] Switch to private proxy property `Imputer._axis`

---
 sklearn/preprocessing/imputation.py | 30 ++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index da776067a929b..881ccc31aed97 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -148,21 +148,21 @@ def fit(self, X, y=None):
                                                         self.strategy))
 
         if self.axis is None:
-            self.axis_ = 0
+            self._axis = 0
         else:
             warnings.warn("'axis' will be removed from Imputer, and it will "
                           "only impute along columns (axis=0) in 0.22",
                           DeprecationWarning)
-            self.axis_ = self.axis
+            self._axis = self.axis
 
-        if self.axis_ not in [0, 1]:
+        if self._axis not in [0, 1]:
             raise ValueError("Can only impute missing values on axis 0 and 1, "
-                             " got axis={0}".format(self.axis_))
+                             " got axis={0}".format(self._axis))
 
         # Since two different arrays can be provided in fit(X) and
         # transform(X), the imputation data will be computed in transform()
         # when the imputation is done per sample (i.e., when axis=1).
-        if self.axis_ == 0:
+        if self._axis == 0:
             X = check_array(X, accept_sparse='csc', dtype=np.float64,
                             force_all_finite=False)
 
@@ -170,12 +170,12 @@ def fit(self, X, y=None):
                 self.statistics_ = self._sparse_fit(X,
                                                     self.strategy,
                                                     self.missing_values,
-                                                    self.axis_)
+                                                    self._axis)
             else:
                 self.statistics_ = self._dense_fit(X,
                                                    self.strategy,
                                                    self.missing_values,
-                                                   self.axis_)
+                                                   self._axis)
 
         return self
 
@@ -318,7 +318,7 @@ def transform(self, X):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             The input data to complete.
         """
-        if self.axis_ == 0:
+        if self._axis == 0:
             check_is_fitted(self, 'statistics_')
             X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
                             force_all_finite=False, copy=self.copy)
@@ -338,27 +338,27 @@ def transform(self, X):
                 statistics = self._sparse_fit(X,
                                               self.strategy,
                                               self.missing_values,
-                                              self.axis_)
+                                              self._axis)
 
             else:
                 statistics = self._dense_fit(X,
                                              self.strategy,
                                              self.missing_values,
-                                             self.axis_)
+                                             self._axis)
 
         # Delete the invalid rows/columns
         invalid_mask = np.isnan(statistics)
         valid_mask = np.logical_not(invalid_mask)
         valid_statistics = statistics[valid_mask]
         valid_statistics_indexes = np.where(valid_mask)[0]
-        missing = np.arange(X.shape[not self.axis_])[invalid_mask]
+        missing = np.arange(X.shape[not self._axis])[invalid_mask]
 
-        if self.axis_ == 0 and invalid_mask.any():
+        if self._axis == 0 and invalid_mask.any():
             if self.verbose:
                 warnings.warn("Deleting features without "
                               "observed values: %s" % missing)
             X = X[:, valid_statistics_indexes]
-        elif self.axis_ == 1 and invalid_mask.any():
+        elif self._axis == 1 and invalid_mask.any():
             raise ValueError("Some rows only contain "
                              "missing values: %s" % missing)
 
@@ -375,10 +375,10 @@ def transform(self, X):
                 X = X.toarray()
 
             mask = _get_mask(X, self.missing_values)
-            n_missing = np.sum(mask, axis=self.axis_)
+            n_missing = np.sum(mask, axis=self._axis)
             values = np.repeat(valid_statistics, n_missing)
 
-            if self.axis_ == 0:
+            if self._axis == 0:
                 coordinates = np.where(mask.transpose())[::-1]
             else:
                 coordinates = mask