Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[MRG] revert #10558 Deprecate axis parameter in imputer #10635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions doc/modules/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,9 @@ that contain the missing values::

>>> import numpy as np
>>> from sklearn.preprocessing import Imputer
>>> imp = Imputer(missing_values='NaN', strategy='mean')
>>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
>>> imp.fit([[1, 2], [np.nan, 3], [7, 6]])
Imputer(axis=None, copy=True, missing_values='NaN', strategy='mean',
verbose=0)
Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)
>>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
>>> print(imp.transform(X)) # doctest: +ELLIPSIS
[[ 4. 2. ]
Expand All @@ -628,9 +627,9 @@ The :class:`Imputer` class also supports sparse matrices::

>>> import scipy.sparse as sp
>>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]])
>>> imp = Imputer(missing_values=0, strategy='mean')
>>> imp = Imputer(missing_values=0, strategy='mean', axis=0)
>>> imp.fit(X)
Imputer(axis=None, copy=True, missing_values=0, strategy='mean', verbose=0)
Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0)
>>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
>>> print(imp.transform(X_test)) # doctest: +ELLIPSIS
[[ 4. 2. ]
Expand Down
6 changes: 0 additions & 6 deletions doc/whats_new/v0.20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,6 @@ Outlier Detection models
``raw_values`` parameter is deprecated as the shifted Mahalanobis distance
will be always returned in 0.22. :issue:`9015` by `Nicolas Goix`_.

Preprocessing

- Deprecate ``axis`` parameter in :func:`preprocessing.Imputer`.
:issue:`10558` by :user:`Baze Petrushev <petrushev>` and
:user:`Hanmin Qin <qinhanmin2014>`.

Misc

- Changed warning type from UserWarning to ConvergenceWarning for failing
Expand Down
3 changes: 2 additions & 1 deletion examples/plot_missing_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@
X_missing[np.where(missing_samples)[0], missing_features] = 0
y_missing = y_full.copy()
estimator = Pipeline([("imputer", Imputer(missing_values=0,
strategy="mean")),
strategy="mean",
axis=0)),
("forest", RandomForestRegressor(random_state=0,
n_estimators=100))])
score = cross_val_score(estimator, X_missing, y_missing).mean()
Expand Down
47 changes: 15 additions & 32 deletions sklearn/preprocessing/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,12 @@ class Imputer(BaseEstimator, TransformerMixin):
- If "most_frequent", then replace missing using the most frequent
value along the axis.

axis : integer, optional (default=None)
axis : integer, optional (default=0)
The axis along which to impute.

- If `axis=0`, then impute along columns.
- If `axis=1`, then impute along rows.

.. deprecated:: 0.20
Parameter ``axis`` has been deprecated in 0.20 and will be removed
in 0.22. Future (and default) behavior is equivalent to ``axis=0``
(impute along columns). Row-wise imputation can be performed with
FunctionTransformer (e.g.,
``FunctionTransformer(lambda X: Imputer().fit_transform(X.T).T)``).

verbose : integer, optional (default=0)
Controls the verbosity of the imputer.

Expand Down Expand Up @@ -122,7 +115,7 @@ class Imputer(BaseEstimator, TransformerMixin):
contain missing values).
"""
def __init__(self, missing_values="NaN", strategy="mean",
axis=None, verbose=0, copy=True):
axis=0, verbose=0, copy=True):
self.missing_values = missing_values
self.strategy = strategy
self.axis = axis
Expand All @@ -149,37 +142,27 @@ def fit(self, X, y=None):
" got strategy={1}".format(allowed_strategies,
self.strategy))

if self.axis is None:
self._axis = 0
else:
warnings.warn("Parameter 'axis' has been deprecated in 0.20 and "
"will be removed in 0.22. Future (and default) "
"behavior is equivalent to 'axis=0' (impute along "
"columns). Row-wise imputation can be performed "
"with FunctionTransformer.", DeprecationWarning)
self._axis = self.axis

if self._axis not in [0, 1]:
if self.axis not in [0, 1]:
raise ValueError("Can only impute missing values on axis 0 and 1, "
" got axis={0}".format(self._axis))
" got axis={0}".format(self.axis))

# Since two different arrays can be provided in fit(X) and
# transform(X), the imputation data will be computed in transform()
# when the imputation is done per sample (i.e., when axis=1).
if self._axis == 0:
if self.axis == 0:
X = check_array(X, accept_sparse='csc', dtype=np.float64,
force_all_finite=False)

if sparse.issparse(X):
self.statistics_ = self._sparse_fit(X,
self.strategy,
self.missing_values,
self._axis)
self.axis)
else:
self.statistics_ = self._dense_fit(X,
self.strategy,
self.missing_values,
self._axis)
self.axis)

return self

Expand Down Expand Up @@ -322,7 +305,7 @@ def transform(self, X):
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
The input data to complete.
"""
if self._axis == 0:
if self.axis == 0:
check_is_fitted(self, 'statistics_')
X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
force_all_finite=False, copy=self.copy)
Expand All @@ -342,27 +325,27 @@ def transform(self, X):
statistics = self._sparse_fit(X,
self.strategy,
self.missing_values,
self._axis)
self.axis)

else:
statistics = self._dense_fit(X,
self.strategy,
self.missing_values,
self._axis)
self.axis)

# Delete the invalid rows/columns
invalid_mask = np.isnan(statistics)
valid_mask = np.logical_not(invalid_mask)
valid_statistics = statistics[valid_mask]
valid_statistics_indexes = np.where(valid_mask)[0]
missing = np.arange(X.shape[not self._axis])[invalid_mask]
missing = np.arange(X.shape[not self.axis])[invalid_mask]

if self._axis == 0 and invalid_mask.any():
if self.axis == 0 and invalid_mask.any():
if self.verbose:
warnings.warn("Deleting features without "
"observed values: %s" % missing)
X = X[:, valid_statistics_indexes]
elif self._axis == 1 and invalid_mask.any():
elif self.axis == 1 and invalid_mask.any():
raise ValueError("Some rows only contain "
"missing values: %s" % missing)

Expand All @@ -379,10 +362,10 @@ def transform(self, X):
X = X.toarray()

mask = _get_mask(X, self.missing_values)
n_missing = np.sum(mask, axis=self._axis)
n_missing = np.sum(mask, axis=self.axis)
values = np.repeat(valid_statistics, n_missing)

if self._axis == 0:
if self.axis == 0:
coordinates = np.where(mask.transpose())[::-1]
else:
coordinates = mask
Expand Down
16 changes: 0 additions & 16 deletions sklearn/preprocessing/tests/test_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_false
from sklearn.utils.testing import assert_warns_message
from sklearn.utils.testing import ignore_warnings

from sklearn.preprocessing.imputation import Imputer
from sklearn.pipeline import Pipeline
Expand All @@ -17,7 +15,6 @@
from sklearn.random_projection import sparse_random_matrix


@ignore_warnings(category=DeprecationWarning) # To be removed in 0.22
def _check_statistics(X, X_true,
strategy, statistics, missing_values):
"""Utility function for testing imputation for a given strategy.
Expand Down Expand Up @@ -301,7 +298,6 @@ def test_imputation_pickle():
)


@ignore_warnings(category=DeprecationWarning) # To be removed in 0.22
def test_imputation_copy():
# Test imputation with copy
X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)
Expand Down Expand Up @@ -368,15 +364,3 @@ def test_imputation_copy():

# Note: If X is sparse and if missing_values=0, then a (dense) copy of X is
# made, even if copy=False.


def test_deprecated_imputer_axis():
depr_message = ("Parameter 'axis' has been deprecated in 0.20 and will "
"be removed in 0.22. Future (and default) behavior is "
"equivalent to 'axis=0' (impute along columns). Row-wise "
"imputation can be performed with FunctionTransformer.")
X = sparse_random_matrix(5, 5, density=0.75, random_state=0)
imputer = Imputer(missing_values=0, axis=0)
assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)
imputer = Imputer(missing_values=0, axis=1)
assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)