Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[MRG+2] ENH Passthrough DataFrame in FunctionTransformer #11043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats_new/v0.20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,12 @@ Misc
indices should be rejected.
:issue:`11327` by :user:`Karan Dhingra <kdhingra307>` and `Joel Nothman`_.

Preprocessing

- In :class:`preprocessing.FunctionTransformer`, the default of ``validate``
will change from ``True`` to ``False`` in 0.22.
:issue:`10655` by :user:`Guillaume Lemaitre <glemaitre>`.

Changes to estimator checks
---------------------------

Expand Down
35 changes: 26 additions & 9 deletions sklearn/preprocessing/_function_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,16 @@ class FunctionTransformer(BaseEstimator, TransformerMixin):

validate : bool, optional default=True
Indicate that the input X array should be checked before calling
func. If validate is false, there will be no input validation.
If it is true, then X will be converted to a 2-dimensional NumPy
array or sparse matrix. If this conversion is not possible or X
contains NaN or infinity, an exception is raised.
``func``. The possibilities are:

- If False, there is no input validation.
- If True, then X will be converted to a 2-dimensional NumPy array or
sparse matrix. If the conversion is not possible an exception is
raised.

.. deprecated:: 0.20
``validate=True`` as default will be replaced by
``validate=False`` in 0.22.

accept_sparse : boolean, optional
Indicate that func accepts a sparse matrix as input. If validate is
Expand All @@ -72,7 +78,7 @@ class FunctionTransformer(BaseEstimator, TransformerMixin):
Dictionary of additional keyword arguments to pass to inverse_func.

"""
def __init__(self, func=None, inverse_func=None, validate=True,
def __init__(self, func=None, inverse_func=None, validate=None,
accept_sparse=False, pass_y='deprecated', check_inverse=True,
kw_args=None, inv_kw_args=None):
self.func = func
Expand All @@ -84,6 +90,19 @@ def __init__(self, func=None, inverse_func=None, validate=True,
self.kw_args = kw_args
self.inv_kw_args = inv_kw_args

def _check_input(self, X):
    """Resolve the effective ``validate`` setting and optionally check X.

    When ``self.validate`` is None, validation is switched on and a
    FutureWarning announces the upcoming default change; otherwise the
    value given by the user is used as is. The resolved setting is stored
    on ``self._validate``.

    Parameters
    ----------
    X : object
        Input data handed to ``fit`` / ``transform``.

    Returns
    -------
    X_checked
        The result of ``check_array(X, accept_sparse=self.accept_sparse)``
        when validation is enabled, otherwise ``X`` itself, untouched.
    """
    # FIXME: Future warning to be removed in 0.22
    default_requested = self.validate is None
    # Record the effective setting before warning, so the attribute is set
    # even if the warning is escalated to an exception by a filter.
    self._validate = True if default_requested else self.validate
    if default_requested:
        warnings.warn("The default validate=True will be replaced by "
                      "validate=False in 0.22.", FutureWarning)

    if not self._validate:
        # No validation requested: pass the input through unchanged.
        return X
    return check_array(X, accept_sparse=self.accept_sparse)

def _check_inverse_transform(self, X):
"""Check that func and inverse_func are the inverse."""
idx_selected = slice(None, None, max(1, X.shape[0] // 100))
Expand Down Expand Up @@ -111,8 +130,7 @@ def fit(self, X, y=None):
-------
self
"""
if self.validate:
X = check_array(X, self.accept_sparse)
X = self._check_input(X)
if (self.check_inverse and not (self.func is None or
self.inverse_func is None)):
self._check_inverse_transform(X)
Expand Down Expand Up @@ -165,8 +183,7 @@ def inverse_transform(self, X, y='deprecated'):
kw_args=self.inv_kw_args)

def _transform(self, X, y=None, func=None, kw_args=None):
if self.validate:
X = check_array(X, self.accept_sparse)
X = self._check_input(X)

if func is None:
func = _identity
Expand Down
33 changes: 29 additions & 4 deletions sklearn/preprocessing/tests/test_function_transformer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
import numpy as np
from scipy import sparse

Expand Down Expand Up @@ -145,7 +146,8 @@ def test_check_inverse():
trans = FunctionTransformer(func=np.sqrt,
inverse_func=np.around,
accept_sparse=accept_sparse,
check_inverse=True)
check_inverse=True,
validate=True)
assert_warns_message(UserWarning,
"The provided functions are not strictly"
" inverse of each other. If you are sure you"
Expand All @@ -156,15 +158,38 @@ def test_check_inverse():
trans = FunctionTransformer(func=np.expm1,
inverse_func=np.log1p,
accept_sparse=accept_sparse,
check_inverse=True)
check_inverse=True,
validate=True)
Xt = assert_no_warnings(trans.fit_transform, X)
assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

# check that we don't check inverse when one of the func or inverse is not
# provided.
trans = FunctionTransformer(func=np.expm1, inverse_func=None,
check_inverse=True)
check_inverse=True, validate=True)
assert_no_warnings(trans.fit, X_dense)
trans = FunctionTransformer(func=None, inverse_func=np.expm1,
check_inverse=True)
check_inverse=True, validate=True)
assert_no_warnings(trans.fit, X_dense)


@pytest.mark.parametrize("validate, expected_warning",
                         [(None, FutureWarning),
                          (True, None),
                          (False, None)])
def test_function_transformer_future_warning(validate, expected_warning):
    """Check when the ``validate`` default-change FutureWarning is emitted.

    Leaving ``validate`` at None must raise a FutureWarning; passing an
    explicit True or False must not emit any warning at all.
    """
    # FIXME: to be removed in 0.22
    # Local import: ``warnings`` is not among this module's visible imports.
    import warnings

    X = np.random.randn(100, 10)
    transformer = FunctionTransformer(validate=validate)
    if expected_warning is None:
        # ``pytest.warns(None)`` is deprecated (pytest 7) and an error
        # (pytest 8); record warnings with the standard library instead.
        with warnings.catch_warnings(record=True) as record:
            # Make sure nothing is hidden by an "ignore"/"once" filter.
            warnings.simplefilter("always")
            transformer.fit_transform(X)
        assert not record
    else:
        with pytest.warns(expected_warning):
            transformer.fit_transform(X)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this simply be `assert not results`?



def test_function_transformer_frame():
    """A DataFrame goes through an unvalidated transformer as a DataFrame.

    With ``validate=False`` the input is not converted, so the output is
    expected to still expose the pandas ``.loc`` indexer.
    """
    # Skip the test when pandas is not installed.
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(np.random.randn(100, 10))
    result = FunctionTransformer(validate=False).fit_transform(frame)
    assert hasattr(result, 'loc')