Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions sklearn/compose/_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,23 @@ def _log_message(self, name, idx, total):
return None
return "(%d of %d) Processing %s" % (idx, total, name)

def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):
def _check_fit_params(self, **fit_params):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this copied from pipeline or somewhere else?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it came from Pipeline. I tried to make it work the same way.

# fit_params_steps = {name: {} for name, _, _ in self.transformers if trans is not None}
fit_params_steps = {name: {} for name, _, _ in self.transformers}
for pname, pval in fit_params.items():
if "__" not in pname:
raise ValueError(
"ColumnTransformer.fit does not accept the {} parameter. "
"You can pass parameters to specific steps of your "
"column transformer using the stepname__parameter format, e.g. "
"`ColumnTransformer.fit(X, y, standardscaler__sample_weight"
"=sample_weight)`.".format(pname)
)
step, param = pname.split("__", 1)
fit_params_steps[step][param] = pval
return fit_params_steps

def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False, **fit_params_steps):
"""
Private function to fit and/or transform on demand.

Expand All @@ -611,6 +627,7 @@ def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):
weight=weight,
message_clsname="ColumnTransformer",
message=self._log_message(name, idx, len(transformers)),
**fit_params_steps[name],
)
for idx, (name, trans, column, weight) in enumerate(transformers, 1)
)
Expand All @@ -620,7 +637,7 @@ def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):
else:
raise

def fit(self, X, y=None):
def fit(self, X, y=None, **fit_params):
"""Fit all transformers using X.

Parameters
Expand All @@ -632,17 +649,20 @@ def fit(self, X, y=None):
y : array-like of shape (n_samples,...), default=None
Targets for supervised learning.

**fit_params : dict, default=None
Parameters to pass to the fit method of the estimator and transformers.

Returns
-------
self : ColumnTransformer
This estimator.
"""
# we use fit_transform to make sure to set sparse_output_ (for which we
# need the transformed data) to have consistent output type in predict
self.fit_transform(X, y=y)
self.fit_transform(X, y=y, **fit_params)
return self

def fit_transform(self, X, y=None):
def fit_transform(self, X, y=None, **fit_params):
"""Fit all transformers, transform the data and concatenate results.

Parameters
Expand All @@ -654,6 +674,9 @@ def fit_transform(self, X, y=None):
y : array-like of shape (n_samples,), default=None
Targets for supervised learning.

**fit_params : dict, default=None
Parameters to pass to the fit method of the estimator and transformers.

Returns
-------
X_t : {array-like, sparse matrix} of \
Expand All @@ -671,8 +694,9 @@ def fit_transform(self, X, y=None):
self._validate_transformers()
self._validate_column_callables(X)
self._validate_remainder(X)
fit_params_steps = self._check_fit_params(**fit_params)

result = self._fit_transform(X, y, _fit_transform_one)
result = self._fit_transform(X, y, _fit_transform_one, **fit_params_steps)

if not result:
self._update_fitted_transformers([])
Expand Down Expand Up @@ -717,6 +741,7 @@ def transform(self, X):
"""
check_is_fitted(self)
X = _check_X(X)
fit_params_steps = {name: {} for name, _, _ in self.transformers}

fit_dataframe_and_transform_dataframe = hasattr(
self, "feature_names_in_"
Expand Down Expand Up @@ -751,6 +776,7 @@ def transform(self, X):
_transform_one,
fitted=True,
column_as_strings=fit_dataframe_and_transform_dataframe,
**fit_params_steps,
)
self._validate_output(Xs)

Expand Down
62 changes: 62 additions & 0 deletions sklearn/tests/test_columntransformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Module-level check of how ``sample_weight`` affects ``StandardScaler`` when
# it is used on its own. The arrays computed here (``sc1_*``) are the
# reference values that the ColumnTransformer and Pipeline checks further
# down compare against, so their names must stay at module level.
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.utils._testing import assert_array_equal

data = {
    "x1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "x2": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
    "y": [0, 1, 2, 1, 2, 0, 0, 2, 1, 0],
    "equal_sample_weight": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    "sample_weight": [.5, 1, .5, 1, .5, 1, .5, 1, .5, 1],
}

df = pd.DataFrame.from_dict(data)

# check outside of columntransformer
# Each ``fit_transform`` call refits ``sc1`` from scratch, so the three
# results are independent of one another.
sc1 = StandardScaler()
sc1_xWeight = sc1.fit_transform(X=df[['x1']], y=df['y'])
sc1_wWeight = sc1.fit_transform(
    X=df[['x1']], y=df['y'], sample_weight=df['sample_weight']
)
sc1_wEqualWeight = sc1.fit_transform(
    X=df[['x1']], y=df['y'], sample_weight=df['equal_sample_weight']
)

print(f"xWeight: {sc1_xWeight}")
print(f"wWeight: {sc1_wWeight}")
print(f"wEqualWeight: {sc1_wEqualWeight}")

# Uniform weights must give the same scaling as passing no weights at all.
assert_array_equal(sc1_xWeight, sc1_wEqualWeight, err_msg="These should be equal")

# Non-uniform weights shift the weighted mean/variance, so the scaled output
# must differ from the unweighted one. The previous version asserted
# *equality* here, which always raised AssertionError and aborted the module
# before the ColumnTransformer and Pipeline checks could run.
assert not np.array_equal(sc1_xWeight, sc1_wWeight), (
    "Non-uniform sample_weight should change the scaled output"
)

# Same comparison as above, but with the scaler wrapped in a
# ColumnTransformer: fit parameters are addressed to a transformer using the
# ``<transformer name>__<parameter>`` key format.
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([("standard_scaler", StandardScaler(), ["x1"])])

# No fit parameters at all.
ct_xWeight = ct.fit_transform(X=df[['x1']], y=df['y'])

# Non-uniform weights routed to the "standard_scaler" transformer.
fit_params = dict(standard_scaler__sample_weight=df['sample_weight'])
ct_wWeight = ct.fit_transform(X=df[['x1']], y=df['y'], **fit_params)

# Uniform weights routed to the "standard_scaler" transformer.
fit_params = dict(standard_scaler__sample_weight=df['equal_sample_weight'])
ct_wEqualWeight = ct.fit_transform(X=df[['x1']], y=df['y'], **fit_params)

# The unweighted ColumnTransformer output must match the bare scaler.
assert_array_equal(sc1_xWeight, ct_xWeight, err_msg="These should be equal")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PEP 8: there shouldn't be a space after "=" here. Alternatively, I think you could just remove the err_msg.

# Weighted ColumnTransformer outputs must match the bare scaler fitted with
# the same weights.
assert_array_equal(sc1_wWeight, ct_wWeight, err_msg="These should be equal")
assert_array_equal(
    sc1_wEqualWeight, ct_wEqualWeight, err_msg="These should be equal"
)

# Test with Pipeline
# The ColumnTransformer is nested one level deeper, so the fit-parameter key
# gains the pipeline step name as an extra prefix:
# ``ct_step__standard_scaler__sample_weight``.
from sklearn.pipeline import Pipeline

ct2 = ColumnTransformer([("standard_scaler", StandardScaler(), ["x1"])])
pt = Pipeline([('ct_step', ct2), ('passthrough_test', "passthrough")])

# No fit parameters at all.
pt_xWeight = pt.fit_transform(X=df[['x1']], y=df['y'])

# Non-uniform weights.
fit_params = dict(ct_step__standard_scaler__sample_weight=df['sample_weight'])
pt_wWeight = pt.fit_transform(X=df[['x1']], y=df['y'], **fit_params)

# Uniform weights.
fit_params = dict(
    ct_step__standard_scaler__sample_weight=df['equal_sample_weight']
)
pt_wEqualWeight = pt.fit_transform(X=df[['x1']], y=df['y'], **fit_params)

# Every pipeline output must match the bare scaler fitted the same way.
assert_array_equal(sc1_xWeight, pt_xWeight, err_msg="These should be equal")
assert_array_equal(sc1_wWeight, pt_wWeight, err_msg="These should be equal")
assert_array_equal(
    sc1_wEqualWeight, pt_wEqualWeight, err_msg="These should be equal"
)