Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
5 changes: 5 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ Changelog
of each transformer in `output_indices_`. :pr:`18393` by
:user:`Luca Bittarello <lbittarello>`.

- |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports
non-string feature names returned by any of its transformers.
:pr:`18459` by :user:`Albert Villanova del Moral <albertvillanova>` and
:user:`Alonso Silva Allende <alonsosilvaallende>`.

:mod:`sklearn.datasets`
.......................

Expand Down
2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def get_feature_names(self):
raise AttributeError("Transformer %s (type %s) does not "
"provide get_feature_names."
% (str(name), type(trans).__name__))
feature_names.extend([name + "__" + f for f in
feature_names.extend([f"{name}__{f}" for f in
trans.get_feature_names()])
return feature_names

Expand Down
28 changes: 18 additions & 10 deletions sklearn/compose/tests/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,31 +748,38 @@ def test_column_transformer_cloning():
assert hasattr(ct.transformers_[0][1], 'mean_')


def test_column_transformer_get_feature_names_raises():
    """Check the errors raised by ``ColumnTransformer.get_feature_names``.

    Two failure modes are exercised on the same estimator:

    * calling ``get_feature_names`` before ``fit`` raises ``NotFittedError``;
    * calling it after ``fit`` raises ``AttributeError`` with the
      "does not provide get_feature_names" message (the ``Trans`` helper
      is presumably a transformer without that method -- it is defined
      elsewhere in this test module).
    """
    X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T
    ct = ColumnTransformer([('trans', Trans(), [0, 1])])
    # raise correct error when not fitted
    with pytest.raises(NotFittedError):
        ct.get_feature_names()
    # raise correct error when no feature names are available
    ct.fit(X_array)
    # ``match`` is applied as a regular-expression search, so the literal
    # parentheses of the message have to be escaped.
    msg = (r"Transformer trans \(type Trans\) does not provide "
           r"get_feature_names")
    with pytest.raises(AttributeError, match=msg):
        ct.get_feature_names()

# working example
X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
[{'c': 5}, {'c': 6}]], dtype=object).T

@pytest.mark.parametrize("X, keys", [
(np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
[{'c': 5}, {'c': 6}]], dtype=object).T, ('a', 'b', 'c')),
(np.array([[{1: 1, 2: 2}, {1: 3, 2: 4}],
[{3: 5}, {3: 6}]], dtype=object).T, ('1', '2', '3')),
])
def test_column_transformer_get_feature_names(X, keys):
ct = ColumnTransformer(
[('col' + str(i), DictVectorizer(), i) for i in range(2)])
ct.fit(X)
assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c']
assert ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]] + \
[f'col1__{keys[2]}']

# drop transformer
ct = ColumnTransformer(
[('col0', DictVectorizer(), 0), ('col1', 'drop', 1)])
ct.fit(X)
assert ct.get_feature_names() == ['col0__a', 'col0__b']
assert ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]]

# passthrough transformer
ct = ColumnTransformer([('trans', 'passthrough', [0, 1])])
Expand All @@ -782,7 +789,8 @@ def test_column_transformer_get_feature_names():
ct = ColumnTransformer([('trans', DictVectorizer(), 0)],
remainder='passthrough')
ct.fit(X)
assert ct.get_feature_names() == ['trans__a', 'trans__b', 'x1']
assert ct.get_feature_names() == [f'trans__{key}' for key in keys[:2]] + \
['x1']

ct = ColumnTransformer([('trans', 'passthrough', [1])],
remainder='passthrough')
Expand Down