From c77670db1903ee414b66715da17bfd54b0e8c18b Mon Sep 17 00:00:00 2001 From: Alonso Silva Allende Date: Wed, 29 Jan 2020 16:23:58 +0100 Subject: [PATCH 1/7] Get column transform to accept other than strings --- sklearn/compose/_column_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e39c859f20fd1..3d3415b49e6ae 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -361,7 +361,7 @@ def get_feature_names(self): raise AttributeError("Transformer %s (type %s) does not " "provide get_feature_names." % (str(name), type(trans).__name__)) - feature_names.extend([name + "__" + f for f in + feature_names.extend([name + "__" + str(f) for f in trans.get_feature_names()]) return feature_names From 8903307e5f44e640f038ab3ec09c2e7bcfb0e279 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Sep 2020 10:27:40 +0200 Subject: [PATCH 2/7] Split test_raises --- sklearn/compose/tests/test_column_transformer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 4e58769e244c7..f4e8a76fb69a3 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -661,7 +661,7 @@ def test_column_transformer_cloning(): assert hasattr(ct.transformers_[0][1], 'mean_') -def test_column_transformer_get_feature_names(): +def test_column_transformer_get_feature_names_raises(): X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T ct = ColumnTransformer([('trans', Trans(), [0, 1])]) # raise correct error when not fitted @@ -669,10 +669,13 @@ def test_column_transformer_get_feature_names(): ct.get_feature_names() # raise correct error when no feature names are available ct.fit(X_array) - 
assert_raise_message(AttributeError, - "Transformer trans (type Trans) does not provide " - "get_feature_names", ct.get_feature_names) + msg = r"Transformer trans \(type Trans\) does not provide " \ + r"get_feature_names" + with pytest.raises(AttributeError, match=msg): + ct.get_feature_names() + +def test_column_transformer_get_feature_names(): # working example X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}], [{'c': 5}, {'c': 6}]], dtype=object).T From 4be5fc0fbda9f4433d1d8b3e2c5118cc6a378b3f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Sep 2020 10:54:37 +0200 Subject: [PATCH 3/7] Test with parametrize --- .../compose/tests/test_column_transformer.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index f4e8a76fb69a3..5e5d794048164 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -675,20 +675,22 @@ def test_column_transformer_get_feature_names_raises(): ct.get_feature_names() -def test_column_transformer_get_feature_names(): - # working example - X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}], - [{'c': 5}, {'c': 6}]], dtype=object).T +@pytest.mark.parametrize("X, keys", [ + (np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}], + [{'c': 5}, {'c': 6}]], dtype=object).T, ('a', 'b', 'c')), +]) +def test_column_transformer_get_feature_names(X, keys): ct = ColumnTransformer( [('col' + str(i), DictVectorizer(), i) for i in range(2)]) ct.fit(X) - assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c'] + assert ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]] + \ + [f'col1__{keys[2]}'] # drop transformer ct = ColumnTransformer( [('col0', DictVectorizer(), 0), ('col1', 'drop', 1)]) ct.fit(X) - assert ct.get_feature_names() == ['col0__a', 'col0__b'] + assert 
ct.get_feature_names() == [f'col0__{key}' for key in keys[:2]] # passthrough transformer ct = ColumnTransformer([('trans', 'passthrough', [0, 1])]) @@ -698,7 +700,8 @@ def test_column_transformer_get_feature_names(): ct = ColumnTransformer([('trans', DictVectorizer(), 0)], remainder='passthrough') ct.fit(X) - assert ct.get_feature_names() == ['trans__a', 'trans__b', 'x1'] + assert ct.get_feature_names() == [f'trans__{key}' for key in keys[:2]] + \ + ['x1'] ct = ColumnTransformer([('trans', 'passthrough', [1])], remainder='passthrough') From 1de6840dce53826472807983a1fc7584f72f4941 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Sep 2020 10:58:25 +0200 Subject: [PATCH 4/7] Add non-regression test --- sklearn/compose/tests/test_column_transformer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 5e5d794048164..1c42bd5d4878c 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -678,6 +678,8 @@ def test_column_transformer_get_feature_names_raises(): @pytest.mark.parametrize("X, keys", [ (np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}], [{'c': 5}, {'c': 6}]], dtype=object).T, ('a', 'b', 'c')), + (np.array([[{1: 1, 2: 2}, {1: 3, 2: 4}], + [{3: 5}, {3: 6}]], dtype=object).T, ('1', '2', '3')), ]) def test_column_transformer_get_feature_names(X, keys): ct = ColumnTransformer( From 9a6206c3b796123d597e23c5bf4043f22763595e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Sep 2020 13:17:56 +0200 Subject: [PATCH 5/7] Add whatsnew entry --- doc/whats_new/v0.24.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 81fb18cc276d4..8dccfb9eb3cbb 100644 --- a/doc/whats_new/v0.24.rst +++ 
b/doc/whats_new/v0.24.rst @@ -89,6 +89,11 @@ Changelog - |FIX| :class:`compose.ColumnTransformer` now displays the remainder in the diagram display. :pr:`18167` by `Thomas Fan`_. +- |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports + non-string feature names returned by any of its transformers. + :pr:`18459` by :user:`Albert Villanova del Moral ` and + :user:`Alonso Silva Allende `. + :mod:`sklearn.covariance` ......................... From 5a9c483325155a4508b2f5576117fdb5c7bd7909 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 30 Sep 2020 15:09:26 +0200 Subject: [PATCH 6/7] Accept suggested change Co-authored-by: Roman Yurchak --- sklearn/compose/_column_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 964ec24ffcaa6..c0c005d309160 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -378,7 +378,7 @@ def get_feature_names(self): raise AttributeError("Transformer %s (type %s) does not " "provide get_feature_names." % (str(name), type(trans).__name__)) - feature_names.extend([name + "__" + str(f) for f in + feature_names.extend([f"{name}__{f}" for f in trans.get_feature_names()]) return feature_names From 5d3ce6c666a8b71edef3071cfdfebcc3a472f878 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 8 Apr 2021 20:53:59 +0200 Subject: [PATCH 7/7] Add whatsnew entry to v1.0 --- doc/whats_new/v1.0.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index ce7da3139d140..602d4b1246878 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -105,6 +105,11 @@ Changelog of each transformer in `output_indices_`. :pr:`18393` by :user:`Luca Bittarello `. 
+- |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports + non-string feature names returned by any of its transformers. + :pr:`18459` by :user:`Albert Villanova del Moral <albertvillanova>` and + :user:`Alonso Silva Allende <alonsosilvaallende>`. + :mod:`sklearn.datasets` .......................