From b939336ee2e4a4ed9391035a07f3e5ab824bfc6f Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Sat, 15 Oct 2022 11:16:53 -0400 Subject: [PATCH 1/6] DOC Improve docstring around set_output --- sklearn/compose/_column_transformer.py | 4 ++++ sklearn/pipeline.py | 8 ++++++++ sklearn/preprocessing/_function_transformer.py | 9 ++++----- sklearn/utils/_set_output.py | 9 ++++----- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index db7a7016c83ab..fa6f824529eb5 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -265,6 +265,10 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. + - `"default"`: Output of an un-configured transformer + - `"pandas"`: DataFrames output + - `"None"`: No-op + Returns ------- self : estimator instance diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 3f74acda1fc29..4fb0ee73fc550 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -158,6 +158,10 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. + - `"default"`: Output of an un-configured transformer + - `"pandas"`: DataFrames output + - `"None"`: No-op + Returns ------- self : estimator instance @@ -999,6 +1003,10 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. + - `"default"`: Output of an un-configured transformer + - `"pandas"`: DataFrames output + - `"None"`: No-op + Returns ------- self : estimator instance diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 228304bb70091..8990e488610b8 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -316,12 +316,11 @@ def set_output(self, *, transform=None): Parameters ---------- transform : {"default", "pandas"}, default=None - Configure output of the following estimator's methods: + Configure output of `transform` and `fit_transform`. - - `"transform"` - - `"fit_transform"` - - If `None`, this operation is a no-op. + - `"default"`: Output of an un-configured transformer + - `"pandas"`: DataFrames output + - `"None"`: No-op Returns ------- diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 525c6e0fe0118..1765124a0f5d4 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -206,12 +206,11 @@ def set_output(self, *, transform=None): Parameters ---------- transform : {"default", "pandas"}, default=None - Configure output of the following estimator's methods: + Configure output of `transform` and `fit_transform`. - - `"transform"` - - `"fit_transform"` - - If `None`, this operation is a no-op. + - `"default"`: Output of an un-configured transformer + - `"pandas"`: DataFrames output + - `"None"`: No-op Returns ------- From 0bf2c880b40fe0b2193eed7db06230468a998c8f Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Sat, 15 Oct 2022 11:20:25 -0400 Subject: [PATCH 2/6] DOC Improve docs around set_output --- doc/developers/develop.rst | 3 +++ sklearn/base.py | 6 +++--- sklearn/utils/_set_output.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 05f6ea26ac80e..ef55be3cdffca 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -659,6 +659,9 @@ setting `auto_wrap_output_keys=None` when defining a custom subclass:: def get_feature_names_out(self, input_features=None): ... +The default value for `auto_wrap_output_keys` is `("transform",)`, which automatically +wraps `fit_transform` and `transform`. + For transformers that return multiple arrays in `transform`, auto wrapping will only wrap the first array and not alter the other arrays. diff --git a/sklearn/base.py b/sklearn/base.py index 3ef2a908cdadf..d7d5d8f6644b5 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -808,9 +808,9 @@ def get_submatrix(self, i, data): class TransformerMixin(_SetOutputMixin): """Mixin class for all transformers in scikit-learn. - If :term:`get_feature_names_out` is defined and `auto_wrap_output` is True, - then `BaseEstimator` will automatically wrap `transform` and `fit_transform` to - follow the `set_output` API. See the :ref:`developer_api_set_output` for details. + If :term:`get_feature_names_out` is defined, then `BaseEstimator` will + automatically wrap `transform` and `fit_transform` to follow the `set_output` + API. See the :ref:`developer_api_set_output` for details. """ def fit_transform(self, X, y=None, **fit_params): diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 1765124a0f5d4..81f40dd8b6c16 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -167,7 +167,7 @@ class _SetOutputMixin: it based on `set_output` of the global configuration. `set_output` is only defined if `get_feature_names_out` is defined and - `auto_wrap_output` is True. + `auto_wrap_output_keys` is the default value. """ def __init_subclass__(cls, auto_wrap_output_keys=("transform",), **kwargs): From f5355a3a957b28feeff708ae89223eaee597b798 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 17 Oct 2022 14:29:02 -0400 Subject: [PATCH 3/6] DOC Address comments --- sklearn/compose/_column_transformer.py | 4 ++-- sklearn/pipeline.py | 8 ++++---- sklearn/preprocessing/_function_transformer.py | 4 ++-- sklearn/utils/_set_output.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index fa6f824529eb5..283290ff1826a 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -265,9 +265,9 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. - - `"default"`: Output of an un-configured transformer + - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `"None"`: No-op + - `None`: Current configuration is unchanged Returns ------- diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4fb0ee73fc550..2d3eb50b70fa6 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -158,9 +158,9 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. - - `"default"`: Output of an un-configured transformer + - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `"None"`: No-op + - `None`: Current configuration is unchanged Returns ------- @@ -1003,9 +1003,9 @@ def set_output(self, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. - - `"default"`: Output of an un-configured transformer + - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `"None"`: No-op + - `None`: Current configuration is unchanged Returns ------- diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 8990e488610b8..a764582e1f236 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -318,9 +318,9 @@ def set_output(self, *, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. - - `"default"`: Output of an un-configured transformer + - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `"None"`: No-op + - `None`: Current configuration is unchanged Returns ------- diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 81f40dd8b6c16..cba635bf89af4 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -208,9 +208,9 @@ def set_output(self, *, transform=None): transform : {"default", "pandas"}, default=None Configure output of `transform` and `fit_transform`. - - `"default"`: Output of an un-configured transformer + - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `"None"`: No-op + - `None`: Current configuration is unchanged Returns ------- From 8a7c46697c7786ef3d1bc9440183fc5592dec521 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 17 Oct 2022 14:36:29 -0400 Subject: [PATCH 4/6] DOC Better grammar --- sklearn/compose/_column_transformer.py | 2 +- sklearn/pipeline.py | 4 ++-- sklearn/preprocessing/_function_transformer.py | 2 +- sklearn/utils/_set_output.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 283290ff1826a..bf2d64e17461b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -267,7 +267,7 @@ def set_output(self, transform=None): - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `None`: Current configuration is unchanged + - `None`: Current transform configuration is unchanged Returns ------- diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2d3eb50b70fa6..53df7e046321c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -160,7 +160,7 @@ def set_output(self, transform=None): - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `None`: Current configuration is unchanged + - `None`: Current transform configuration is unchanged Returns ------- @@ -1005,7 +1005,7 @@ def set_output(self, transform=None): - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `None`: Current configuration is unchanged + - `None`: Current transform configuration is unchanged Returns ------- diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index a764582e1f236..30f65ffd7f8cd 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -320,7 +320,7 @@ def set_output(self, *, transform=None): - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `None`: Current configuration is unchanged + - `None`: Current transform configuration is unchanged Returns ------- diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index cba635bf89af4..e3f4def1369ca 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -210,7 +210,7 @@ def set_output(self, *, transform=None): - `"default"`: Default output format of a transformer - `"pandas"`: DataFrames output - - `None`: Current configuration is unchanged + - `None`: Current transform configuration is unchanged Returns ------- From 3c7a2081ed5ef6e5c8d71899b59ae6a6b9f40d6a Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 17 Oct 2022 15:07:54 -0400 Subject: [PATCH 5/6] DOC Improve wording --- sklearn/compose/_column_transformer.py | 4 ++-- sklearn/pipeline.py | 8 ++++---- sklearn/preprocessing/_function_transformer.py | 4 ++-- sklearn/utils/_set_output.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index bf2d64e17461b..1fb81e69647c7 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -266,8 +266,8 @@ def set_output(self, transform=None): Configure output of `transform` and `fit_transform`. - `"default"`: Default output format of a transformer - - `"pandas"`: DataFrames output - - `None`: Current transform configuration is unchanged + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged Returns ------- diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 53df7e046321c..dff4d4c1cd147 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -159,8 +159,8 @@ def set_output(self, transform=None): Configure output of `transform` and `fit_transform`. - `"default"`: Default output format of a transformer - - `"pandas"`: DataFrames output - - `None`: Current transform configuration is unchanged + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged Returns ------- @@ -1004,8 +1004,8 @@ def set_output(self, transform=None): Configure output of `transform` and `fit_transform`. - `"default"`: Default output format of a transformer - - `"pandas"`: DataFrames output - - `None`: Current transform configuration is unchanged + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged Returns ------- diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 30f65ffd7f8cd..d4c2cf6de7af2 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -319,8 +319,8 @@ def set_output(self, *, transform=None): Configure output of `transform` and `fit_transform`. - `"default"`: Default output format of a transformer - - `"pandas"`: DataFrames output - - `None`: Current transform configuration is unchanged + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged Returns ------- diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index e3f4def1369ca..5de296dc62d9b 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -209,8 +209,8 @@ def set_output(self, *, transform=None): Configure output of `transform` and `fit_transform`. - `"default"`: Default output format of a transformer - - `"pandas"`: DataFrames output - - `None`: Current transform configuration is unchanged + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged Returns ------- From c3102177ba54a4d780c11077b3e4506308214bb4 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 17 Oct 2022 15:13:46 -0400 Subject: [PATCH 6/6] DOC Improves docstring in set_config --- sklearn/_config.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sklearn/_config.py b/sklearn/_config.py index ea5c47499b5b4..e4c398c9c5444 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -123,7 +123,14 @@ def set_config( .. versionadded:: 1.2 transform_output : str, default=None - Configure the output container for transform. + Configure output of `transform` and `fit_transform`. + + See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py` + for an example on how to use the API. + + - `"default"`: Default output format of a transformer + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged .. versionadded:: 1.2 @@ -231,7 +238,14 @@ def config_context( .. versionadded:: 1.2 transform_output : str, default=None - Configure the output container for transform. + Configure output of `transform` and `fit_transform`. + + See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py` + for an example on how to use the API. + + - `"default"`: Default output format of a transformer + - `"pandas"`: DataFrame output + - `None`: Transform configuration is unchanged .. versionadded:: 1.2