diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e09d2d09d7e43..a96ca4182856b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -669,6 +669,14 @@ def make_column_transformer(*transformers, **kwargs): non-specified columns will use the ``remainder`` estimator. The estimator must support `fit` and `transform`. + sparse_threshold : float, default = 0.3 + If the transformed output consists of a mix of sparse and dense data, + it will be stacked as a sparse matrix if the density is lower than this + value. Use ``sparse_threshold=0`` to always return dense. + When the transformed output consists of all sparse or all dense data, + the stacked result will be sparse or dense, respectively, and this + keyword will be ignored. + n_jobs : int or None, optional (default=None) Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. @@ -705,9 +713,11 @@ def make_column_transformer(*transformers, **kwargs): """ n_jobs = kwargs.pop('n_jobs', None) remainder = kwargs.pop('remainder', 'drop') + sparse_threshold = kwargs.pop('sparse_threshold', 0.3) if kwargs: raise TypeError('Unknown keyword arguments: "{}"' .format(list(kwargs.keys())[0])) transformer_list = _get_transformer_list(transformers) return ColumnTransformer(transformer_list, n_jobs=n_jobs, - remainder=remainder) + remainder=remainder, + sparse_threshold=sparse_threshold) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index f67806a52c543..4304b9184e923 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -431,11 +431,13 @@ def test_make_column_transformer_kwargs(): scaler = StandardScaler() norm = Normalizer() ct = make_column_transformer(('first', scaler), (['second'], norm), - n_jobs=3, remainder='drop') + n_jobs=3, remainder='drop', + sparse_threshold=0.3) assert_equal(ct.transformers, make_column_transformer( ('first', scaler), (['second'], norm)).transformers) assert_equal(ct.n_jobs, 3) assert_equal(ct.remainder, 'drop') + assert_equal(ct.sparse_threshold, 0.3) # invalid keyword parameters should raise an error message assert_raise_message( TypeError,