Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
3 changes: 3 additions & 0 deletions doc/whats_new/upcoming_changes/sklearn.compose/32188.fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- The :class:`compose.ColumnTransformer` now correctly fits on data provided as a
`polars.DataFrame` when any transformer has a sparse output.
By :user:`Phillipp Gnan <ph-ll-pp>`.
6 changes: 3 additions & 3 deletions sklearn/compose/_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,10 +1014,10 @@ def fit_transform(self, X, y=None, **params):

# determine if concatenated output will be sparse or not
if any(sparse.issparse(X) for X in Xs):
nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)
total = sum(
X.shape[0] * X.shape[1] if sparse.issparse(X) else X.size for X in Xs
nnz = sum(
X.nnz if sparse.issparse(X) else X.shape[0] * X.shape[1] for X in Xs
)
total = sum(X.shape[0] * X.shape[1] for X in Xs)
density = nnz / total
self.sparse_output_ = density < self.sparse_threshold
else:
Expand Down
15 changes: 9 additions & 6 deletions sklearn/compose/tests/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,14 +513,17 @@ def test_column_transformer_list():


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_column_transformer_sparse_stacking(csr_container):
X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
@pytest.mark.parametrize("constructor_name", ["array", "pandas", "polars"])
def test_column_transformer_sparse_stacking(csr_container, constructor_name):
X = np.array([[0, 1, 2], [2, 4, 6]]).T
X = _convert_container(X, constructor_name, columns_name=["first", "second"])

col_trans = ColumnTransformer(
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(csr_container), 1)],
sparse_threshold=0.8,
)
col_trans.fit(X_array)
X_trans = col_trans.transform(X_array)
col_trans.fit(X)
X_trans = col_trans.transform(X)
assert sparse.issparse(X_trans)
assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
assert_array_equal(X_trans.toarray()[:, 1:], np.eye(X_trans.shape[0]))
Expand All @@ -531,8 +534,8 @@ def test_column_transformer_sparse_stacking(csr_container):
[("trans1", Trans(), [0]), ("trans2", SparseMatrixTrans(csr_container), 1)],
sparse_threshold=0.1,
)
col_trans.fit(X_array)
X_trans = col_trans.transform(X_array)
col_trans.fit(X)
X_trans = col_trans.transform(X)
assert not sparse.issparse(X_trans)
assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
assert_array_equal(X_trans[:, 1:], np.eye(X_trans.shape[0]))
Expand Down
4 changes: 2 additions & 2 deletions sklearn/utils/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,12 +976,12 @@ def _convert_container(
container : array-like
The container to convert.
constructor_name : {"list", "tuple", "array", "sparse", "dataframe", \
"series", "index", "slice", "sparse_csr", "sparse_csc", \
"pandas", "series", "index", "slice", "sparse_csr", "sparse_csc", \
"sparse_csr_array", "sparse_csc_array", "pyarrow", "polars", \
"polars_series"}
The type of the returned container.
columns_name : index or array-like, default=None
For pandas container supporting `columns_names`, it will affect
For pandas/polars containers supporting `columns_name`, it will set the
column names.
dtype : dtype, default=None
Force the dtype of the container. Does not apply to `"slice"`
Expand Down
Loading