1414from ..utils .deprecation import deprecated
1515from ..utils .validation import check_is_fitted
1616from ..utils .validation import _check_feature_names_in
17- from ..utils ._param_validation import Interval
18- from ..utils ._param_validation import StrOptions
17+ from ..utils ._param_validation import Interval , StrOptions , Hidden
1918from ..utils ._mask import _get_mask
2019
2120from ..utils ._encode import _encode , _check_unknown , _unique , _get_counts
@@ -209,7 +208,7 @@ class OneHotEncoder(_BaseEncoder):
209208 strings, denoting the values taken on by categorical (discrete) features.
210209 The features are encoded using a one-hot (aka 'one-of-K' or 'dummy')
211210 encoding scheme. This creates a binary column for each category and
212- returns a sparse matrix or dense array (depending on the ``sparse ``
211+ returns a sparse matrix or dense array (depending on the ``sparse_output ``
213212 parameter)
214213
215214 By default, the encoder derives the categories based on the unique values
@@ -271,6 +270,16 @@ class OneHotEncoder(_BaseEncoder):
271270 sparse : bool, default=True
272271 Will return sparse matrix if set True else will return an array.
273272
273+ .. deprecated:: 1.2
274+ `sparse` is deprecated in 1.2 and will be removed in 1.4. Use
275+ `sparse_output` instead.
276+
277+ sparse_output : bool, default=True
278+ Will return a sparse matrix if set to True, otherwise will return an array.
279+
280+ .. versionadded:: 1.2
281+ `sparse` was renamed to `sparse_output`.
282+
274283 dtype : number type, default=float
275284 Desired dtype of output.
276285
@@ -331,7 +340,7 @@ class OneHotEncoder(_BaseEncoder):
331340 (if any).
332341
333342 drop_idx_ : array of shape (n_features,)
334- - ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category
343+ - ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category
335344 to be dropped for each feature.
336345 - ``drop_idx_[i] = None`` if no category is to be dropped from the
337346 feature with index ``i``, e.g. when `drop='if_binary'` and the
@@ -425,7 +434,7 @@ class OneHotEncoder(_BaseEncoder):
425434
426435 >>> import numpy as np
427436 >>> X = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object).T
428- >>> ohe = OneHotEncoder(max_categories=3, sparse =False).fit(X)
437+ >>> ohe = OneHotEncoder(max_categories=3, sparse_output =False).fit(X)
429438 >>> ohe.infrequent_categories_
430439 [array(['a', 'd'], dtype=object)]
431440 >>> ohe.transform([["a"], ["b"]])
@@ -444,22 +453,26 @@ class OneHotEncoder(_BaseEncoder):
444453 Interval (Real , 0 , 1 , closed = "neither" ),
445454 None ,
446455 ],
447- "sparse" : ["boolean" ],
456+ "sparse" : [Hidden (StrOptions ({"deprecated" })), "boolean" ], # deprecated
457+ "sparse_output" : ["boolean" ],
448458 }
449459
450460 def __init__ (
451461 self ,
452462 * ,
453463 categories = "auto" ,
454464 drop = None ,
455- sparse = True ,
465+ sparse = "deprecated" ,
466+ sparse_output = True ,
456467 dtype = np .float64 ,
457468 handle_unknown = "error" ,
458469 min_frequency = None ,
459470 max_categories = None ,
460471 ):
461472 self .categories = categories
473+ # TODO(1.4): Remove self.sparse
462474 self .sparse = sparse
475+ self .sparse_output = sparse_output
463476 self .dtype = dtype
464477 self .handle_unknown = handle_unknown
465478 self .drop = drop
@@ -798,6 +811,16 @@ def fit(self, X, y=None):
798811 Fitted encoder.
799812 """
800813 self ._validate_params ()
814+
815+ if self .sparse != "deprecated" :
816+ warnings .warn (
817+ "`sparse` was renamed to `sparse_output` in version 1.2 and "
818+ "will be removed in 1.4. `sparse_output` is ignored unless you "
819+ "leave `sparse` to its default value." ,
820+ FutureWarning ,
821+ )
822+ self .sparse_output = self .sparse
823+
801824 self ._check_infrequent_enabled ()
802825
803826 fit_results = self ._fit (
@@ -830,7 +853,7 @@ def transform(self, X):
830853 -------
831854 X_out : {ndarray, sparse matrix} of shape \
832855 (n_samples, n_encoded_features)
833- Transformed input. If `sparse =True`, a sparse matrix will be
856+ Transformed input. If `sparse_output =True`, a sparse matrix will be
834857 returned.
835858 """
836859 check_is_fitted (self )
@@ -879,7 +902,7 @@ def transform(self, X):
879902 shape = (n_samples , feature_indices [- 1 ]),
880903 dtype = self .dtype ,
881904 )
882- if not self .sparse :
905+ if not self .sparse_output :
883906 return out .toarray ()
884907 else :
885908 return out
0 commit comments