10 changes: 10 additions & 0 deletions doc/whats_new/v1.2.rst
@@ -110,6 +110,16 @@ Changelog
its memory footprint and runtime.
:pr:`22268` by :user:`MohamedBsh <Bsh>`.

:mod:`sklearn.decomposition`
............................

- |API| The `n_iter` parameter of :class:`decomposition.MiniBatchSparsePCA` is
deprecated and replaced by the parameters `max_iter`, `tol`, and
`max_no_improvement` to be consistent with
:class:`decomposition.MiniBatchDictionaryLearning`. `n_iter` will be removed
in version 1.4. :pr:`23726` by :user:`Guillaume Lemaitre <glemaitre>`.


:mod:`sklearn.ensemble`
.......................

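As a quick illustration of the changelog entry above, here is a minimal, hedged sketch of the new `MiniBatchSparsePCA` signature (assuming scikit-learn >= 1.2; the data and settings are illustrative only, taken from the parameter names in the diff below):

import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

rng = np.random.RandomState(0)
X = rng.randn(100, 20)

# `max_iter` bounds the number of passes over the data, while `tol` and
# `max_no_improvement` control early stopping; `n_iter` is no longer passed.
spca = MiniBatchSparsePCA(
    n_components=5,
    max_iter=50,
    tol=1e-3,
    max_no_improvement=10,
    batch_size=10,
    random_state=0,
)
X_reduced = spca.fit_transform(X)
print(X_reduced.shape)  # (100, 5)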
25 changes: 14 additions & 11 deletions sklearn/decomposition/_dict_learning.py
@@ -794,7 +794,7 @@ def dict_learning_online(
Number of mini-batch iterations to perform.

.. deprecated:: 1.1
`n_iter` is deprecated in 1.1 and will be removed in 1.3. Use
`n_iter` is deprecated in 1.1 and will be removed in 1.4. Use
`max_iter` instead.

max_iter : int, default=None
@@ -1758,7 +1758,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
Total number of iterations over data batches to perform.

.. deprecated:: 1.1
``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use
``n_iter`` is deprecated in 1.1 and will be removed in 1.4. Use
``max_iter`` instead.

max_iter : int, default=None
@@ -2251,6 +2251,17 @@ def fit(self, X, y=None):
)

self._check_params(X)

if self.n_iter != "deprecated":
warnings.warn(
"'n_iter' is deprecated in version 1.1 and will be removed "
"in version 1.4. Use 'max_iter' and let 'n_iter' to its default "
"value instead. 'n_iter' is also ignored if 'max_iter' is "
"specified.",
FutureWarning,
)
n_iter = self.n_iter

self._random_state = check_random_state(self.random_state)

dictionary = self._initialize_dict(X, self._random_state)
@@ -2310,15 +2321,7 @@ def fit(self, X, y=None):
self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)
else:
# TODO remove this branch in 1.4
if self.n_iter != "deprecated":
warnings.warn(
"'n_iter' is deprecated in version 1.1 and will be removed"
" in version 1.3. Use 'max_iter' instead.",
FutureWarning,
)
n_iter = self.n_iter
else:
n_iter = 1000
n_iter = 1000 if self.n_iter == "deprecated" else self.n_iter

batches = gen_batches(n_samples, self._batch_size)
batches = itertools.cycle(batches)
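A hedged sketch of the behavior the hunks above implement, assuming a scikit-learn version inside the deprecation window (1.1 or later, before the 1.4 removal): passing `n_iter` to `MiniBatchDictionaryLearning` now always emits a `FutureWarning` in `fit`, and `n_iter` is ignored whenever `max_iter` is specified. Data and settings here are illustrative:

import warnings

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning

X = np.random.RandomState(0).randn(64, 8)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # `n_iter` is deprecated; because `max_iter` is also given, it is ignored.
    model = MiniBatchDictionaryLearning(
        n_components=4, batch_size=16, n_iter=50, max_iter=2, random_state=0
    ).fit(X)

assert any(issubclass(w.category, FutureWarning) for w in caught)
assert model.n_iter_ <= 2  # bounded by `max_iter`, not the deprecated `n_iter`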
103 changes: 60 additions & 43 deletions sklearn/decomposition/_sparse_pca.py
@@ -2,15 +2,13 @@
# Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort
# License: BSD 3 clause

import warnings

import numpy as np

from ..utils import check_random_state
from ..utils.validation import check_is_fitted
from ..linear_model import ridge_regression
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ._dict_learning import dict_learning, dict_learning_online
from ._dict_learning import dict_learning, MiniBatchDictionaryLearning


class SparsePCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
@@ -275,6 +273,17 @@ class MiniBatchSparsePCA(SparsePCA):
n_iter : int, default=100
Number of iterations to perform for each mini batch.

.. deprecated:: 1.2
`n_iter` is deprecated in 1.2 and will be removed in 1.4. Use
`max_iter` instead.

max_iter : int, default=None
Maximum number of iterations over the complete dataset before
stopping independently of any early stopping criterion heuristics.
If `max_iter` is not `None`, `n_iter` is ignored.

.. versionadded:: 1.2

callback : callable, default=None
Callable that gets invoked every five iterations.

@@ -307,6 +316,25 @@ class MiniBatchSparsePCA(SparsePCA):
across multiple function calls.
See :term:`Glossary <random_state>`.

tol : float, default=1e-3
Control early stopping based on the norm of the differences in the
dictionary between 2 steps. Used only if `max_iter` is not None.

To disable early stopping based on changes in the dictionary, set
`tol` to 0.0.

.. versionadded:: 1.1

max_no_improvement : int, default=10
Control early stopping based on the consecutive number of mini batches
that do not yield an improvement on the smoothed cost function. Used only
if `max_iter` is not None.

To disable convergence detection based on cost function, set
`max_no_improvement` to `None`.

.. versionadded:: 1.1
Review comment from @ogrisel (Member), Jun 24, 2022:
In retrospect it's a bit weird to introduce a new parameter in a bugfix release.

Maybe we should instead move this PR to target 1.2 and update the previous deprecation message for the MB sparse PCA to 1.4 instead of 1.3, so that both the MB dict learning and sparse PCA variants are actually removed in 1.4 at the same time, to ease maintenance.

Reply from a maintainer (Member):
I pushed 6ea3f39 to do that.


Attributes
----------
components_ : ndarray of shape (n_components, n_features)
@@ -350,15 +378,15 @@ class MiniBatchSparsePCA(SparsePCA):
>>> from sklearn.decomposition import MiniBatchSparsePCA
>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,
... random_state=0)
... max_iter=10, random_state=0)
>>> transformer.fit(X)
MiniBatchSparsePCA(...)
>>> X_transformed = transformer.transform(X)
>>> X_transformed.shape
(200, 5)
>>> # most values in the components_ are zero (sparsity)
>>> np.mean(transformer.components_ == 0)
0.94
0.9...
"""

def __init__(
@@ -367,14 +395,17 @@ def __init__(
*,
alpha=1,
ridge_alpha=0.01,
n_iter=100,
n_iter="deprecated",
max_iter=None,
callback=None,
batch_size=3,
verbose=False,
shuffle=True,
n_jobs=None,
method="lars",
random_state=None,
tol=1e-3,
max_no_improvement=10,
):
super().__init__(
n_components=n_components,
@@ -386,9 +417,12 @@
random_state=random_state,
)
self.n_iter = n_iter
self.max_iter = max_iter
self.callback = callback
self.batch_size = batch_size
self.shuffle = shuffle
self.tol = tol
self.max_no_improvement = max_no_improvement

def fit(self, X, y=None):
"""Fit the model from data in X.
@@ -418,44 +452,27 @@ def fit(self, X, y=None):
else:
n_components = self.n_components

with warnings.catch_warnings():
# return_n_iter and n_iter are deprecated. TODO Remove in 1.3
warnings.filterwarnings(
"ignore",
message=(
"'return_n_iter' is deprecated in version 1.1 and will be "
"removed in version 1.3. From 1.3 'n_iter' will never be "
"returned. Refer to the 'n_iter_' and 'n_steps_' attributes "
"of the MiniBatchDictionaryLearning object instead."
),
category=FutureWarning,
)
warnings.filterwarnings(
"ignore",
message=(
"'n_iter' is deprecated in version 1.1 and will be removed in "
"version 1.3. Use 'max_iter' instead."
),
category=FutureWarning,
)
Vt, _, self.n_iter_ = dict_learning_online(
X.T,
n_components,
alpha=self.alpha,
n_iter=self.n_iter,
return_code=True,
dict_init=None,
verbose=self.verbose,
callback=self.callback,
batch_size=self.batch_size,
shuffle=self.shuffle,
n_jobs=self.n_jobs,
method=self.method,
random_state=random_state,
return_n_iter=True,
)
transform_algorithm = "lasso_" + self.method
est = MiniBatchDictionaryLearning(
n_components=n_components,
alpha=self.alpha,
n_iter=self.n_iter,
max_iter=self.max_iter,
dict_init=None,
batch_size=self.batch_size,
shuffle=self.shuffle,
n_jobs=self.n_jobs,
fit_algorithm=self.method,
random_state=random_state,
transform_algorithm=transform_algorithm,
transform_alpha=self.alpha,
verbose=self.verbose,
callback=self.callback,
tol=self.tol,
max_no_improvement=self.max_no_improvement,
).fit(X.T)

self.components_ = Vt.T
self.components_, self.n_iter_ = est.transform(X.T).T, est.n_iter_

components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]
components_norm[components_norm == 0] = 1
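Conceptually, the refactor above makes `MiniBatchSparsePCA.fit` a thin wrapper around `MiniBatchDictionaryLearning` fitted on `X.T`, with the (transposed) sparse codes of `X.T` playing the role of the components. A hedged, simplified sketch of that equivalence follows; it is not the library's exact code path, and component values will differ because `MiniBatchSparsePCA` also normalizes its components afterwards:

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning, MiniBatchSparsePCA

rng = np.random.RandomState(0)
X = rng.randn(50, 12)

spca = MiniBatchSparsePCA(
    n_components=3, max_iter=20, batch_size=10, random_state=0
).fit(X)

# Rough equivalent with the parameter mapping shown in the diff: fit the
# dictionary learner on X.T, then take the transposed sparse codes of X.T.
dict_learner = MiniBatchDictionaryLearning(
    n_components=3,
    alpha=1,  # MiniBatchSparsePCA's default `alpha`
    max_iter=20,
    batch_size=10,
    fit_algorithm="lars",  # maps from `method="lars"`
    transform_algorithm="lasso_lars",  # "lasso_" + method
    transform_alpha=1,
    random_state=0,
).fit(X.T)
components = dict_learner.transform(X.T).T

assert components.shape == spca.components_.shape  # (3, 12)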
13 changes: 12 additions & 1 deletion sklearn/decomposition/tests/test_dict_learning.py
@@ -721,7 +721,7 @@ def test_minibatch_dict_learning_n_iter_deprecated():
# check the deprecation warning of n_iter
# FIXME: remove in 1.4
depr_msg = (
"'n_iter' is deprecated in version 1.1 and will be removed in version 1.3"
"'n_iter' is deprecated in version 1.1 and will be removed in version 1.4"
)
est = MiniBatchDictionaryLearning(
n_components=2, batch_size=4, n_iter=5, random_state=0
@@ -1072,3 +1072,14 @@ def test_get_feature_names_out(estimator):
feature_names_out,
[f"{estimator_name}{i}" for i in range(n_components)],
)


# TODO(1.4) remove
def test_minibatch_dictionary_learning_warns_and_ignore_n_iter():
"""Check that we always raise a warning when `n_iter` is set even if it is
ignored if `max_iter` is set.
"""
warn_msg = "'n_iter' is deprecated in version 1.1"
with pytest.warns(FutureWarning, match=warn_msg):
model = MiniBatchDictionaryLearning(batch_size=256, n_iter=2, max_iter=2).fit(X)
assert model.n_iter_ == 2
48 changes: 48 additions & 0 deletions sklearn/decomposition/tests/test_sparse_pca.py
@@ -265,3 +265,51 @@ def test_spca_feature_names_out(SPCA):

estimator_name = SPCA.__name__.lower()
assert_array_equal([f"{estimator_name}{i}" for i in range(4)], names)


# TODO (1.4): remove this test
def test_spca_n_iter_deprecation():
"""Check that we raise a warning for the deprecation of `n_iter` and it is ignored
when `max_iter` is specified.
"""
rng = np.random.RandomState(0)
n_samples, n_features = 12, 10
X = rng.randn(n_samples, n_features)

warn_msg = "'n_iter' is deprecated in version 1.1 and will be removed"
with pytest.warns(FutureWarning, match=warn_msg):
MiniBatchSparsePCA(n_iter=2).fit(X)

n_iter, max_iter = 1, 100
with pytest.warns(FutureWarning, match=warn_msg):
model = MiniBatchSparsePCA(
n_iter=n_iter, max_iter=max_iter, random_state=0
).fit(X)
assert model.n_iter_ > 1
assert model.n_iter_ <= max_iter


def test_spca_early_stopping(global_random_seed):
"""Check that `tol` and `max_no_improvement` act as early stopping."""
rng = np.random.RandomState(global_random_seed)
n_samples, n_features = 50, 10
X = rng.randn(n_samples, n_features)

# vary the tolerance to force the early stopping of one of the models
model_early_stopped = MiniBatchSparsePCA(
max_iter=100, tol=0.5, random_state=global_random_seed
).fit(X)
model_not_early_stopped = MiniBatchSparsePCA(
max_iter=100, tol=1e-3, random_state=global_random_seed
).fit(X)
assert model_early_stopped.n_iter_ < model_not_early_stopped.n_iter_

# vary `max_no_improvement` to force the early stopping of one of the models
model_early_stopped = MiniBatchSparsePCA(
max_iter=100, tol=1e-6, max_no_improvement=2, random_state=global_random_seed
).fit(X)
model_not_early_stopped = MiniBatchSparsePCA(
max_iter=100, tol=1e-6, max_no_improvement=100, random_state=global_random_seed
).fit(X)
assert model_early_stopped.n_iter_ < model_not_early_stopped.n_iter_
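
Complementing the early-stopping tests above, a small hedged sketch of the opposite case described in the docstring: with `tol=0.0` and `max_no_improvement=None` both criteria are disabled, so the model is expected to use the full `max_iter` budget. This is illustrative only; exact iteration counts depend on the installed version:

import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

X = np.random.RandomState(0).randn(50, 10)

# Disable both early-stopping criteria, as documented: `tol=0.0` turns off
# the dictionary-change check and `max_no_improvement=None` turns off the
# smoothed-cost check.
model = MiniBatchSparsePCA(
    max_iter=20, tol=0.0, max_no_improvement=None, batch_size=10, random_state=0
).fit(X)
assert model.n_iter_ == 20  # expected to exhaust the full iteration budget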