diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index a1f21723bac28..f6c58b1c3ce1d 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -216,6 +216,15 @@ Changelog for non-English characters. :pr:`18959` by :user:`Zero ` and :user:`wstates `. +:mod:`sklearn.utils` +.................... + +- |Enhancement| Deprecated the default value `random_state=0` of + :func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2, + the default value of `random_state` will be set to `None`. + :pr:`19459` by :user:`Cindy Bezuidenhout ` and + :user:`Clifford Akai-Nettey`. + :mod:`sklearn.calibration` ............................ diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 31ac63c42eb69..42a014dcd8ade 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -245,7 +245,7 @@ def randomized_range_finder(A, *, size, n_iter, @_deprecate_positional_args def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', power_iteration_normalizer='auto', transpose='auto', - flip_sign=True, random_state=0): + flip_sign=True, random_state='warn'): """Computes a truncated randomized SVD. Parameters @@ -296,11 +296,17 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', set to `True`, the sign ambiguity is resolved by making the largest loadings for each component in the left singular vectors positive. - random_state : int, RandomState instance or None, default=0 - The seed of the pseudo random number generator to use when shuffling - the data, i.e. getting the random vectors to initialize the algorithm. - Pass an int for reproducible results across multiple function calls. - See :term:`Glossary <random_state>`. + random_state : int, RandomState instance or None, default='warn' + The seed of the pseudo random number generator to use when + shuffling the data, i.e. getting the random vectors to initialize + the algorithm. Pass an int for reproducible results across multiple + function calls.
See :term:`Glossary <random_state>`. + + .. versionchanged:: 1.2 + The previous behavior (`random_state=0`) is deprecated, and + from v1.2 the default value will be `random_state=None`. Set + the value of `random_state` explicitly to suppress the deprecation + warning. Notes ----- @@ -326,10 +332,22 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', """ if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)): warnings.warn("Calculating SVD of a {} is expensive. " - "csr_matrix is more efficient.".format( - type(M).__name__), + "csr_matrix is more efficient.".format(type(M).__name__), sparse.SparseEfficiencyWarning) + if random_state == 'warn': + warnings.warn( + "If 'random_state' is not supplied, the current default " + "is to use 0 as a fixed seed. This will change to " + "None in version 1.2 leading to non-deterministic results " + "that better reflect nature of the randomized_svd solver. " + "If you want to silence this warning, set 'random_state' " + "to an integer seed or to None explicitly depending " + "if you want your code to be deterministic or not.", + FutureWarning + ) + random_state = 0 + random_state = check_random_state(random_state) n_random = n_components + n_oversamples n_samples, n_features = M.shape diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index cee4870b087c2..8e53d94d911f0 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -249,7 +249,8 @@ def test_randomized_svd_infinite_rank(): # compute the singular values of X using the fast approximate method # without the iterated power method _, sa, _ = randomized_svd(X, k, n_iter=0, - power_iteration_normalizer=normalizer) + power_iteration_normalizer=normalizer, + random_state=0) # the approximation does not tolerate the noise: assert np.abs(s[:k] - sa).max() > 0.1 @@ -257,7 +258,8 @@ def test_randomized_svd_infinite_rank(): # compute the singular values of X using the fast approximate method # with
iterated power method _, sap, _ = randomized_svd(X, k, n_iter=5, - power_iteration_normalizer=normalizer) + power_iteration_normalizer=normalizer, + random_state=0) # the iterated power method is still managing to get most of the # structure at the requested rank @@ -307,11 +309,13 @@ def test_randomized_svd_power_iteration_normalizer(): # Check that it diverges with many (non-normalized) power iterations U, s, Vt = randomized_svd(X, n_components, n_iter=2, - power_iteration_normalizer='none') + power_iteration_normalizer='none', + random_state=0) A = X - U.dot(np.diag(s).dot(Vt)) error_2 = linalg.norm(A, ord='fro') U, s, Vt = randomized_svd(X, n_components, n_iter=20, - power_iteration_normalizer='none') + power_iteration_normalizer='none', + random_state=0) A = X - U.dot(np.diag(s).dot(Vt)) error_20 = linalg.norm(A, ord='fro') assert np.abs(error_2 - error_20) > 100 @@ -401,14 +405,15 @@ def max_loading_is_positive(u, v): mat = np.arange(10 * 8).reshape(10, -1) # Without transpose - u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True) + u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True, + random_state=0) u_based, v_based = max_loading_is_positive(u_flipped, v_flipped) assert u_based assert not v_based # With transpose u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd( - mat, 3, flip_sign=True, transpose=True) + mat, 3, flip_sign=True, transpose=True, random_state=0) u_based, v_based = max_loading_is_positive( u_flipped_with_transpose, v_flipped_with_transpose) assert u_based