From dac63075212449584d975acabf59fecc9143b8a8 Mon Sep 17 00:00:00 2001 From: cinbez Date: Mon, 15 Feb 2021 09:08:43 +0200 Subject: [PATCH 1/9] Changes to extmath.py and test_extmath.py --- sklearn/utils/extmath.py | 20 ++++++++++++++------ sklearn/utils/tests/test_extmath.py | 14 +++++++++----- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index ba8ce9e2879b4..f8e089615a933 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -245,7 +245,7 @@ def randomized_range_finder(A, *, size, n_iter, @_deprecate_positional_args def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', power_iteration_normalizer='auto', transpose='auto', - flip_sign=True, random_state=0): + flip_sign=True, random_state='warn'): """Computes a truncated randomized SVD. Parameters @@ -296,11 +296,19 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', set to `True`, the sign ambiguity is resolved by making the largest loadings for each component in the left singular vectors positive. - random_state : int, RandomState instance or None, default=0 - The seed of the pseudo random number generator to use when shuffling - the data, i.e. getting the random vectors to initialize the algorithm. - Pass an int for reproducible results across multiple function calls. - See :term:`Glossary `. + random_state : int, RandomState instance or None, default='warn' + The seed of the pseudo random number generator to use when shuffling + the data, i.e. getting the random vectors to initialize the algorithm. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary `. + During the deprecation cycle this will set to zero and afterwards + it will be set to None. + The previous behavior (`random_state=0`) is deprecated, and + from v1.2 the default value will be `random_state=None`. Set + the value of `random_state` explicitly to suppress the deprecation + warning. 
+ + .. versionchanged:: 1.2 Notes ----- diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index cd0b1f3fd7f70..b5597e624a8b1 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -258,7 +258,8 @@ def test_randomized_svd_infinite_rank(): # compute the singular values of X using the fast approximate method # with iterated power method _, sap, _ = randomized_svd(X, k, n_iter=5, - power_iteration_normalizer=normalizer) + power_iteration_normalizer=normalizer, + random_state=0) # the iterated power method is still managing to get most of the # structure at the requested rank @@ -308,11 +309,13 @@ def test_randomized_svd_power_iteration_normalizer(): # Check that it diverges with many (non-normalized) power iterations U, s, Vt = randomized_svd(X, n_components, n_iter=2, - power_iteration_normalizer='none') + power_iteration_normalizer='none', + random_state=0) A = X - U.dot(np.diag(s).dot(Vt)) error_2 = linalg.norm(A, ord='fro') U, s, Vt = randomized_svd(X, n_components, n_iter=20, - power_iteration_normalizer='none') + power_iteration_normalizer='none', + random_state=0) A = X - U.dot(np.diag(s).dot(Vt)) error_20 = linalg.norm(A, ord='fro') assert np.abs(error_2 - error_20) > 100 @@ -402,14 +405,15 @@ def max_loading_is_positive(u, v): mat = np.arange(10 * 8).reshape(10, -1) # Without transpose - u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True) + u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True, + random_state=0) u_based, v_based = max_loading_is_positive(u_flipped, v_flipped) assert u_based assert not v_based # With transpose u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd( - mat, 3, flip_sign=True, transpose=True) + mat, 3, flip_sign=True, transpose=True,random_state=0) u_based, v_based = max_loading_is_positive( u_flipped_with_transpose, v_flipped_with_transpose) assert u_based From 4a68328196112a7a4b37e582b4d8fbe02cc99f0b Mon Sep 17 
00:00:00 2001 From: cinbez Date: Mon, 15 Feb 2021 09:17:43 +0200 Subject: [PATCH 2/9] Fixing flake8 issues --- sklearn/utils/extmath.py | 9 ++++----- sklearn/utils/tests/test_extmath.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index f8e089615a933..a8af06f22c5c2 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -297,17 +297,16 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', loadings for each component in the left singular vectors positive. random_state : int, RandomState instance or None, default='warn' - The seed of the pseudo random number generator to use when shuffling - the data, i.e. getting the random vectors to initialize the algorithm. - Pass an int for reproducible results across multiple function calls. - See :term:`Glossary `. + The seed of the pseudo random number generator to use when + shuffling the data, i.e. getting the random vectors to initialize + the algorithm. Pass an int for reproducible results across multiple + function calls. See :term:`Glossary `. During the deprecation cycle this will set to zero and afterwards it will be set to None. The previous behavior (`random_state=0`) is deprecated, and from v1.2 the default value will be `random_state=None`. Set the value of `random_state` explicitly to suppress the deprecation warning. - .. 
versionchanged:: 1.2 Notes diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index b5597e624a8b1..d89cf426d72c7 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -406,14 +406,14 @@ def max_loading_is_positive(u, v): # Without transpose u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True, - random_state=0) + random_state=0) u_based, v_based = max_loading_is_positive(u_flipped, v_flipped) assert u_based assert not v_based # With transpose u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd( - mat, 3, flip_sign=True, transpose=True,random_state=0) + mat, 3, flip_sign=True, transpose=True, random_state=0) u_based, v_based = max_loading_is_positive( u_flipped_with_transpose, v_flipped_with_transpose) assert u_based From d15f1438fe54e452f5f890a039f431d5cac91618 Mon Sep 17 00:00:00 2001 From: cinbez Date: Mon, 15 Feb 2021 09:21:29 +0200 Subject: [PATCH 3/9] flake 8 changes --- sklearn/utils/extmath.py | 8 ++++---- sklearn/utils/tests/test_extmath.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index a8af06f22c5c2..adc3c320d1a4b 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -297,10 +297,10 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', loadings for each component in the left singular vectors positive. random_state : int, RandomState instance or None, default='warn' - The seed of the pseudo random number generator to use when - shuffling the data, i.e. getting the random vectors to initialize - the algorithm. Pass an int for reproducible results across multiple - function calls. See :term:`Glossary `. + The seed of the pseudo random number generator to use when + shuffling the data, i.e. getting the random vectors to initialize + the algorithm. Pass an int for reproducible results across multiple + function calls. See :term:`Glossary `. 
During the deprecation cycle this will set to zero and afterwards it will be set to None. The previous behavior (`random_state=0`) is deprecated, and diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index d89cf426d72c7..5bf1821a40bd6 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -406,7 +406,7 @@ def max_loading_is_positive(u, v): # Without transpose u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True, - random_state=0) + random_state=0) u_based, v_based = max_loading_is_positive(u_flipped, v_flipped) assert u_based assert not v_based From 2b35f4f2cca2ddb702110c12e2fee546edaae143 Mon Sep 17 00:00:00 2001 From: cinbez Date: Fri, 19 Feb 2021 10:43:19 +0200 Subject: [PATCH 4/9] Added in deprecation warning --- doc/whats_new/v1.0.rst | 9 +++++++++ sklearn/utils/extmath.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index a00523ec2223b..4cef639d915fc 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -143,6 +143,15 @@ Changelog for non-English characters. :pr:`18959` by :user:`Zero ` and :user:`wstates `. +:mod:`sklearn.utils` +................................. + +- |Enhancement| Changed the default value of the random_state in + :func:`randomized_svd` of + :class:`~sklearn.utils.extmath`. + :pr:`19459` by :user:`Cindy Bezuidenhout ` and + :user:`Clifford Akai-Nettey`. + Code and Documentation Contributors ----------------------------------- diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index adc3c320d1a4b..f3eeaeb1e8e77 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -337,6 +337,19 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', type(M).__name__), sparse.SparseEfficiencyWarning) + if random_state == 'warn': + warnings.warn( + "If 'random_state' is not supplied, the current default " + "is to use 0 as a fixed seed. 
This will change to " + "None in version 1.2 leading to non-deterministic results " + "that better reflect nature of the randomized_svd solver. " + "If you want to silence this warning, set 'random_state' " + "to an integer seed or to None explicitly depending " + "if you want your code to be deterministic or not.", + FutureWarning + ) + random_state = 0 + random_state = check_random_state(random_state) n_random = n_components + n_oversamples n_samples, n_features = M.shape From 00756b73b6388baec030e0815bee1aeacee161ff Mon Sep 17 00:00:00 2001 From: gipc Date: Sat, 13 Mar 2021 08:13:05 +0000 Subject: [PATCH 5/9] sets random_state to 0 for the test_randomized_svd_infinite_rank function --- sklearn/utils/tests/test_extmath.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 5bf1821a40bd6..ede6977a26898 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -250,7 +250,8 @@ def test_randomized_svd_infinite_rank(): # compute the singular values of X using the fast approximate method # without the iterated power method _, sa, _ = randomized_svd(X, k, n_iter=0, - power_iteration_normalizer=normalizer) + power_iteration_normalizer=normalizer, + random_state=0) # the approximation does not tolerate the noise: assert np.abs(s[:k] - sa).max() > 0.1 From 34c284732b96ab1dbe09401dc9a3ffd4c7909546 Mon Sep 17 00:00:00 2001 From: gipc Date: Sat, 13 Mar 2021 11:04:07 +0000 Subject: [PATCH 6/9] fixes assertion error --- sklearn/utils/extmath.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index f3eeaeb1e8e77..5eb051fdf6824 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -296,18 +296,18 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', set to `True`, the sign ambiguity is resolved by making the 
largest loadings for each component in the left singular vectors positive. - random_state : int, RandomState instance or None, default='warn' - The seed of the pseudo random number generator to use when - shuffling the data, i.e. getting the random vectors to initialize - the algorithm. Pass an int for reproducible results across multiple - function calls. See :term:`Glossary `. - During the deprecation cycle this will set to zero and afterwards - it will be set to None. - The previous behavior (`random_state=0`) is deprecated, and - from v1.2 the default value will be `random_state=None`. Set - the value of `random_state` explicitly to suppress the deprecation - warning. - .. versionchanged:: 1.2 + random_state : int, RandomState instance or None, default='warn' + The seed of the pseudo random number generator to use when + shuffling the data, i.e. getting the random vectors to initialize + the algorithm. Pass an int for reproducible results across multiple + function calls. See :term:`Glossary `. + During the deprecation cycle this will set to zero and afterwards + it will be set to None. + The previous behavior (`random_state=0`) is deprecated, and + from v1.2 the default value will be `random_state=None`. Set + the value of `random_state` explicitly to suppress the deprecation + warning. + .. versionchanged:: 1.2 Notes ----- @@ -333,8 +333,7 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', """ if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)): warnings.warn("Calculating SVD of a {} is expensive. 
" - "csr_matrix is more efficient.".format( - type(M).__name__), + "csr_matrix is more efficient.".format(type(M).__name__), sparse.SparseEfficiencyWarning) if random_state == 'warn': From 59f503a2d731d6d26d7a48070fdbf2407bea6f98 Mon Sep 17 00:00:00 2001 From: gipc Date: Sat, 13 Mar 2021 15:49:12 +0000 Subject: [PATCH 7/9] updated the docstring --- sklearn/utils/extmath.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index c232280436f63..42a014dcd8ade 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -301,13 +301,12 @@ def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto', shuffling the data, i.e. getting the random vectors to initialize the algorithm. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. - During the deprecation cycle this will set to zero and afterwards - it will be set to None. - The previous behavior (`random_state=0`) is deprecated, and - from v1.2 the default value will be `random_state=None`. Set - the value of `random_state` explicitly to suppress the deprecation - warning. + .. versionchanged:: 1.2 + The previous behavior (`random_state=0`) is deprecated, and + from v1.2 the default value will be `random_state=None`. Set + the value of `random_state` explicitly to suppress the deprecation + warning. Notes ----- From a830402bc185ba55e7cc43c9131fd9d3ca8f63cc Mon Sep 17 00:00:00 2001 From: cliffordEmmanuel <45907515+cliffordEmmanuel@users.noreply.github.com> Date: Sat, 13 Mar 2021 15:50:51 +0000 Subject: [PATCH 8/9] Update doc/whats_new/v1.0.rst Co-authored-by: Thomas J. 
Fan --- doc/whats_new/v1.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 0ce6d5649875e..0456c1f3d80d0 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -219,9 +219,9 @@ Changelog :mod:`sklearn.utils` ................................. -- |Enhancement| Changed the default value of the random_state in - :func:`randomized_svd` of - :class:`~sklearn.utils.extmath`. +- |Enhancement| Deprecated the default value `random_state=0` in + :func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2, + the default value of `random_state` will be set to `None`. :pr:`19459` by :user:`Cindy Bezuidenhout ` and :user:`Clifford Akai-Nettey`. From ab7ae3b73987703a749e62785a640f58e7613c73 Mon Sep 17 00:00:00 2001 From: cliffordEmmanuel <45907515+cliffordEmmanuel@users.noreply.github.com> Date: Mon, 15 Mar 2021 06:27:01 +0000 Subject: [PATCH 9/9] Update doc/whats_new/v1.0.rst Co-authored-by: Thomas J. Fan --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 0456c1f3d80d0..f6c58b1c3ce1d 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -217,7 +217,7 @@ Changelog and :user:`wstates `. :mod:`sklearn.utils` -................................. +.................... - |Enhancement| Deprecated the default value `random_state=0` in :func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2,