From a0f1f7b108c553297c1734da1f1990dd2915a8a3 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 08:50:09 -0400 Subject: [PATCH 01/22] change AMG tolerance default & laplacian shift (fixes #13393) --- sklearn/manifold/spectral_embedding_.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index e387ecec0f4d5..2de79ccaa954f 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -288,11 +288,12 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, laplacian = check_array(laplacian, dtype=np.float64, accept_sparse=True) laplacian = _set_diag(laplacian, 1, norm_laplacian) + laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0]) ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12, + lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5, largest=False) embedding = diffusion_map.T if norm_laplacian: @@ -320,7 +321,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # doesn't behave well in low dimension X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15, + lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-5, largest=False, maxiter=2000) embedding = diffusion_map.T[:n_components] if norm_laplacian: From 6e5ecf6875dcb3aba974112f616ca9a74eb06c64 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 08:50:33 -0400 Subject: [PATCH 02/22] add spectral clustering test for AMG solver --- .../manifold/tests/test_spectral_embedding.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index bc9a718271271..d7477d2ed836f 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -181,6 +181,32 @@ def test_spectral_embedding_amg_solver(seed=36): assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 0.05) +def test_spectral_embedding_amg_solver_failure(seed=36): + # Test spectral embedding with amg solver failure + try: + from pyamg import smoothed_aggregation_solver # noqa + except ImportError: + raise SkipTest("pyamg not available.") + + # The generated graph below is NOT fully connected if n_neighbors=3 + n_samples = 200 + n_clusters = 3 + n_features = 3 + centers = np.eye(n_clusters, n_features) + S, true_labels = make_blobs(n_samples=n_samples, centers=centers, + cluster_std=1., random_state=42) + + se_amg0 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors", + eigen_solver="amg", n_neighbors=3, + random_state=np.random.RandomState(seed)) + se_amg1 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors", + eigen_solver="amg", n_neighbors=3, + random_state=np.random.RandomState(seed+1)) + embed_amg0 = se_amg0.fit_transform(S) + embed_amg1 = se_amg1.fit_transform(S) + assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05) + + @pytest.mark.filterwarnings("ignore:the behavior of nmi will " "change in version 0.22") def test_pipeline_spectral_clustering(seed=36): From d61cf3bcd519cac8d1f43e68423136b5b5641558 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 08:51:33 -0400 Subject: [PATCH 03/22] update docs with edits from Andrew Knyazev (& some fixed) --- doc/modules/clustering.rst | 50 +++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 711283b6f8790..3f59b13b4bd92 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -428,21 +428,24 @@ given sample. Spectral clustering =================== -:class:`SpectralClustering` does a low-dimension embedding of the -affinity matrix between samples, followed by a KMeans in the low -dimensional space. It is especially efficient if the affinity matrix is -sparse and the `pyamg `_ module is installed. -SpectralClustering requires the number of clusters to be specified. It -works well for a small number of clusters but is not advised when using -many clusters. - -For two clusters, it solves a convex relaxation of the `normalised -cuts `_ problem on -the similarity graph: cutting the graph in two so that the weight of the -edges cut is small compared to the weights of the edges inside each -cluster. This criteria is especially interesting when working on images: -graph vertices are pixels, and edges of the similarity graph are a -function of the gradient of the image. +:class:`SpectralClustering` performs a low-dimension embedding of the +affinity matrix between samples, followed by clustering, e.g., by KMeans, +of the components of the eigenvectors in the low dimensional space. +It is especially computationally efficient if the affinity matrix is sparse +and the `amg` solver is used for the eigenvalue problem (Note, the `amg` solver +requires that the `pyamg `_ module is installed.) + +The present version of SpectralClustering requires the number of clusters +to be specified in advance. It works well for a small number of clusters, +but is not advised for many clusters. + +For two clusters, SpectralClustering solves a convex relaxation of the +`normalised cuts `_ +problem on the similarity graph: cutting the graph in two so that the weight of +the edges cut is small compared to the weights of the edges inside each +cluster. This criteria is especially interesting when working on images, where +graph vertices are pixels, and weights of the edges of the similarity graph are +computed using a function of a gradient of the image. .. |noisy_img| image:: ../auto_examples/cluster/images/sphx_glr_plot_segmentation_toy_001.png @@ -489,12 +492,11 @@ Different label assignment strategies Different label assignment strategies can be used, corresponding to the ``assign_labels`` parameter of :class:`SpectralClustering`. -The ``"kmeans"`` strategy can match finer details of the data, but it can be -more unstable. In particular, unless you control the ``random_state``, it -may not be reproducible from run-to-run, as it depends on a random -initialization. On the other hand, the ``"discretize"`` strategy is 100% -reproducible, but it tends to create parcels of fairly even and -geometrical shape. +``"kmeans"`` strategy can match finer details, but can be unstable. +In particular, unless you control the ``random_state``, it may not be +reproducible from run-to-run, as it depends on random initialization. +The alternative ``"discretize"`` strategy is 100% reproducible, but tends +to create parcels of fairly even and geometrical shape. ===================================== ===================================== ``assign_labels="kmeans"`` ``assign_labels="discretize"`` @@ -505,7 +507,7 @@ geometrical shape. Spectral Clustering Graphs -------------------------- -Spectral Clustering can also be used to cluster graphs by their spectral +Spectral Clustering can also be used to partition graphs via their spectral embeddings. In this case, the affinity matrix is the adjacency matrix of the graph, and SpectralClustering is initialized with `affinity='precomputed'`:: @@ -532,6 +534,10 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`:: `_ Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 + * `"Preconditioned Spectral Clustering for Stochastic + Block Partition Streaming Graph Challenge" + `_ + David Zhuzhunashvili, Andrew Knyazev .. _hierarchical_clustering: From b0c4356356eb13918005bcb848014f1aa278cc63 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 09:39:16 -0400 Subject: [PATCH 04/22] revert tolerance value changes, not needed for AMG solver fix --- sklearn/manifold/spectral_embedding_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 2de79ccaa954f..8e3c03fa07c64 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -293,7 +293,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5, + lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12, largest=False) embedding = diffusion_map.T if norm_laplacian: @@ -321,7 +321,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # doesn't behave well in low dimension X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-5, + lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15, largest=False, maxiter=2000) embedding = diffusion_map.T[:n_components] if norm_laplacian: From 0c8390bf53b165bf055ac49840cf06e79b9ed601 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 09:59:36 -0400 Subject: [PATCH 05/22] update v0.21 changelog noting #13393 fix --- doc/whats_new/v0.21.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 473aa8d576193..ba59492bd62c2 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -410,6 +410,10 @@ Support for Python 3.4 and below has been officially dropped. space. This improves efficiency in particular when computed with lots of neighbors and/or small datasets. :issue:`9907` by :user:`William de Vazelhes `. +- [Fix] Fixed a bug in :func:`manifold.spectral_embedding` where + ``eigen_solver="amg"`` would sometimes result in a LinAlgError. + :issue:`13393` by :user:`Andrew Knyazev ` and + :user:`Scott White `. :mod:`sklearn.metrics` ...................... From f64decb261eac8da6212c61f58322ecc6c866fbe Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 19:36:40 -0400 Subject: [PATCH 06/22] simplify diag correction in spectral_embedding --- sklearn/manifold/spectral_embedding_.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 8e3c03fa07c64..91670520cab52 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -287,8 +287,9 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # lobpcg needs double precision floats laplacian = check_array(laplacian, dtype=np.float64, accept_sparse=True) - laplacian = _set_diag(laplacian, 1, norm_laplacian) - laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0]) + laplacian = _set_diag(laplacian, 1 + 1e-5, norm_laplacian) + + # noinspection PyUnboundLocalVariable ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) From cf126b208939532ba81237fee939ac9bb826fdc1 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 19:37:25 -0400 Subject: [PATCH 07/22] revert the reversion: increased tolerances are required --- sklearn/manifold/spectral_embedding_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 91670520cab52..59c089f4e99ae 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -294,7 +294,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12, + lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5, largest=False) embedding = diffusion_map.T if norm_laplacian: @@ -322,7 +322,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # doesn't behave well in low dimension X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15, + lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-5, largest=False, maxiter=2000) embedding = diffusion_map.T[:n_components] if norm_laplacian: From d9fc5eefa9e9195ee4ceedffa6c4e91e08f1cb41 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 20:07:04 -0400 Subject: [PATCH 08/22] use importorskip instead of try/except clause for availability of pyamg --- sklearn/manifold/tests/test_spectral_embedding.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index d7477d2ed836f..ae3b14a075844 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -165,10 +165,7 @@ def test_spectral_embedding_callable_affinity(seed=36): def test_spectral_embedding_amg_solver(seed=36): # Test spectral embedding with amg solver - try: - from pyamg import smoothed_aggregation_solver # noqa - except ImportError: - raise SkipTest("pyamg not available.") + pytest.importorskip('pyamg') se_amg = SpectralEmbedding(n_components=2, affinity="nearest_neighbors", eigen_solver="amg", n_neighbors=5, @@ -183,10 +180,7 @@ def test_spectral_embedding_amg_solver(seed=36): def test_spectral_embedding_amg_solver_failure(seed=36): # Test spectral embedding with amg solver failure - try: - from pyamg import smoothed_aggregation_solver # noqa - except ImportError: - raise SkipTest("pyamg not available.") + pytest.importorskip('pyamg') # The generated graph below is NOT fully connected if n_neighbors=3 n_samples = 200 From 1f544eaaa0175d41dc1544a3491cbd5fe592e640 Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 20:09:54 -0400 Subject: [PATCH 09/22] reference issue in amg solver failure test --- sklearn/manifold/tests/test_spectral_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index ae3b14a075844..89021734d2797 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -179,7 +179,7 @@ def test_spectral_embedding_amg_solver(seed=36): def test_spectral_embedding_amg_solver_failure(seed=36): - # Test spectral embedding with amg solver failure + # Test spectral embedding with amg solver failure, see issue #13393 pytest.importorskip('pyamg') # The generated graph below is NOT fully connected if n_neighbors=3 From 5a3a058fee51483ac6331aac2dea8f757bef690d Mon Sep 17 00:00:00 2001 From: whitews Date: Wed, 24 Apr 2019 20:56:22 -0400 Subject: [PATCH 10/22] clarify random seed change for spectral embedding amg failure test --- sklearn/manifold/tests/test_spectral_embedding.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index 89021734d2797..e6d49a130c671 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -193,11 +193,11 @@ def test_spectral_embedding_amg_solver_failure(seed=36): se_amg0 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors", eigen_solver="amg", n_neighbors=3, random_state=np.random.RandomState(seed)) - se_amg1 = SpectralEmbedding(n_components=3, affinity="nearest_neighbors", - eigen_solver="amg", n_neighbors=3, - random_state=np.random.RandomState(seed+1)) embed_amg0 = se_amg0.fit_transform(S) - embed_amg1 = se_amg1.fit_transform(S) + + se_amg0.set_params(random_state=np.random.RandomState(seed+1)) + embed_amg1 = se_amg0.fit_transform(S) + assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05) From 346cff08c360d4b920dfb59273b867fc27498a17 Mon Sep 17 00:00:00 2001 From: whitews Date: Thu, 25 Apr 2019 08:53:22 -0400 Subject: [PATCH 11/22] leave original tolerance for 'lobpcg' eigen solver --- sklearn/manifold/spectral_embedding_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 59c089f4e99ae..5d54d6831368a 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -322,7 +322,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # doesn't behave well in low dimension X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() - lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-5, + lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15, largest=False, maxiter=2000) embedding = diffusion_map.T[:n_components] if norm_laplacian: From 98b17eca1b3c4001f11b2aef42daf7adc32fa427 Mon Sep 17 00:00:00 2001 From: whitews Date: Thu, 23 May 2019 12:59:22 -0400 Subject: [PATCH 12/22] implement original shift code from lobpcg, add comment --- sklearn/manifold/spectral_embedding_.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 5d54d6831368a..b59bbd1ba0a41 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -287,7 +287,17 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # lobpcg needs double precision floats laplacian = check_array(laplacian, dtype=np.float64, accept_sparse=True) - laplacian = _set_diag(laplacian, 1 + 1e-5, norm_laplacian) + laplacian = _set_diag(laplacian, 1, norm_laplacian) + + # The Laplacian matrix is always singular, having at least one zero + # eigenvalue, corresponding to the trivial eigenvector, which is a + # constant. Using a singular matrix for preconditioning may result in + # random failures in LOBPCG and is not supported by the existing theory: + # see https://doi.org/10.1007/s10208-015-9297-1 + # Shift the Laplacian so its diagononal is not all ones. The shift + # does change the eigenpairs, however, if the shift is small, the + # changes are insignificant. + laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0]) # noinspection PyUnboundLocalVariable ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) From 2eed15e01c9d6e304f2b027d88a07bc92caf2032 Mon Sep 17 00:00:00 2001 From: whitews Date: Thu, 23 May 2019 13:04:35 -0400 Subject: [PATCH 13/22] fix long line --- sklearn/manifold/spectral_embedding_.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index ce72d5455e59e..8f07132303f96 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -292,7 +292,8 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # The Laplacian matrix is always singular, having at least one zero # eigenvalue, corresponding to the trivial eigenvector, which is a # constant. Using a singular matrix for preconditioning may result in - # random failures in LOBPCG and is not supported by the existing theory: + # random failures in LOBPCG and is not supported by the existing + # theory: # see https://doi.org/10.1007/s10208-015-9297-1 # Shift the Laplacian so its diagononal is not all ones. The shift # does change the eigenpairs, however, if the shift is small, the From 783e6e4ef6cbdb0cf300e94edbaa35d93b83e9b8 Mon Sep 17 00:00:00 2001 From: whitews Date: Thu, 23 May 2019 13:14:10 -0400 Subject: [PATCH 14/22] only shift laplacian for the solver, then un-shift back to original --- sklearn/manifold/spectral_embedding_.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 8f07132303f96..4f03ae450d3ac 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -296,12 +296,13 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # theory: # see https://doi.org/10.1007/s10208-015-9297-1 # Shift the Laplacian so its diagononal is not all ones. The shift - # does change the eigenpairs, however, if the shift is small, the - # changes are insignificant. + # does change the eigenpairs however, so we'll feed the shifted + # matrix to the solver and afterward set it back to the original. laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0]) - # noinspection PyUnboundLocalVariable ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) + laplacian = laplacian - 1e-5 * sparse.eye(laplacian.shape[0]) + M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) X[:, 0] = dd.ravel() From 4a2e3dfdfb31b548c21bba6c1e93f8dd168a480c Mon Sep 17 00:00:00 2001 From: Scott White Date: Thu, 1 Aug 2019 21:49:21 -0400 Subject: [PATCH 15/22] Update sklearn/manifold/spectral_embedding_.py Co-Authored-By: Joel Nothman --- sklearn/manifold/spectral_embedding_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 977cfa828a816..7a362563137d7 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -299,7 +299,7 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # Shift the Laplacian so its diagononal is not all ones. The shift # does change the eigenpairs however, so we'll feed the shifted # matrix to the solver and afterward set it back to the original. - laplacian = laplacian + 1e-5 * sparse.eye(laplacian.shape[0]) + laplacian += 1e-5 * sparse.eye(laplacian.shape[0]) # noinspection PyUnboundLocalVariable ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) laplacian = laplacian - 1e-5 * sparse.eye(laplacian.shape[0]) From 0362c399b7dc6d5754c55c7cc9af6a37f621f00c Mon Sep 17 00:00:00 2001 From: Scott White Date: Thu, 1 Aug 2019 21:51:47 -0400 Subject: [PATCH 16/22] remove noinspection comment --- sklearn/manifold/spectral_embedding_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 7a362563137d7..dd51e86cd9848 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -300,7 +300,6 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # does change the eigenpairs however, so we'll feed the shifted # matrix to the solver and afterward set it back to the original. laplacian += 1e-5 * sparse.eye(laplacian.shape[0]) - # noinspection PyUnboundLocalVariable ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) laplacian = laplacian - 1e-5 * sparse.eye(laplacian.shape[0]) From f46f91be4274cb0f8172d063145818da1c1e42e5 Mon Sep 17 00:00:00 2001 From: Scott White Date: Thu, 1 Aug 2019 21:54:29 -0400 Subject: [PATCH 17/22] removing spectral clustering bug text --- doc/whats_new/v0.21.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 4bf63f0a01a64..d2b274ca3d8ed 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -699,11 +699,6 @@ Support for Python 3.4 and below has been officially dropped. lots of neighbors and/or small datasets. :pr:`9907` by :user:`William de Vazelhes `. -- [Fix] Fixed a bug in :func:`manifold.spectral_embedding` where - ``eigen_solver="amg"`` would sometimes result in a LinAlgError. - :issue:`13393` by :user:`Andrew Knyazev ` - :pr:`13707` by :user:`Scott White ` - :mod:`sklearn.metrics` ...................... From 61d54de5b808ea1357fa295e9bae50cf38c1abc0 Mon Sep 17 00:00:00 2001 From: Scott White Date: Thu, 1 Aug 2019 21:59:48 -0400 Subject: [PATCH 18/22] add spectral clustering fix contribution --- doc/whats_new/v0.22.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index bcef08ff1881b..740cd6fe4d30e 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -129,6 +129,14 @@ Changelog requires less memory. :pr:`14108`, pr:`14170` by :user:`Alex Henrie `. +:mod:`sklearn.manifold` +....................... + +- [Fix] Fixed a bug in :func:`manifold.spectral_embedding` where + ``eigen_solver="amg"`` would sometimes result in a LinAlgError. + :issue:`13393` by :user:`Andrew Knyazev ` + :pr:`13707` by :user:`Scott White ` + :mod:`sklearn.metrics` ...................... From c48fbe01b7d70f7139b2315bdc18888d462cc58e Mon Sep 17 00:00:00 2001 From: Scott White Date: Thu, 1 Aug 2019 22:01:42 -0400 Subject: [PATCH 19/22] fix markup in last commit --- doc/whats_new/v0.22.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 740cd6fe4d30e..534caee036c42 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -132,7 +132,7 @@ Changelog :mod:`sklearn.manifold` ....................... -- [Fix] Fixed a bug in :func:`manifold.spectral_embedding` where +- |Fix| Fixed a bug in :func:`manifold.spectral_embedding` where ``eigen_solver="amg"`` would sometimes result in a LinAlgError. :issue:`13393` by :user:`Andrew Knyazev ` :pr:`13707` by :user:`Scott White ` From a452c951c62996deb88ba96a351dfb2ff6deb4a7 Mon Sep 17 00:00:00 2001 From: whitews Date: Sun, 4 Aug 2019 15:08:57 -0400 Subject: [PATCH 20/22] mention SpectralEmbedding & SpectralClustering classes in release notes --- doc/whats_new/v0.22.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 0efd8157ac566..a198f82a9006c 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -186,8 +186,9 @@ Changelog :mod:`sklearn.manifold` ....................... -- |Fix| Fixed a bug in :func:`manifold.spectral_embedding` where - ``eigen_solver="amg"`` would sometimes result in a LinAlgError. +- |Fix| Fixed a bug in :func:`manifold.spectral_embedding` used in + :class:`manifold.SpectralEmbedding` and :class:`cluster.spectral.SpectralClustering` + where ``eigen_solver="amg"`` would sometimes result in a LinAlgError. :issue:`13393` by :user:`Andrew Knyazev ` :pr:`13707` by :user:`Scott White ` From de645ba2fc57e77ac3dc7b9a373267f39a5348ff Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 29 Aug 2019 13:42:55 +0200 Subject: [PATCH 21/22] Update AMG docstring and improve codestyle --- sklearn/manifold/spectral_embedding_.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 5ee0eadf506f7..9142237fd5042 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -299,9 +299,10 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, # Shift the Laplacian so its diagononal is not all ones. The shift # does change the eigenpairs however, so we'll feed the shifted # matrix to the solver and afterward set it back to the original. - laplacian += 1e-5 * sparse.eye(laplacian.shape[0]) + diag_shift = 1e-5 * sparse.eye(laplacian.shape[0]) + laplacian += diag_shift ml = smoothed_aggregation_solver(check_array(laplacian, 'csr')) - laplacian = laplacian - 1e-5 * sparse.eye(laplacian.shape[0]) + laplacian -= diag_shift M = ml.aspreconditioner() X = random_state.rand(laplacian.shape[0], n_components + 1) @@ -388,8 +389,7 @@ class SpectralEmbedding(BaseEstimator): eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'} The eigenvalue decomposition strategy to use. AMG requires pyamg - to be installed. It can be faster on very large, sparse problems, - but may also lead to instabilities. + to be installed. It can be faster on very large, sparse problems. n_neighbors : int, default : max(n_samples/10 , 1) Number of nearest neighbors for nearest_neighbors graph building. From 0501603dd79a59a0163ffff094d3019cd34f17a9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 29 Aug 2019 13:49:02 +0200 Subject: [PATCH 22/22] Stricter check in pyamg test --- sklearn/manifold/tests/test_spectral_embedding.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index c491380447862..d9c066c474b1c 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -207,10 +207,11 @@ def test_spectral_embedding_amg_solver_failure(seed=36): random_state=np.random.RandomState(seed)) embed_amg0 = se_amg0.fit_transform(S) - se_amg0.set_params(random_state=np.random.RandomState(seed+1)) - embed_amg1 = se_amg0.fit_transform(S) + for i in range(10): + se_amg0.set_params(random_state=np.random.RandomState(seed + 1)) + embed_amg1 = se_amg0.fit_transform(S) - assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05) + assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05) @pytest.mark.filterwarnings("ignore:the behavior of nmi will "