From 67f66b89d22848e3245cc953d0c28e878c0a671b Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 13:28:57 -0400
Subject: [PATCH 01/90] Update _spectral.py

re-introducing https://github.com/scikit-learn/scikit-learn/pull/12316
---
 sklearn/cluster/_spectral.py | 46 ++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 8b80f9999b403..73c644cb0f352 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -18,6 +18,35 @@
 from ._kmeans import k_means
 
 
+def cluster_qr(vectors):
+    """Search for a partition matrix (clustering) which is
+    closest to the eigenvector embedding.
+    Parameters
+    ----------
+    vectors : array-like, shape: (n_samples, n_clusters)
+        The embedding space of the samples.
+    Returns
+    -------
+    labels : array of integers, shape: n_samples
+        The labels of the clusters.
+    References
+    ----------
+    https://github.com/asdamle/QR-spectral-clustering
+    https://arxiv.org/abs/1708.07481
+    """
+
+    from scipy.linalg import qr, svd
+
+    k = vectors.shape[1]
+    piv = qr(vectors.T, pivoting=True)[2]
+    piv = piv[0:k]
+    UtSV = svd(vectors[piv, :].T)
+    Ut = UtSV[0]
+    Vt = UtSV[2].T.conj()
+    vectors = abs(np.dot(vectors, np.dot(Ut, Vt.T)))
+    return vectors.argmax(axis=1).T
+
+
 def discretize(
     vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20, random_state=None
 ):
@@ -229,12 +258,16 @@ def spectral_clustering(
         Stopping criterion for eigendecomposition of the Laplacian matrix
         when using arpack eigen_solver.
 
-    assign_labels : {'kmeans', 'discretize'}, default='kmeans'
+    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'
         The strategy to use to assign labels in the embedding
-        space.  There are two ways to assign labels after the Laplacian
+        space.  There are three ways to assign labels after the Laplacian
         embedding.  k-means can be applied and is a popular choice. But it can
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization [3]_.
+        The newest cluster_qr method directly extract clusters from eigenvectors
+        in spectral clustering. In contrast to k-means and discretization, 
+        cluster_qr has no tuning parameters, e.g., runs no iterations, yet may outperform
+        k-means and discretization in terms of both quality and speed.
 
     verbose : bool, default=False
         Verbosity mode.
@@ -275,10 +308,11 @@ def spectral_clustering(
     This algorithm solves the normalized cut for k=2: it is a
     normalized spectral clustering.
     """
-    if assign_labels not in ("kmeans", "discretize"):
+    if assign_labels not in ("kmeans", "discretize", 'cluster_qr'):
         raise ValueError(
             "The 'assign_labels' parameter should be "
-            "'kmeans' or 'discretize', but '%s' was given" % assign_labels
+            "'kmeans' or 'discretize', or 'cluster_qr', but '%s' was given" 
+            % assign_labels
         )
     if isinstance(affinity, np.matrix):
         raise TypeError(
@@ -312,6 +346,8 @@ def spectral_clustering(
         _, labels, _ = k_means(
             maps, n_clusters, random_state=random_state, n_init=n_init, verbose=verbose
         )
+    elif assign_labels == 'cluster_qr':
+        labels = cluster_qr(maps)
     else:
         labels = discretize(maps, random_state=random_state)
 
@@ -407,7 +443,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         Stopping criterion for eigendecomposition of the Laplacian matrix
         when ``eigen_solver='arpack'``.
 
-    assign_labels : {'kmeans', 'discretize'}, default='kmeans'
+    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'
         The strategy for assigning labels in the embedding space. There are two
         ways to assign labels after the Laplacian embedding. k-means is a
         popular choice, but it can be sensitive to initialization.

From bcb9e8b4ca398cd5568abc4a7291d371d325d67c Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 13:42:24 -0400
Subject: [PATCH 02/90] Update _spectral.py

lint fix
---
 sklearn/cluster/_spectral.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 73c644cb0f352..b92a438e3ac67 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -311,8 +311,8 @@ def spectral_clustering(
     if assign_labels not in ("kmeans", "discretize", 'cluster_qr'):
         raise ValueError(
             "The 'assign_labels' parameter should be "
-            "'kmeans' or 'discretize', or 'cluster_qr', but '%s' was given" 
-            % assign_labels
+            "'kmeans' or 'discretize', or 'cluster_qr', " 
+            "but '%s' was given" % assign_labels
         )
     if isinstance(affinity, np.matrix):
         raise TypeError(

From d521873ea849fd6c5893ab62ac5c1942b5f3decb Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 13:44:39 -0400
Subject: [PATCH 03/90] Update _spectral.py

trailing space fixed
---
 sklearn/cluster/_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index b92a438e3ac67..1c0e2778dad44 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -311,7 +311,7 @@ def spectral_clustering(
     if assign_labels not in ("kmeans", "discretize", 'cluster_qr'):
         raise ValueError(
             "The 'assign_labels' parameter should be "
-            "'kmeans' or 'discretize', or 'cluster_qr', " 
+            "'kmeans' or 'discretize', or 'cluster_qr', "
             "but '%s' was given" % assign_labels
         )
     if isinstance(affinity, np.matrix):

From e72bbd9ae636a98c408a212b4007cd9ee1051ff0 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 13:55:29 -0400
Subject: [PATCH 04/90] Update _spectral.py

line too long
---
 sklearn/cluster/_spectral.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 1c0e2778dad44..0f01de451f236 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -265,8 +265,8 @@ def spectral_clustering(
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization [3]_.
         The newest cluster_qr method directly extract clusters from eigenvectors
-        in spectral clustering. In contrast to k-means and discretization, 
-        cluster_qr has no tuning parameters, e.g., runs no iterations, yet may outperform
+        in spectral clustering. In contrast to k-means and discretization, cluster_qr
+        has no tuning parameters and runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
     verbose : bool, default=False

From 734dd37c7f0e31aaa3e62c30458991272db88e2a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 14:11:02 -0400
Subject: [PATCH 05/90] Update test_spectral.py

added "cluster_qr"
---
 sklearn/cluster/tests/test_spectral.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 679adf27520e4..3561c48eb064d 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -12,6 +12,7 @@
 from sklearn.utils._testing import assert_array_equal
 
 from sklearn.cluster import SpectralClustering, spectral_clustering
+from sklearn.cluster._spectral import cluster_qr
 from sklearn.cluster._spectral import discretize
 from sklearn.feature_extraction import img_to_graph
 from sklearn.metrics import pairwise_distances
@@ -29,7 +30,9 @@
 
 
 @pytest.mark.parametrize("eigen_solver", ("arpack", "lobpcg"))
-@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize"))
+@pytest.mark.parametrize(
+    "assign_labels",
+    ("kmeans", "discretize", "cluster_qr"))
 def test_spectral_clustering(eigen_solver, assign_labels):
     S = np.array(
         [
@@ -283,7 +286,7 @@ def test_n_components():
     assert not np.array_equal(labels, labels_diff_ncomp)
 
 
-@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize"))
+@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
 def test_verbose(assign_labels, capsys):
     # Check verbose mode of KMeans for better coverage.
     X, y = make_blobs(

From 360b2f71528a4c3bcbbed02eda3a9acd3db3a57a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 14:43:00 -0400
Subject: [PATCH 06/90] Update test_spectral.py

lint fix
---
 sklearn/cluster/tests/test_spectral.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 3561c48eb064d..747344e5563cc 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -12,7 +12,6 @@
 from sklearn.utils._testing import assert_array_equal
 
 from sklearn.cluster import SpectralClustering, spectral_clustering
-from sklearn.cluster._spectral import cluster_qr
 from sklearn.cluster._spectral import discretize
 from sklearn.feature_extraction import img_to_graph
 from sklearn.metrics import pairwise_distances

From 9a1f70fff79b57903e1de61f9054d39bb511c168 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 14:56:37 -0400
Subject: [PATCH 07/90] Update plot_coin_segmentation.py

added "cluster_qr"
---
 examples/cluster/plot_coin_segmentation.py | 33 +++++++++++++++-------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 9fb9b11be2753..96718a4925e19 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -10,12 +10,15 @@
 This procedure (spectral clustering on an image) is an efficient
 approximate solution for finding normalized graph cuts.
 
-There are two options to assign labels:
+There are three options to assign labels:
 
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
   using a kmeans algorithm
+* with 'cluster_qr' spectral clustering will cluster samples in the
+  embedding space using a cluster_qr algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space.
+
 """
 print(__doc__)
 
@@ -64,26 +67,36 @@
 
 # Apply spectral clustering (this step goes much faster if you have pyamg
 # installed)
-N_REGIONS = 25
+
+# The actual number of regions in this example is 27: background and 26 coins
+N_REGIONS = 26
 
 # %%
-# Visualize the resulting regions
+# Compute and visualize the resulting regions
 
-for assign_labels in ('kmeans', 'discretize'):
+# Any eigen_solver: 'arpack', 'lobpcg', 'amg' can be used. AMG is usually best
+# It often helps the spectral clustering to compute a few extra eigenvectors
+N_REGIONS_PLUS = 3
+
+for assign_labels in ('kmeans', 'discretize', 'cluster_qr'):
     t0 = time.time()
-    labels = spectral_clustering(graph, n_clusters=N_REGIONS,
-                                 assign_labels=assign_labels, random_state=42)
+    labels = spectral_clustering(graph,
+                             n_clusters=(N_REGIONS + N_REGIONS_PLUS),
+                             assign_labels=assign_labels, random_state=42,
+                             eigen_solver='arpack')
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
 
     plt.figure(figsize=(5, 5))
-    plt.imshow(rescaled_coins, cmap=plt.cm.gray)
-    for l in range(N_REGIONS):
-        plt.contour(labels == l,
-                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
+    plt.imshow(rescaled_coins, cmap=plt.get_cmap('gray'))
+
     plt.xticks(())
     plt.yticks(())
     title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
     print(title)
     plt.title(title)
+    for l in range(N_REGIONS):
+        plt.contour(labels == l,
+                    colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
+        plt.pause(0.5)
 plt.show()

From 3f87ceedae226d4bff7db74d75caeaf607a3394b Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 15:02:18 -0400
Subject: [PATCH 08/90] Update clustering.rst

added cluster_qr
---
 doc/modules/clustering.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 65f33fe1fbebb..39cc2eece08be 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -497,6 +497,10 @@ computed using a function of a gradient of the image.
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
     :scale: 65
 
+.. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png
+    :target: ../auto_examples/cluster/plot_coin_segmentation.html
+    :scale: 65
+
 Different label assignment strategies
 -------------------------------------
 
@@ -507,6 +511,8 @@ In particular, unless you control the ``random_state``, it may not be
 reproducible from run-to-run, as it depends on random initialization.
 The alternative ``"discretize"`` strategy is 100% reproducible, but tends
 to create parcels of fairly even and geometrical shape.
+The recently added option ``clusterQR`` is 100% also reproducible and tends
+to create the visually best partitioning.
 
 =====================================  =====================================
  ``assign_labels="kmeans"``              ``assign_labels="discretize"``

From 5fe24dc172e259e74b89aa6688117b56d58e5cf3 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 15:11:05 -0400
Subject: [PATCH 09/90] Update plot_coin_segmentation.py

E128 continuation line under-indented fixed
---
 examples/cluster/plot_coin_segmentation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 96718a4925e19..a2a8569e94743 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -81,9 +81,9 @@
 for assign_labels in ('kmeans', 'discretize', 'cluster_qr'):
     t0 = time.time()
     labels = spectral_clustering(graph,
-                             n_clusters=(N_REGIONS + N_REGIONS_PLUS),
-                             assign_labels=assign_labels, random_state=42,
-                             eigen_solver='arpack')
+                                 n_clusters=(N_REGIONS + N_REGIONS_PLUS),
+                                 assign_labels=assign_labels, random_state=42,
+                                 eigen_solver='arpack')
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
 

From 10e649bf5b39f783d22b840142d94267eae1d327 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 16:17:07 -0400
Subject: [PATCH 10/90] Update v1.1.rst

added PR #21148 info
---
 doc/whats_new/v1.1.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 3aabed6214771..8824efaba3694 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -38,6 +38,16 @@ Changelog
     :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
     where 123456 is the *pull request* number, not the issue number.
 
+:mod:`sklearn.cluster`
+....................
+
+- |Enhancement| :func:`cluster._spectral` now includes the 'cluster_qr' method
+  that clusters samples in the embedding space just as 'kmeans' and 'discrete'.
+  :func:`cluster.plot_coin_segmentation' now compares all three alternatives.
+  Documentation :doc: 'modules/clustering' and unit :test: 'test_spectral.py'
+  have been updated to incorporate 'cluster_qr'.
+  :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`
+
 :mod:`sklearn.linear_model`
 ...........................
 

From 70f0c400aa7cf4cb6f08ffc8ff3f46e6d0c29097 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 16:41:11 -0400
Subject: [PATCH 11/90] Update v1.1.rst

formatting fixed
---
 doc/whats_new/v1.1.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 8824efaba3694..8ef9d68dbe53e 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -43,8 +43,8 @@ Changelog
 
 - |Enhancement| :func:`cluster._spectral` now includes the 'cluster_qr' method
   that clusters samples in the embedding space just as 'kmeans' and 'discrete'.
-  :func:`cluster.plot_coin_segmentation' now compares all three alternatives.
-  Documentation :doc: 'modules/clustering' and unit :test: 'test_spectral.py'
+  :func:`cluster.plot_coin_segmentation` now compares all three alternatives.
+  Documentation :doc:`/modules/clustering` and unit tests in ``test_spectral.py``
   have been updated to incorporate 'cluster_qr'.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`
 

From fa943304fb1c781286ebe4ab33af2f2d48ee8d27 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 17:20:57 -0400
Subject: [PATCH 12/90] Update v1.1.rst

title underline fixed
---
 doc/whats_new/v1.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 8ef9d68dbe53e..f14a849315676 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -39,7 +39,7 @@ Changelog
     where 123456 is the *pull request* number, not the issue number.
 
 :mod:`sklearn.cluster`
-....................
+......................
 
 - |Enhancement| :func:`cluster._spectral` now includes the 'cluster_qr' method
   that clusters samples in the embedding space just as 'kmeans' and 'discrete'.

From a52aec01cabeef34bc4e04fbe731cbfb6a1fe8cd Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sat, 25 Sep 2021 19:22:11 -0400
Subject: [PATCH 13/90] Update clustering.rst

added cluster_qr to plots
---
 doc/modules/clustering.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 39cc2eece08be..5334657aedbc7 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -514,11 +514,11 @@ to create parcels of fairly even and geometrical shape.
 The recently added option ``clusterQR`` is 100% also reproducible and tends
 to create the visually best partitioning.
 
-=====================================  =====================================
- ``assign_labels="kmeans"``              ``assign_labels="discretize"``
-=====================================  =====================================
-|coin_kmeans|                          |coin_discretize|
-=====================================  =====================================
+================================  ================================  ================================
+ ``assign_labels="kmeans"``        ``assign_labels="discretize"``    ``assign_labels="cluster_qr"``
+================================  ================================  ================================
+|coin_kmeans|                          |coin_discretize|                 |coin_cluster_qr|
+================================  ================================  ================================
 
 Spectral Clustering Graphs
 --------------------------

From e661e22a9931f63b2742b10f5e5bfea8a857d94c Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 12:22:15 -0400
Subject: [PATCH 14/90] Update _spectral.py

black formatting
---
 sklearn/cluster/_spectral.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 0f01de451f236..65b6abac9a5b5 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -308,7 +308,7 @@ def spectral_clustering(
     This algorithm solves the normalized cut for k=2: it is a
     normalized spectral clustering.
     """
-    if assign_labels not in ("kmeans", "discretize", 'cluster_qr'):
+    if assign_labels not in ("kmeans", "discretize", "cluster_qr"):
         raise ValueError(
             "The 'assign_labels' parameter should be "
             "'kmeans' or 'discretize', or 'cluster_qr', "
@@ -346,7 +346,7 @@ def spectral_clustering(
         _, labels, _ = k_means(
             maps, n_clusters, random_state=random_state, n_init=n_init, verbose=verbose
         )
-    elif assign_labels == 'cluster_qr':
+    elif assign_labels == "cluster_qr":
         labels = cluster_qr(maps)
     else:
         labels = discretize(maps, random_state=random_state)

From 79504f225d4c282c54d2d7e147ee94a241ace230 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 12:26:22 -0400
Subject: [PATCH 15/90] Update test_spectral.py

black formatting
---
 sklearn/cluster/tests/test_spectral.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 747344e5563cc..519c09436303e 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -29,9 +29,7 @@
 
 
 @pytest.mark.parametrize("eigen_solver", ("arpack", "lobpcg"))
-@pytest.mark.parametrize(
-    "assign_labels",
-    ("kmeans", "discretize", "cluster_qr"))
+@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
 def test_spectral_clustering(eigen_solver, assign_labels):
     S = np.array(
         [

From 24fcf28f4aa775d501ecad7b655809ae510a662e Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 15:20:05 -0400
Subject: [PATCH 16/90] Update test_spectral.py

trying to change the discretize test by itself to also test kmeans by itself and cluster_qr by itself
---
 sklearn/cluster/tests/test_spectral.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 519c09436303e..4d1d6c818e2d5 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -191,9 +191,10 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
+@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
-def test_discretize(n_samples):
-    # Test the discretize using a noise assignment matrix
+def test_direct_clustering(n_samples, assign_labels):
+    # Test direct clustering using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
     for n_class in range(2, 10):
         # random class labels
@@ -207,7 +208,7 @@ def test_discretize(n_samples):
         y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
             n_samples, n_class + 1
         )
-        y_pred = discretize(y_true_noisy, random_state=random_state)
+        y_pred = assign_labels(y_true_noisy, random_state=random_state)
         assert adjusted_rand_score(y_true, y_pred) > 0.8
 
 

From 898a287f8937f995a80f3819de149166007384f2 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 15:38:23 -0400
Subject: [PATCH 17/90] Update test_spectral.py

adding test cluster_qr by itself to the same test with discretize
---
 sklearn/cluster/tests/test_spectral.py | 33 +++++++++++++-------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 4d1d6c818e2d5..00d438c197919 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -12,7 +12,7 @@
 from sklearn.utils._testing import assert_array_equal
 
 from sklearn.cluster import SpectralClustering, spectral_clustering
-from sklearn.cluster._spectral import discretize
+from sklearn.cluster._spectral import discretize, cluster_qr
 from sklearn.feature_extraction import img_to_graph
 from sklearn.metrics import pairwise_distances
 from sklearn.metrics import adjusted_rand_score
@@ -191,25 +191,26 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
+@pytest.mark.parametrize("assign_labels", ("discretize", "cluster_qr"))
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
 def test_direct_clustering(n_samples, assign_labels):
     # Test direct clustering using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
-    for n_class in range(2, 10):
-        # random class labels
-        y_true = random_state.randint(0, n_class + 1, n_samples)
-        y_true = np.array(y_true, float)
-        # noise class assignment matrix
-        y_indicator = sparse.coo_matrix(
-            (np.ones(n_samples), (np.arange(n_samples), y_true)),
-            shape=(n_samples, n_class + 1),
-        )
-        y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
-            n_samples, n_class + 1
-        )
-        y_pred = assign_labels(y_true_noisy, random_state=random_state)
-        assert adjusted_rand_score(y_true, y_pred) > 0.8
+    for fn in [assign_labels]:
+        for n_class in range(2, 10):
+            # random class labels
+            y_true = random_state.randint(0, n_class + 1, n_samples)
+            y_true = np.array(y_true, float)
+            # noise class assignment matrix
+            y_indicator = sparse.coo_matrix(
+                (np.ones(n_samples), (np.arange(n_samples), y_true)),
+                shape=(n_samples, n_class + 1),
+            )
+            y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
+                n_samples, n_class + 1
+            )
+            y_pred = fn(y_true_noisy, random_state=random_state)
+            assert adjusted_rand_score(y_true, y_pred) > 0.8
 
 
 # TODO: Remove when pyamg does replaces sp.rand call with np.random.rand

From fb6494581637bb78385c27f0c45dc4dcdf65bc0d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 15:46:14 -0400
Subject: [PATCH 18/90] Update test_spectral.py

error fixes
---
 sklearn/cluster/tests/test_spectral.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 00d438c197919..8c7fc9772b06d 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -191,12 +191,11 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-@pytest.mark.parametrize("assign_labels", ("discretize", "cluster_qr"))
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
 def test_direct_clustering(n_samples, assign_labels):
     # Test direct clustering using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
-    for fn in [assign_labels]:
+    for fn in [discretize, cluster_qr]:
         for n_class in range(2, 10):
             # random class labels
             y_true = random_state.randint(0, n_class + 1, n_samples)

From f8622453e72c7d5b23117e5d0a9da64ee06092c5 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 17:34:45 -0400
Subject: [PATCH 19/90] Update test_spectral.py

error fix
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 8c7fc9772b06d..77f636de46593 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -192,7 +192,7 @@ def histogram(x, y, **kwargs):
 
 
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
-def test_direct_clustering(n_samples, assign_labels):
+def test_direct_clustering(n_samples):
     # Test direct clustering using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
     for fn in [discretize, cluster_qr]:

From 41759aa41da3815358e149669d2c843d40906274 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 18:51:20 -0400
Subject: [PATCH 20/90] Update test_spectral.py

cluster_qr apparently requires n_class>2, so change the test to start with n_class=3
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 77f636de46593..f0e5dbd0b0528 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -196,7 +196,7 @@ def test_direct_clustering(n_samples):
     # Test direct clustering using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
     for fn in [discretize, cluster_qr]:
-        for n_class in range(2, 10):
+        for n_class in range(3, 10):
             # random class labels
             y_true = random_state.randint(0, n_class + 1, n_samples)
             y_true = np.array(y_true, float)

From 7f8c60fafaab9e7b484692aa2321981d66ab0bce Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 27 Sep 2021 20:59:58 -0400
Subject: [PATCH 21/90] Update test_spectral.py

reverted to working version
---
 sklearn/cluster/tests/test_spectral.py | 35 +++++++++++++-------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index f0e5dbd0b0528..519c09436303e 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -12,7 +12,7 @@
 from sklearn.utils._testing import assert_array_equal
 
 from sklearn.cluster import SpectralClustering, spectral_clustering
-from sklearn.cluster._spectral import discretize, cluster_qr
+from sklearn.cluster._spectral import discretize
 from sklearn.feature_extraction import img_to_graph
 from sklearn.metrics import pairwise_distances
 from sklearn.metrics import adjusted_rand_score
@@ -192,24 +192,23 @@ def histogram(x, y, **kwargs):
 
 
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
-def test_direct_clustering(n_samples):
-    # Test direct clustering using a noise assignment matrix
+def test_discretize(n_samples):
+    # Test the discretize using a noise assignment matrix
     random_state = np.random.RandomState(seed=8)
-    for fn in [discretize, cluster_qr]:
-        for n_class in range(3, 10):
-            # random class labels
-            y_true = random_state.randint(0, n_class + 1, n_samples)
-            y_true = np.array(y_true, float)
-            # noise class assignment matrix
-            y_indicator = sparse.coo_matrix(
-                (np.ones(n_samples), (np.arange(n_samples), y_true)),
-                shape=(n_samples, n_class + 1),
-            )
-            y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
-                n_samples, n_class + 1
-            )
-            y_pred = fn(y_true_noisy, random_state=random_state)
-            assert adjusted_rand_score(y_true, y_pred) > 0.8
+    for n_class in range(2, 10):
+        # random class labels
+        y_true = random_state.randint(0, n_class + 1, n_samples)
+        y_true = np.array(y_true, float)
+        # noise class assignment matrix
+        y_indicator = sparse.coo_matrix(
+            (np.ones(n_samples), (np.arange(n_samples), y_true)),
+            shape=(n_samples, n_class + 1),
+        )
+        y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
+            n_samples, n_class + 1
+        )
+        y_pred = discretize(y_true_noisy, random_state=random_state)
+        assert adjusted_rand_score(y_true, y_pred) > 0.8
 
 
 # TODO: Remove when pyamg does replaces sp.rand call with np.random.rand

From bbab00ae84206ffd76b58d331f0ff252416ecf35 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 14:33:32 -0400
Subject: [PATCH 22/90] Update test_spectral.py

added a test of cluster_qr itself
---
 sklearn/cluster/tests/test_spectral.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 519c09436303e..8a295cc3c0e73 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -191,6 +191,14 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
+def test_cluster_qr():
+    # Test cluster_qr for fixed data
+    random_state = np.random.RandomState(seed=8)
+    data = random_state.randn(10, 5)
+    labels = cluster_qr(data)
+    assert labels == [2 1 3 3 2 4 1 3 4 0]
+
+
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
 def test_discretize(n_samples):
     # Test the discretize using a noise assignment matrix

From 53dca44196a8cb91c93150f81127c39513fa39cb Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 14:42:15 -0400
Subject: [PATCH 23/90] Update test_spectral.py

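error fix: replace the invalid list literal in the test_cluster_qr assertion with a (negated) np.array_equal comparison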
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 8a295cc3c0e73..1a46efc1268c3 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -196,7 +196,7 @@ def test_cluster_qr():
     random_state = np.random.RandomState(seed=8)
     data = random_state.randn(10, 5)
     labels = cluster_qr(data)
-    assert labels == [2 1 3 3 2 4 1 3 4 0]
+    assert not np.array_equal(labels, [2, 1, 3, 3, 2, 4, 1, 3, 4, 0])
 
 
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])

From a028dbafbae2b3de07dce5cb31a3025ca0a28585 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 14:46:09 -0400
Subject: [PATCH 24/90] Update test_spectral.py

error fix: import cluster_qr again in test_spectral.py, since test_cluster_qr uses it
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 1a46efc1268c3..055209eb3aea4 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -12,7 +12,7 @@
 from sklearn.utils._testing import assert_array_equal
 
 from sklearn.cluster import SpectralClustering, spectral_clustering
-from sklearn.cluster._spectral import discretize
+from sklearn.cluster._spectral import discretize, cluster_qr
 from sklearn.feature_extraction import img_to_graph
 from sklearn.metrics import pairwise_distances
 from sklearn.metrics import adjusted_rand_score

From 23f35dfb2270b42a8bf0ea4c99562328f32d4938 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 15:08:12 -0400
Subject: [PATCH 25/90] Update test_spectral.py

error fix: wrap the expected labels in np.array in the test_cluster_qr assertion
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 055209eb3aea4..603f79c43edd4 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -196,7 +196,7 @@ def test_cluster_qr():
     random_state = np.random.RandomState(seed=8)
     data = random_state.randn(10, 5)
     labels = cluster_qr(data)
-    assert not np.array_equal(labels, [2, 1, 3, 3, 2, 4, 1, 3, 4, 0])
+    assert not np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
 
 
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])

From 6fa84241644a20417062da7c9b86c254e971cb7c Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 15:09:30 -0400
Subject: [PATCH 26/90] Update test_spectral.py

error fix: drop the negation so test_cluster_qr asserts that the labels equal the expected array
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 603f79c43edd4..1981911e80737 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -196,7 +196,7 @@ def test_cluster_qr():
     random_state = np.random.RandomState(seed=8)
     data = random_state.randn(10, 5)
     labels = cluster_qr(data)
-    assert not np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
+    assert np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
 
 
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])

From 9f92b5a7211fd5f1339ab35f373aff555f36f72e Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 15:59:15 -0400
Subject: [PATCH 27/90] Update test_spectral.py

added dtypes ["np.float32", "np.float64"] to the test
---
 sklearn/cluster/tests/test_spectral.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 1981911e80737..43c0e97c531c9 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -191,10 +191,11 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-def test_cluster_qr():
-    # Test cluster_qr for fixed data
+@pytest.mark.parametrize("_dtype", ["np.float32", "np.float64"])
+def test_cluster_qr(_dtype):
+    # Test cluster_qr for fixed data different dtypes
     random_state = np.random.RandomState(seed=8)
-    data = random_state.randn(10, 5)
+    data = random_state.randn(10, 5).astype(_dtype)
     labels = cluster_qr(data)
     assert np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
 

From 5cdc1ea49a00e0ad29e9b838a0073cca0ac6cd06 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 16:21:11 -0400
Subject: [PATCH 28/90] Update test_spectral.py

changed dtypes to ["single", "double"], since the strings "np.float32" and "np.float64" are not valid NumPy dtype names
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 43c0e97c531c9..79daa0d6c9080 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -191,7 +191,7 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-@pytest.mark.parametrize("_dtype", ["np.float32", "np.float64"])
+@pytest.mark.parametrize("_dtype", ["single", "double"])
 def test_cluster_qr(_dtype):
     # Test cluster_qr for fixed data different dtypes
     random_state = np.random.RandomState(seed=8)

From 8454a692f4870cd0285d6bef73a2ae01fe830110 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 28 Sep 2021 17:14:44 -0400
Subject: [PATCH 29/90] Update clustering.rst

changed plots scale from 65 to 35 to make space for cluster_qr and generate docs
---
 doc/modules/clustering.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 5334657aedbc7..e68a51709bb0a 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -491,15 +491,15 @@ computed using a function of a gradient of the image.
 
 .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
-    :scale: 65
+    :scale: 35
 
 .. |coin_discretize| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_002.png
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
-    :scale: 65
+    :scale: 35
 
 .. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
-    :scale: 65
+    :scale: 35
 
 Different label assignment strategies
 -------------------------------------
@@ -517,7 +517,7 @@ to create the visually best partitioning.
 ================================  ================================  ================================
  ``assign_labels="kmeans"``        ``assign_labels="discretize"``    ``assign_labels="cluster_qr"``
 ================================  ================================  ================================
-|coin_kmeans|                          |coin_discretize|                 |coin_cluster_qr|
+|coin_kmeans|                          |coin_discretize|                  |coin_cluster_qr|
 ================================  ================================  ================================
 
 Spectral Clustering Graphs

From 82c35438a9358dc37a6396e57e0e2b557ec05244 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 10:39:59 -0400
Subject: [PATCH 30/90] Update test_spectral.py

test all 3 options ("assign_labels", ("kmeans", "discretize", "cluster_qr")) in test_spectral_clustering_sparse
---
 sklearn/cluster/tests/test_spectral.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 79daa0d6c9080..f164a252f7c5b 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -101,6 +101,7 @@ def test_spectral_unknown_assign_labels():
         spectral_clustering(S, n_clusters=2, random_state=0, assign_labels="<unknown>")
 
 
+@pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
 def test_spectral_clustering_sparse():
     X, y = make_blobs(
         n_samples=20, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01
@@ -111,7 +112,11 @@ def test_spectral_clustering_sparse():
     S = sparse.coo_matrix(S)
 
     labels = (
-        SpectralClustering(random_state=0, n_clusters=2, affinity="precomputed")
+        SpectralClustering(
+            random_state=0,
+            n_clusters=2,
+            affinity="precomputed",
+            assign_labels=assign_labels)
         .fit(S)
         .labels_
     )

From 524228ceaca699102a7c85d1b8404460834bec49 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 11:07:22 -0400
Subject: [PATCH 31/90] Update test_spectral.py

black formatting
---
 sklearn/cluster/tests/test_spectral.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index f164a252f7c5b..735573cb9ba2e 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -116,7 +116,8 @@ def test_spectral_clustering_sparse():
             random_state=0,
             n_clusters=2,
             affinity="precomputed",
-            assign_labels=assign_labels)
+            assign_labels=assign_labels,
+        )
         .fit(S)
         .labels_
     )

From d992122747057ec7867b1e99e77e0dc3ab130f6d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 11:40:59 -0400
Subject: [PATCH 32/90] Update test_spectral.py

lint formatting errors fix
---
 sklearn/cluster/tests/test_spectral.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 735573cb9ba2e..a52a87d0ae5b9 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -49,7 +49,7 @@ def test_spectral_clustering(eigen_solver, assign_labels):
             n_clusters=2,
             affinity="precomputed",
             eigen_solver=eigen_solver,
-            assign_labels=assign_labels,
+            assign_labels=assign_labels
         ).fit(mat)
         labels = model.labels_
         if labels[0] == 0:
@@ -116,7 +116,7 @@ def test_spectral_clustering_sparse():
             random_state=0,
             n_clusters=2,
             affinity="precomputed",
-            assign_labels=assign_labels,
+            assign_labels=assign_labels
         )
         .fit(S)
         .labels_

From e207aa43990ad952657137f8df46e463c005ce71 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 11:50:30 -0400
Subject: [PATCH 33/90] Update test_spectral.py

remove all trailing commas in multi-line function call for consistency
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index a52a87d0ae5b9..f2196b30c7a7a 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -140,7 +140,7 @@ def test_precomputed_nearest_neighbors_filtering():
                 random_state=0,
                 n_clusters=2,
                 affinity="precomputed_nearest_neighbors",
-                n_neighbors=n_neighbors,
+                n_neighbors=n_neighbors
             )
             .fit(graph)
             .labels_

From 87b4ffdb8f32d31b60492cdf958010d4e01a0f4c Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 11:58:20 -0400
Subject: [PATCH 34/90] Update test_spectral.py

run black
---
 sklearn/cluster/tests/test_spectral.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index f2196b30c7a7a..735573cb9ba2e 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -49,7 +49,7 @@ def test_spectral_clustering(eigen_solver, assign_labels):
             n_clusters=2,
             affinity="precomputed",
             eigen_solver=eigen_solver,
-            assign_labels=assign_labels
+            assign_labels=assign_labels,
         ).fit(mat)
         labels = model.labels_
         if labels[0] == 0:
@@ -116,7 +116,7 @@ def test_spectral_clustering_sparse():
             random_state=0,
             n_clusters=2,
             affinity="precomputed",
-            assign_labels=assign_labels
+            assign_labels=assign_labels,
         )
         .fit(S)
         .labels_
@@ -140,7 +140,7 @@ def test_precomputed_nearest_neighbors_filtering():
                 random_state=0,
                 n_clusters=2,
                 affinity="precomputed_nearest_neighbors",
-                n_neighbors=n_neighbors
+                n_neighbors=n_neighbors,
             )
             .fit(graph)
             .labels_

From 7d2b030f4bc74499f9db6aad7630ae96ae6c5bd5 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 29 Sep 2021 12:32:20 -0400
Subject: [PATCH 35/90] Update test_spectral.py

error fix: add the missing assign_labels argument to the parametrized test_spectral_clustering_sparse
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 735573cb9ba2e..f6243191f6441 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -102,7 +102,7 @@ def test_spectral_unknown_assign_labels():
 
 
 @pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr"))
-def test_spectral_clustering_sparse():
+def test_spectral_clustering_sparse(assign_labels):
     X, y = make_blobs(
         n_samples=20, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01
     )

From 73a629532567d0f7de17b5980158f5fa5ecab784 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sun, 3 Oct 2021 22:37:46 -0400
Subject: [PATCH 36/90] Update clustering.rst

minor: fix the word order in the clusterQR sentence ("is also 100% reproducible")
---
 doc/modules/clustering.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index e68a51709bb0a..84c3b31f4b2d9 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -511,7 +511,7 @@ In particular, unless you control the ``random_state``, it may not be
 reproducible from run-to-run, as it depends on random initialization.
 The alternative ``"discretize"`` strategy is 100% reproducible, but tends
 to create parcels of fairly even and geometrical shape.
-The recently added option ``clusterQR`` is 100% also reproducible and tends
+The recently added option ``clusterQR`` is also 100% reproducible and tends
 to create the visually best partitioning.
 
 ================================  ================================  ================================

From bf5486e906d25f7e32f83ad6c0bc910ba40882a5 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sun, 3 Oct 2021 22:46:50 -0400
Subject: [PATCH 37/90] Update plot_coin_segmentation.py

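minor: reword the docstring, introduce an eigen_solver variable, and rename N_REGIONS / N_REGIONS_PLUS to lowercase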
---
 examples/cluster/plot_coin_segmentation.py | 25 +++++++++++++---------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index a2a8569e94743..602c1cd8a311b 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -14,10 +14,10 @@
 
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
   using a kmeans algorithm
-* with 'cluster_qr' will cluster samples in the embedding space
-  using a cluster_qr algorithm,
+* with 'cluster_qr' spectral clustering will cluster samples in the embedding 
+  space using a cluster_qr algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
-  space to the embedding space.
+  space to the embedding space of spectral clustering.
 
 """
 print(__doc__)
@@ -66,24 +66,26 @@
 graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
 
 # Apply spectral clustering (this step goes much faster if you have pyamg
-# installed)
+# installed and use eigen_solver = 'amg'). However, any valid solver can
+# be used (e.g., 'arpack', 'lobpcg', or 'amg').
+eigen_solver = 'arpack'
 
 # The actual number of regions in this example is 27: background and 26 coins
-N_REGIONS = 26
+n_regions = 26
 
 # %%
 # Compute and visualize the resulting regions
 
 # Any eigen_solver: 'arpack', 'lobpcg', 'amg' can be used. AMG is usually best
 # It often helps the spectral clustering to compute a few extra eigenvectors
-N_REGIONS_PLUS = 3
+n_regions_plus = 3
 
 for assign_labels in ('kmeans', 'discretize', 'cluster_qr'):
     t0 = time.time()
     labels = spectral_clustering(graph,
-                                 n_clusters=(N_REGIONS + N_REGIONS_PLUS),
+                                 n_clusters=(n_regions + n_regions_plus),
                                  assign_labels=assign_labels, random_state=42,
-                                 eigen_solver='arpack')
+                                 eigen_solver='eigen_solver')
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
 
@@ -95,8 +97,11 @@
     title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
     print(title)
     plt.title(title)
-    for l in range(N_REGIONS):
+    for l in range(n_regions):
         plt.contour(labels == l,
-                    colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
+                    colors=[plt.cm.nipy_spectral((l+3) / float(n_regions + 3))])
+        colors = plt.cm.nipy_spectral((l + n_regions_plus) /
+                              float(n_regions + n_regions_plus))
+        plt.contour(labels == l, colors=colors)
         plt.pause(0.5)
 plt.show()

From 20a18f2043c179f7ce9673debd39e9d270b41ea4 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sun, 3 Oct 2021 22:54:20 -0400
Subject: [PATCH 38/90] Update plot_coin_segmentation.py

trailing whitespace removed
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 602c1cd8a311b..850e901971b71 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -14,7 +14,7 @@
 
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
   using a kmeans algorithm
-* with 'cluster_qr' spectral clustering will cluster samples in the embedding 
+* with 'cluster_qr' spectral clustering will cluster samples in the embedding
   space using a cluster_qr algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space of spectral clustering.

From c28b9c918d9e7083aac9f947a3344255c0ffadf7 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Sun, 3 Oct 2021 23:01:10 -0400
Subject: [PATCH 39/90] Update plot_coin_segmentation.py

indentation fixed
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 850e901971b71..5fcf4fd908aed 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -101,7 +101,7 @@
         plt.contour(labels == l,
                     colors=[plt.cm.nipy_spectral((l+3) / float(n_regions + 3))])
         colors = plt.cm.nipy_spectral((l + n_regions_plus) /
-                              float(n_regions + n_regions_plus))
+                                      float(n_regions + n_regions_plus))
         plt.contour(labels == l, colors=colors)
         plt.pause(0.5)
 plt.show()

From 56c7bb43c15d2f1a07c78765874b867d6d6ab921 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 00:09:15 -0400
Subject: [PATCH 40/90] Update plot_coin_segmentation.py

minor error: pass the eigen_solver variable instead of the string literal 'eigen_solver'
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 5fcf4fd908aed..b95d8bacfcfe0 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -85,7 +85,7 @@
     labels = spectral_clustering(graph,
                                  n_clusters=(n_regions + n_regions_plus),
                                  assign_labels=assign_labels, random_state=42,
-                                 eigen_solver='eigen_solver')
+                                 eigen_solver=eigen_solver)
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
 

From 4d7112494500a39f5bb28a1d3ba2d3921cebdc0a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 01:42:48 -0400
Subject: [PATCH 41/90] Update plot_coin_segmentation.py

error fixed: remove the duplicate plt.contour call inside the region loop
---
 examples/cluster/plot_coin_segmentation.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index b95d8bacfcfe0..121d6b2797577 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -98,8 +98,6 @@
     print(title)
     plt.title(title)
     for l in range(n_regions):
-        plt.contour(labels == l,
-                    colors=[plt.cm.nipy_spectral((l+3) / float(n_regions + 3))])
         colors = plt.cm.nipy_spectral((l + n_regions_plus) /
                                       float(n_regions + n_regions_plus))
         plt.contour(labels == l, colors=colors)

From 2ae0513038d6d0745c1875f2e078f4471b54060f Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 01:55:32 -0400
Subject: [PATCH 42/90] Update _spectral.py

proposed by @victorminden
---
 sklearn/cluster/_spectral.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 65b6abac9a5b5..2633616a85cce 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -21,18 +21,25 @@
 def cluster_qr(vectors):
     """Search for a partition matrix (clustering) which is
     closest to the eigenvector embedding.
+
+    This implementation was proposed in [1]_.
+
     Parameters
     ----------
     vectors : array-like, shape: (n_samples, n_clusters)
         The embedding space of the samples.
+
     Returns
     -------
     labels : array of integers, shape: n_samples
         The labels of the clusters.
+
     References
     ----------
-    https://github.com/asdamle/QR-spectral-clustering
-    https://arxiv.org/abs/1708.07481
+    .. [1] `Simple, direct, and efficient multi-way spectral clustering, 2019
+        Anil Damle, Victor Minden, Lexing Ying
+        <https://doi.org/10.1093/imaiai/iay008>`_
+
     """
 
     from scipy.linalg import qr, svd
@@ -40,10 +47,8 @@ def cluster_qr(vectors):
     k = vectors.shape[1]
     piv = qr(vectors.T, pivoting=True)[2]
     piv = piv[0:k]
-    UtSV = svd(vectors[piv, :].T)
-    Ut = UtSV[0]
-    Vt = UtSV[2].T.conj()
-    vectors = abs(np.dot(vectors, np.dot(Ut, Vt.T)))
+    ut, _, v = svd(vectors[piv[:k], :].T)
+    vectors = abs(np.dot(vectors, np.dot(ut, v.conj())))
     return vectors.argmax(axis=1).T
 
 
@@ -264,7 +269,7 @@ def spectral_clustering(
         embedding.  k-means can be applied and is a popular choice. But it can
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization [3]_.
-        The newest cluster_qr method directly extract clusters from eigenvectors
+        The newest cluster_qr method [5]_ directly extract clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
         has no tuning parametersand runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
@@ -300,9 +305,13 @@ def spectral_clustering(
            SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.
            <https://epubs.siam.org/doi/pdf/10.1137/S1064827500366124>`_
 
+    .. [5] `Simple, direct, and efficient multi-way spectral clustering, 2019
+           Anil Damle, Victor Minden, Lexing Ying
+           <https://doi.org/10.1093/imaiai/iay008>`_
+
     Notes
     -----
-    The graph should contain only one connect component, elsewhere
+    The graph should contain only one connected component, elsewhere
     the results make little sense.
 
     This algorithm solves the normalized cut for k=2: it is a

From 2f74cd7e118e45e11c7104359be70b2ad7fec556 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 02:46:42 -0400
Subject: [PATCH 43/90] Update plot_coin_segmentation.py

        colors = plt.cm.nipy_spectral((l + n_regions_plus) /
                                      float(n_regions + n_regions_plus))
        plt.contour(labels == l, colors=colors)
->
plt.contour(labels == l,
                  colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
---
 examples/cluster/plot_coin_segmentation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 121d6b2797577..5ebc0144818d5 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -98,8 +98,7 @@
     print(title)
     plt.title(title)
     for l in range(n_regions):
-        colors = plt.cm.nipy_spectral((l + n_regions_plus) /
-                                      float(n_regions + n_regions_plus))
-        plt.contour(labels == l, colors=colors)
+    plt.contour(labels == l,
+                colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
         plt.pause(0.5)
 plt.show()

From e9926c60cd4103af12c02437204e9a95733d4e2d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 02:57:15 -0400
Subject: [PATCH 44/90] Update plot_coin_segmentation.py

---
 examples/cluster/plot_coin_segmentation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 5ebc0144818d5..03bed5cdb91eb 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -98,7 +98,7 @@
     print(title)
     plt.title(title)
     for l in range(n_regions):
-    plt.contour(labels == l,
-                colors=[plt.cm.nipy_spectral((l+3) / float(N_REGIONS+3))])
-        plt.pause(0.5)
+      colors = [plt.cm.nipy_spectral((l + 3) / float(n_regions + 3))]
+      plt.contour(labels == l, colors=colors)
+      plt.pause(0.5)
 plt.show()

From 27efc117532fe5323f30cdbf6d85c01fd8a24567 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 03:11:54 -0400
Subject: [PATCH 45/90] Update plot_coin_segmentation.py

---
 examples/cluster/plot_coin_segmentation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 03bed5cdb91eb..b5f8a9392f5fc 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -98,7 +98,7 @@
     print(title)
     plt.title(title)
     for l in range(n_regions):
-      colors = [plt.cm.nipy_spectral((l + 3) / float(n_regions + 3))]
-      plt.contour(labels == l, colors=colors)
-      plt.pause(0.5)
+        colors = [plt.cm.nipy_spectral((l + 3) / float(n_regions + 3))]
+        plt.contour(labels == l, colors=colors)
+        plt.pause(0.5)
 plt.show()

From 8e875f31f9c5942f1e9a557731252d48e5234f3a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 4 Oct 2021 22:03:38 -0400
Subject: [PATCH 46/90] Update plot_coin_segmentation.py

typo fixed: shorten the comment above n_regions to "The number of regions"
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index b5f8a9392f5fc..4a322d093f437 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -70,7 +70,7 @@
 # be used (e.g., 'arpack', 'lobpcg', or 'amg').
 eigen_solver = 'arpack'
 
-# The actual number of regions in this example is 27: background and 26 coins
+# The number of regions
 n_regions = 26
 
 # %%

From 10077aad93c7a56c91095f4fa7b12a0798df6e89 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 11:56:36 -0400
Subject: [PATCH 47/90] Update sklearn/cluster/_spectral.py

nitpick: we can use f-strings since we dropped Python 3.6 support.

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/cluster/_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 2633616a85cce..438c02ed9278a 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -321,7 +321,7 @@ def spectral_clustering(
         raise ValueError(
             "The 'assign_labels' parameter should be "
             "'kmeans' or 'discretize', or 'cluster_qr', "
-            "but '%s' was given" % assign_labels
+            f"but {assign_labels!r} was given"
         )
     if isinstance(affinity, np.matrix):
         raise TypeError(

From b35f1ea2177670964d5403157ebd3086134112a9 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 13:18:14 -0400
Subject: [PATCH 48/90] Update plot_coin_segmentation.py

black
---
 examples/cluster/plot_coin_segmentation.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 8f51e75f798e6..7965ef0c33305 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -67,7 +67,7 @@
 # Apply spectral clustering (this step goes much faster if you have pyamg
 # installed and use eigen_solver = 'amg'). However, any valid solver can
 # be used (e.g., 'arpack', 'lobpcg', or 'amg').
-eigen_solver = 'arpack'
+eigen_solver = "arpack"
 
 # The number of regions
 n_regions = 26
@@ -79,18 +79,21 @@
 # It often helps the spectral clustering to compute a few extra eigenvectors
 n_regions_plus = 3
 
-for assign_labels in ('kmeans', 'discretize', 'cluster_qr'):
+for assign_labels in ("kmeans", "discretize", "cluster_qr"):
     t0 = time.time()
-    labels = spectral_clustering(graph,
-                                 n_clusters=(n_regions + n_regions_plus),
-                                 assign_labels=assign_labels, random_state=42,
-                                 eigen_solver=eigen_solver)
+    labels = spectral_clustering(
+        graph,
+        n_clusters=(n_regions + n_regions_plus),
+        assign_labels=assign_labels,
+        random_state=42,
+        eigen_solver=eigen_solver,
+    )
 
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
     plt.figure(figsize=(5, 5))
     plt.imshow(rescaled_coins, cmap=plt.cm.gray)
-   
+
     plt.xticks(())
     plt.yticks(())
     title = "Spectral clustering: %s, %.2fs" % (assign_labels, (t1 - t0))

From 84066a688e9f68c3ddeda51ae10ee78556ff5d2d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 14:27:02 -0400
Subject: [PATCH 49/90] Update doc/whats_new/v1.1.rst

minor edit

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 doc/whats_new/v1.1.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index bed8e9797b3c7..d179015e7578e 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -49,10 +49,8 @@ Changelog
 ......................
 
 - |Enhancement| :func:`cluster._spectral` now includes the 'cluster_qr' method
-  that clusters samples in the embedding space just as 'kmeans' and 'discrete'.
-  :func:`cluster.plot_coin_segmentation` now compares all three alternatives.
-  Documentation :doc: `modules/clustering` and unit :test: `test_spectral.py`
-  have been updated to incorporate 'cluster_qr'.
+  that clusters samples in the embedding space as an alternative to the existing
+  'kmeans' and 'discrete' methods. See `spectral_clustering`_ for more details.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`
 
 :mod:`sklearn.ensemble`

From 7974f2829112ecf38766ca2a68f3465f0a9377d4 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 14:28:34 -0400
Subject: [PATCH 50/90] Update sklearn/cluster/tests/test_spectral.py

dtype edits

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/cluster/tests/test_spectral.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index f6243191f6441..a74d9d3fab5e2 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -197,11 +197,11 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-@pytest.mark.parametrize("_dtype", ["single", "double"])
-def test_cluster_qr(_dtype):
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_cluster_qr(dtype):
     # Test cluster_qr for fixed data different dtypes
     random_state = np.random.RandomState(seed=8)
-    data = random_state.randn(10, 5).astype(_dtype)
+    data = random_state.randn(10, 5).astype(dtype)
     labels = cluster_qr(data)
     assert np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
 

From 3e3d3ccd4073a6df947f922bb0d960345f902856 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 15:27:59 -0400
Subject: [PATCH 51/90] Update v1.1.rst

https://github.com/scikit-learn/scikit-learn/pull/21148/files#r723413166
---
 doc/whats_new/v1.1.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index d179015e7578e..8c6c63f35f114 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -48,7 +48,8 @@ Changelog
 :mod:`sklearn.cluster`
 ......................
 
-- |Enhancement| :func:`cluster._spectral` now includes the 'cluster_qr' method
+- |Enhancement| :class:`cluster.SpectralClustering` and :func:`cluster.spectral` 
+  now includes the new 'cluster_qr' method from :func:`cluster.cluster_qr`
   that clusters samples in the embedding space as an alternative to the existing
   'kmeans' and 'discrete' methods. See `spectral_clustering`_ for more details.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`

From 26dd8fe6cfee7c1d62f36d80dc17c16338ef45cd Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 15:45:26 -0400
Subject: [PATCH 52/90] Update plot_coin_segmentation.py

.. versionchanged:: 1.1
   Added new labeling method 'cluster_qr'. Etc
---
 examples/cluster/plot_coin_segmentation.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 7965ef0c33305..33db521abce0b 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -18,7 +18,12 @@
   space using a cluster_qr algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space of spectral clustering.
-
+.. versionchanged:: 1.1
+   Added new labeling method 'cluster_qr'.
+   Changed the color scheme in plotting for better visibility.
+   Introduced a pause between plotting subsequent lables to visualize
+   individual labels when run manually.
+   Indtroduced an ablility to plot less lables than actually computed.
 """
 print(__doc__)
 

From 02d380450f17dac88c44b33bebfb4f25f0ffda01 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 15:47:44 -0400
Subject: [PATCH 53/90] Update plot_coin_segmentation.py

Change the colormap offset used for the contour colors from 3 to 4
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 33db521abce0b..9ed638beb0f5c 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -105,7 +105,7 @@
     print(title)
     plt.title(title)
     for l in range(n_regions):
-        colors = [plt.cm.nipy_spectral((l + 3) / float(n_regions + 3))]
+        colors = [plt.cm.nipy_spectral((l + 4) / float(n_regions + 4))]
         plt.contour(labels == l, colors=colors)
         plt.pause(0.5)
 plt.show()

From 98a4078c32eb31b5dab71025be55d7f4c5f27073 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 15:56:13 -0400
Subject: [PATCH 54/90] Update _spectral.py

added .. versionchanged::
---
 sklearn/cluster/_spectral.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 438c02ed9278a..0a2693028f7ea 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -40,6 +40,7 @@ def cluster_qr(vectors):
         Anil Damle, Victor Minden, Lexing Ying
         <https://doi.org/10.1093/imaiai/iay008>`_
 
+    .. versionadded:: 1.1
     """
 
     from scipy.linalg import qr, svd
@@ -274,6 +275,9 @@ def spectral_clustering(
         has no tuning parametersand runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
+    .. versionchanged:: 1.1
+       Added new labeling method 'cluster_qr'.
+
     verbose : bool, default=False
         Verbosity mode.
 
@@ -309,6 +313,9 @@ def spectral_clustering(
            Anil Damle, Victor Minden, Lexing Ying
            <https://doi.org/10.1093/imaiai/iay008>`_
 
+    .. versionchanged:: 1.1
+       Added new reference for the new labeling method 'cluster_qr'.
+
     Notes
     -----
     The graph should contain only one connected component, elsewhere
@@ -458,6 +465,13 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         popular choice, but it can be sensitive to initialization.
         Discretization is another approach which is less sensitive to random
         initialization [3]_.
+        The newest cluster_qr method [5]_ directly extract clusters from eigenvectors
+        in spectral clustering. In contrast to k-means and discretization, cluster_qr
+        has no tuning parametersand runs no iterations, yet may outperform
+        k-means and discretization in terms of both quality and speed.
+
+    .. versionchanged:: 1.1
+       Added new labeling method 'cluster_qr'.
 
     degree : float, default=3
         Degree of the polynomial kernel. Ignored by other kernels.
@@ -547,6 +561,13 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
            SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.
            <https://epubs.siam.org/doi/pdf/10.1137/S1064827500366124>`_
 
+    .. [5] `Simple, direct, and efficient multi-way spectral clustering, 2019
+           Anil Damle, Victor Minden, Lexing Ying
+           <https://doi.org/10.1093/imaiai/iay008>`_
+
+    .. versionchanged:: 1.1
+       Added new reference for the new labeling method 'cluster_qr'.
+
     Examples
     --------
     >>> from sklearn.cluster import SpectralClustering

From 8b5c52d50347a73807bde00d07cd98eb2375d9f4 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 16:19:50 -0400
Subject: [PATCH 55/90] Update v1.1.rst

Removed the trailing underscore from `spectral_clustering`_ because it was an unknown reference
---
 doc/whats_new/v1.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 8c6c63f35f114..061013c8684f8 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -51,7 +51,7 @@ Changelog
 - |Enhancement| :class:`cluster.SpectralClustering` and :func:`cluster.spectral` 
   now includes the new 'cluster_qr' method from :func:`cluster.cluster_qr`
   that clusters samples in the embedding space as an alternative to the existing
-  'kmeans' and 'discrete' methods. See `spectral_clustering`_ for more details.
+  'kmeans' and 'discrete' methods. See `spectral_clustering` for more details.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`
 
 :mod:`sklearn.ensemble`

From e22e2c1782ac29c25f0ea8916ec289267767c9b6 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 16:57:05 -0400
Subject: [PATCH 56/90] Update _spectral.py

Fix the indentation of the '.. versionchanged:: 1.1' directives
---
 sklearn/cluster/_spectral.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 0a2693028f7ea..46f946973d6cf 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -275,8 +275,8 @@ def spectral_clustering(
         has no tuning parametersand runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
-    .. versionchanged:: 1.1
-       Added new labeling method 'cluster_qr'.
+        .. versionchanged:: 1.1
+           Added new labeling method 'cluster_qr'.
 
     verbose : bool, default=False
         Verbosity mode.
@@ -470,8 +470,8 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         has no tuning parametersand runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
-    .. versionchanged:: 1.1
-       Added new labeling method 'cluster_qr'.
+        .. versionchanged:: 1.1
+           Added new labeling method 'cluster_qr'.
 
     degree : float, default=3
         Degree of the polynomial kernel. Ignored by other kernels.

From 8a2838d00a70f4bd91a026a077ec338c7db2bcfe Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 17:47:32 -0400
Subject: [PATCH 57/90] Update _spectral.py

typos fixed
---
 sklearn/cluster/_spectral.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 46f946973d6cf..bc1e99549a756 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -270,9 +270,9 @@ def spectral_clustering(
         embedding.  k-means can be applied and is a popular choice. But it can
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization [3]_.
-        The newest cluster_qr method [5]_ directly extract clusters from eigenvectors
+        The newest cluster_qr method [5]_ directly extracts clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
-        has no tuning parametersand runs no iterations, yet may outperform
+        has no tuning parameters and runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
         .. versionchanged:: 1.1

From 81c749d68f9169a7917e6e6e9e4132a54cc6f095 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 17:49:07 -0400
Subject: [PATCH 58/90] Update plot_coin_segmentation.py

typos fixed
---
 examples/cluster/plot_coin_segmentation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 9ed638beb0f5c..a9b4073850215 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -21,9 +21,9 @@
 .. versionchanged:: 1.1
    Added new labeling method 'cluster_qr'.
    Changed the color scheme in plotting for better visibility.
-   Introduced a pause between plotting subsequent lables to visualize
+   Introduced a pause between plotting subsequent labels to visualize
    individual labels when run manually.
-   Indtroduced an ablility to plot less lables than actually computed.
+   Introduced an ablility to plot fewer labels than actually computed.
 """
 print(__doc__)
 

From fabcb7b9783e4eb33c2775b46f6e9932db92688a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 17:52:19 -0400
Subject: [PATCH 59/90] Update _spectral.py

redundant line removed
---
 sklearn/cluster/_spectral.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index bc1e99549a756..c987e933ddbdb 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -47,7 +47,6 @@ def cluster_qr(vectors):
 
     k = vectors.shape[1]
     piv = qr(vectors.T, pivoting=True)[2]
-    piv = piv[0:k]
     ut, _, v = svd(vectors[piv[:k], :].T)
     vectors = abs(np.dot(vectors, np.dot(ut, v.conj())))
     return vectors.argmax(axis=1).T

From 3baa9daa78a8a7564df7f9620aded35d91a4ace8 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Thu, 7 Oct 2021 20:17:38 -0400
Subject: [PATCH 60/90] Update sklearn/cluster/_spectral.py

light edit

Co-authored-by: Victor Minden <victorminden@gmail.com>
---
 sklearn/cluster/_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index c987e933ddbdb..ce705ecfca964 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -466,7 +466,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         initialization [3]_.
         The newest cluster_qr method [5]_ directly extract clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
-        has no tuning parametersand runs no iterations, yet may outperform
+        has no tuning parameters and runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
         .. versionchanged:: 1.1

From 234e026152fe29355589154dccc76ad2e95c8271 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 8 Oct 2021 00:45:20 -0400
Subject: [PATCH 61/90] Update plot_coin_segmentation.py

removed specific eigen_solver, see https://github.com/scikit-learn/scikit-learn/pull/21148#discussion_r723439157
and added a TODO comment to reflect https://github.com/scikit-learn/scikit-learn/pull/21148#discussion_r723425034
---
 examples/cluster/plot_coin_segmentation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index a9b4073850215..2ef4e7b38afeb 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -72,7 +72,6 @@
 # Apply spectral clustering (this step goes much faster if you have pyamg
 # installed and use eigen_solver = 'amg'). However, any valid solver can
 # be used (e.g., 'arpack', 'lobpcg', or 'amg').
-eigen_solver = "arpack"
 
 # The number of regions
 n_regions = 26
@@ -91,7 +90,6 @@
         n_clusters=(n_regions + n_regions_plus),
         assign_labels=assign_labels,
         random_state=42,
-        eigen_solver=eigen_solver,
     )
 
     t1 = time.time()
@@ -109,3 +107,6 @@
         plt.contour(labels == l, colors=colors)
         plt.pause(0.5)
 plt.show()
+
+# TODO: After #21194 is merged and lobpcg is faster than amg, as expected, 
+# we should probably use eigen_solver = 'lopbcg' explicitly in this example.

From 2552663a1b11bdbb78e0556dcf074a2b60465716 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 8 Oct 2021 00:48:48 -0400
Subject: [PATCH 62/90] Update plot_coin_segmentation.py

trailing whitespace removed
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 2ef4e7b38afeb..e19b654c05bc1 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -108,5 +108,5 @@
         plt.pause(0.5)
 plt.show()
 
-# TODO: After #21194 is merged and lobpcg is faster than amg, as expected, 
+# TODO: After #21194 is merged and lobpcg is faster than amg, as expected,
 # we should probably use eigen_solver = 'lopbcg' explicitly in this example.

From d9f69eda1db10bd6ed645af0373a308016f3b547 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 8 Oct 2021 12:13:57 -0400
Subject: [PATCH 63/90] Update plot_coin_segmentation.py

Introduced explicit eigen_tol=1e-7
Improved and extended the comments
Commented out plt.pause(0.5) to speed up the auto test
Manually tuned n_regions, n_regions_plus, and eigen_tol to speed up the test while still getting good visuals.
---
 examples/cluster/plot_coin_segmentation.py | 27 ++++++++++++++--------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index e19b654c05bc1..f19e0945bf5a8 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -69,25 +69,31 @@
 eps = 1e-6
 graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
 
-# Apply spectral clustering (this step goes much faster if you have pyamg
-# installed and use eigen_solver = 'amg'). However, any valid solver can
-# be used (e.g., 'arpack', 'lobpcg', or 'amg').
-
-# The number of regions
+# The number of segmented regions to display needs to be chosen manually.
+# The current version of 'spectral_clustering' does not support determining
+# the number of good quality clusters automatically.
 n_regions = 26
 
 # %%
 # Compute and visualize the resulting regions
 
-# Any eigen_solver: 'arpack', 'lobpcg', 'amg' can be used. AMG is usually best
-# It often helps the spectral clustering to compute a few extra eigenvectors
+# Computing a few extra eigenvectors may speed up the eigen_solver.
+# The spectral clustering quality may also benetif from requesting
+# extra regions for segmentation.
 n_regions_plus = 3
 
+# Apply spectral clustering using the default eigen_solver='arpack'.  
+# Any implemented solver can be used: eigen_solver='arpack', 'lobpcg', or 'amg'.
+# Choosing eigen_solver='amg' requires an extra package called 'pyamg'.
+# The quality of segmentation and the speed of calculations is mostly determined
+# by the choice of the solver and the value of the tolerance 'eigen_tol'.
+# TODO: varying eigen_tol seems to have no effect for 'lobpcg' and 'amg' #21243.
 for assign_labels in ("kmeans", "discretize", "cluster_qr"):
     t0 = time.time()
     labels = spectral_clustering(
         graph,
         n_clusters=(n_regions + n_regions_plus),
+        eigen_tol=1e-7,
         assign_labels=assign_labels,
         random_state=42,
     )
@@ -105,8 +111,9 @@
     for l in range(n_regions):
         colors = [plt.cm.nipy_spectral((l + 4) / float(n_regions + 4))]
         plt.contour(labels == l, colors=colors)
-        plt.pause(0.5)
+        # To view individual segments as appear comment in plt.pause(0.5)
 plt.show()
 
-# TODO: After #21194 is merged and lobpcg is faster than amg, as expected,
-# we should probably use eigen_solver = 'lopbcg' explicitly in this example.
+# TODO: After #21194 is merged and #21243 is fixed, check which eigen_solver
+# is the best and set eigen_solver='arpack', 'lobpcg', or 'amg' and eigen_tol
+# explicitly in this example.

From 7435c18916dc64dacdeb986b9522b60adda4761e Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 8 Oct 2021 12:22:31 -0400
Subject: [PATCH 64/90] Update plot_coin_segmentation.py

black
---
 examples/cluster/plot_coin_segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index f19e0945bf5a8..01ebab5df4aa2 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -82,7 +82,7 @@
 # extra regions for segmentation.
 n_regions_plus = 3
 
-# Apply spectral clustering using the default eigen_solver='arpack'.  
+# Apply spectral clustering using the default eigen_solver='arpack'.
 # Any implemented solver can be used: eigen_solver='arpack', 'lobpcg', or 'amg'.
 # Choosing eigen_solver='amg' requires an extra package called 'pyamg'.
 # The quality of segmentation and the speed of calculations is mostly determined

From 05eaebd1237f51042b4c30cc69c0be3a016b89b6 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 12 Oct 2021 18:45:49 -0400
Subject: [PATCH 65/90] Apply suggestions from code review

minor editing

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 examples/cluster/plot_coin_segmentation.py |  6 ------
 sklearn/cluster/_spectral.py               | 11 ++---------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index 01ebab5df4aa2..f31b4a422771f 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -18,12 +18,6 @@
   space using a cluster_qr algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space of spectral clustering.
-.. versionchanged:: 1.1
-   Added new labeling method 'cluster_qr'.
-   Changed the color scheme in plotting for better visibility.
-   Introduced a pause between plotting subsequent labels to visualize
-   individual labels when run manually.
-   Introduced an ablility to plot fewer labels than actually computed.
 """
 print(__doc__)
 
diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index ce705ecfca964..0734c4bf36dd4 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -19,8 +19,7 @@
 
 
 def cluster_qr(vectors):
-    """Search for a partition matrix (clustering) which is
-    closest to the eigenvector embedding.
+    """Find the discrete partition closest to the eigenvector embedding.
 
     This implementation was proposed in [1]_.
 
@@ -49,7 +48,7 @@ def cluster_qr(vectors):
     piv = qr(vectors.T, pivoting=True)[2]
     ut, _, v = svd(vectors[piv[:k], :].T)
     vectors = abs(np.dot(vectors, np.dot(ut, v.conj())))
-    return vectors.argmax(axis=1).T
+    return vectors.argmax(axis=1)
 
 
 def discretize(
@@ -312,9 +311,6 @@ def spectral_clustering(
            Anil Damle, Victor Minden, Lexing Ying
            <https://doi.org/10.1093/imaiai/iay008>`_
 
-    .. versionchanged:: 1.1
-       Added new reference for the new labeling method 'cluster_qr'.
-
     Notes
     -----
     The graph should contain only one connected component, elsewhere
@@ -564,9 +560,6 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
            Anil Damle, Victor Minden, Lexing Ying
            <https://doi.org/10.1093/imaiai/iay008>`_
 
-    .. versionchanged:: 1.1
-       Added new reference for the new labeling method 'cluster_qr'.
-
     Examples
     --------
     >>> from sklearn.cluster import SpectralClustering

From 7c9b353a17c8e9dbb8001324a264e93c814bbcd7 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 12 Oct 2021 19:11:54 -0400
Subject: [PATCH 66/90] Update doc/modules/clustering.rst

minor

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 doc/modules/clustering.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 84c3b31f4b2d9..5ffceb2b0b671 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -511,8 +511,9 @@ In particular, unless you control the ``random_state``, it may not be
 reproducible from run-to-run, as it depends on random initialization.
 The alternative ``"discretize"`` strategy is 100% reproducible, but tends
 to create parcels of fairly even and geometrical shape.
-The recently added option ``clusterQR`` is also 100% reproducible and tends
-to create the visually best partitioning.
+The recently added ``"cluster_qr"`` option is a deterministic alternative that
+tends to create the visually best partitioning on the example application
+below.
 
 ================================  ================================  ================================
  ``assign_labels="kmeans"``        ``assign_labels="discretize"``    ``assign_labels="cluster_qr"``

From b635d649a4759049b8cde18cb99733c82ea68a8f Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 12 Oct 2021 20:54:15 -0400
Subject: [PATCH 67/90] Update test_spectral.py

added test_cluster_qr_permutation_invariance as suggested
---
 sklearn/cluster/tests/test_spectral.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index a74d9d3fab5e2..320f39387d033 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -199,13 +199,25 @@ def histogram(x, y, **kwargs):
 
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_cluster_qr(dtype):
-    # Test cluster_qr for fixed data different dtypes
+    # Test cluster_qr for fixed data different dtypes return the same lables
     random_state = np.random.RandomState(seed=8)
     data = random_state.randn(10, 5).astype(dtype)
     labels = cluster_qr(data)
     assert np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
 
 
+def test_cluster_qr_permutation_invariance():
+    # Test that cluster_qr is invariant to sample permutation
+    random_state = np.random.RandomState(seed=8)
+    n_samples, n_components = 100, 5
+    data = random_state.randn(n_samples, n_components)
+    perm = random_state.permutation(n_samples)
+    assert assert np.array_equal(
+        cluster_qr(data)[perm],
+        cluster_qr(data[perm]),
+    )
+
+
 @pytest.mark.parametrize("n_samples", [50, 100, 150, 500])
 def test_discretize(n_samples):
     # Test the discretize using a noise assignment matrix

From 370227438246744ae4f2940c99c15b6b8f1e187e Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 12 Oct 2021 20:57:47 -0400
Subject: [PATCH 68/90] Update test_spectral.py

typo fixed
---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 320f39387d033..3925ac7c25684 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -212,7 +212,7 @@ def test_cluster_qr_permutation_invariance():
     n_samples, n_components = 100, 5
     data = random_state.randn(n_samples, n_components)
     perm = random_state.permutation(n_samples)
-    assert assert np.array_equal(
+    assert np.array_equal(
         cluster_qr(data)[perm],
         cluster_qr(data[perm]),
     )

From af56485d2e02253f28a9a228cefc3d7a3097bb7d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 12 Oct 2021 21:14:42 -0400
Subject: [PATCH 69/90] Update plot_coin_segmentation.py

edited the DocString to address a suggestion
---
 examples/cluster/plot_coin_segmentation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index f31b4a422771f..0c99a30c96e77 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -12,12 +12,12 @@
 
 There are three options to assign labels:
 
-* with 'kmeans' spectral clustering will cluster samples in the embedding space
+* 'kmeans' spectral clustering clusters samples in the embedding space
   using a kmeans algorithm
-* with 'cluster_qr' spectral clustering will cluster samples in the embedding
-  space using a cluster_qr algorithm,
-* whereas 'discrete' will iteratively search for the closest partition
+* 'discrete' iteratively searchs for the closest partition
   space to the embedding space of spectral clustering.
+* 'cluster_qr' assigns lables using the QR factorization with pivoting
+  that directly determines the partiion in the embedding space.
 """
 print(__doc__)
 

From 7e361fb8135995d22ed2d944d846449d3260f89f Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 13 Oct 2021 09:22:03 -0400
Subject: [PATCH 70/90] Update _spectral.py

move all imports to the top
---
 sklearn/cluster/_spectral.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 0734c4bf36dd4..702dd635c9092 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -9,6 +9,13 @@
 
 import numpy as np
 
+# Required in cluster_qr
+from scipy.linalg import qr, svd
+
+# Required in discretize
+from scipy.sparse import csc_matrix
+from scipy.linalg import LinAlgError
+
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array
 from ..utils.deprecation import deprecated
@@ -42,8 +49,6 @@ def cluster_qr(vectors):
     .. versionadded:: 1.1
     """
 
-    from scipy.linalg import qr, svd
-
     k = vectors.shape[1]
     piv = qr(vectors.T, pivoting=True)[2]
     ut, _, v = svd(vectors[piv[:k], :].T)
@@ -106,9 +111,6 @@ def discretize(
 
     """
 
-    from scipy.sparse import csc_matrix
-    from scipy.linalg import LinAlgError
-
     random_state = check_random_state(random_state)
 
     vectors = as_float_array(vectors, copy=copy)

From 5f62fd46af88ded2a67447e28422984d30d9fda1 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 13 Oct 2021 10:05:39 -0400
Subject: [PATCH 71/90] Update _spectral.py

Unrelated to this PR, but while at it, improved comments on eigensolvers
---
 sklearn/cluster/_spectral.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 702dd635c9092..849d6d1985478 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -235,10 +235,11 @@ def spectral_clustering(
         Number of eigenvectors to use for the spectral embedding
 
     eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
-        The eigenvalue decomposition strategy to use. AMG requires pyamg
-        to be installed. It can be faster on very large, sparse problems,
-        but may also lead to instabilities. If None, then ``'arpack'`` is
-        used. See [4]_ for more details regarding `'lobpcg'`.
+        The eigenvalue decomposition method. If None then ``'arpack'`` is used.
+        See [4]_ for more details regarding ``'lobpcg'``.
+        Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional
+        Algebraic MultiGrid preconditioning and requires pyamg to be installed.
+        It can be faster on very large sparse problems, but may be instabile.
 
     random_state : int, RandomState instance, default=None
         A pseudo random number generator used for the initialization

From 995e2f4b03d6bb79d809b0509e34443a166b587d Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 13 Oct 2021 10:23:10 -0400
Subject: [PATCH 72/90] Apply suggestions from code review

editing

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/cluster/_spectral.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 849d6d1985478..556f4555c7061 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -30,6 +30,8 @@ def cluster_qr(vectors):
 
     This implementation was proposed in [1]_.
 
+.. versionadded:: 1.1
+
     Parameters
     ----------
     vectors : array-like, shape: (n_samples, n_clusters)
@@ -46,7 +48,6 @@ def cluster_qr(vectors):
         Anil Damle, Victor Minden, Lexing Ying
         <https://doi.org/10.1093/imaiai/iay008>`_
 
-    .. versionadded:: 1.1
     """
 
     k = vectors.shape[1]
@@ -239,7 +240,7 @@ def spectral_clustering(
         See [4]_ for more details regarding ``'lobpcg'``.
         Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional
         Algebraic MultiGrid preconditioning and requires pyamg to be installed.
-        It can be faster on very large sparse problems, but may be instabile.
+        It can be faster on very large sparse problems, but may be unstable.
 
     random_state : int, RandomState instance, default=None
         A pseudo random number generator used for the initialization

From df3c1aa71e3a7e394546922b6fcf5d463760ad7a Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Wed, 13 Oct 2021 10:51:34 -0400
Subject: [PATCH 73/90] Update _spectral.py

black
---
 sklearn/cluster/_spectral.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 556f4555c7061..9a62ed9c639ef 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -28,25 +28,25 @@
 def cluster_qr(vectors):
     """Find the discrete partition closest to the eigenvector embedding.
 
-    This implementation was proposed in [1]_.
+        This implementation was proposed in [1]_.
 
-.. versionadded:: 1.1
+    .. versionadded:: 1.1
 
-    Parameters
-    ----------
-    vectors : array-like, shape: (n_samples, n_clusters)
-        The embedding space of the samples.
+        Parameters
+        ----------
+        vectors : array-like, shape: (n_samples, n_clusters)
+            The embedding space of the samples.
 
-    Returns
-    -------
-    labels : array of integers, shape: n_samples
-        The labels of the clusters.
+        Returns
+        -------
+        labels : array of integers, shape: n_samples
+            The labels of the clusters.
 
-    References
-    ----------
-    .. [1] `Simple, direct, and efficient multi-way spectral clustering, 2019
-        Anil Damle, Victor Minden, Lexing Ying
-        <https://doi.org/10.1093/imaiai/iay008>`_
+        References
+        ----------
+        .. [1] `Simple, direct, and efficient multi-way spectral clustering, 2019
+            Anil Damle, Victor Minden, Lexing Ying
+            <https://doi.org/10.1093/imaiai/iay008>`_
 
     """
 

From ceb88cc6ad9dead73991415b26af89495168969b Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 15 Oct 2021 10:09:06 -0400
Subject: [PATCH 74/90] Update v1.1.rst

typo fixed
---
 doc/whats_new/v1.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index a4b5fbf553a55..1266bb8d9144b 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -49,7 +49,7 @@ Changelog
 ......................
 
 - |Enhancement| :class:`cluster.SpectralClustering` and :func:`cluster.spectral` 
-  now includes the new 'cluster_qr' method from :func:`cluster.cluster_qr`
+  now include the new 'cluster_qr' method from :func:`cluster.cluster_qr`
   that clusters samples in the embedding space as an alternative to the existing
   'kmeans' and 'discrete' methods. See `spectral_clustering` for more details.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`

From 1168562b28db18e70aa9c7c2e981bb7f759947b0 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:26:43 -0400
Subject: [PATCH 75/90] Update examples/cluster/plot_coin_segmentation.py

typos fixed

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 examples/cluster/plot_coin_segmentation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index ad1dfe554f928..e599c986646e0 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -14,10 +14,10 @@
 
 * 'kmeans' spectral clustering clusters samples in the embedding space
   using a kmeans algorithm
-* 'discrete' iteratively searchs for the closest partition
+* 'discrete' iteratively searches for the closest partition
   space to the embedding space of spectral clustering.
-* 'cluster_qr' assigns lables using the QR factorization with pivoting
-  that directly determines the partiion in the embedding space.
+* 'cluster_qr' assigns labels using the QR factorization with pivoting
+  that directly determines the partition in the embedding space.
 """
 
 # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung

From 299fb0da9da5d6022529df23ed1ec1fe2214dab7 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:27:22 -0400
Subject: [PATCH 76/90] Update doc/whats_new/v1.1.rst

formatting

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 doc/whats_new/v1.1.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index ce6b15b545a94..519d73e58a7a0 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -49,9 +49,10 @@ Changelog
 ......................
 
 - |Enhancement| :class:`cluster.SpectralClustering` and :func:`cluster.spectral` 
-  now include the new 'cluster_qr' method from :func:`cluster.cluster_qr`
+  now include the new `'cluster_qr'` method from :func:`cluster.cluster_qr`
   that clusters samples in the embedding space as an alternative to the existing
-  'kmeans' and 'discrete' methods. See `spectral_clustering` for more details.
+  `'kmeans'` and `'discrete'` methods.
+  See :func:`cluster.spectral_clustering` for more details.
   :pr:`21148` by :user:`Andrew Knyazev <lobpcg>`
 
 :mod:`sklearn.cross_decomposition`

From c99a58e9981090e98ebf637ae28b11c62b37f5f5 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:28:28 -0400
Subject: [PATCH 77/90] Update sklearn/cluster/_spectral.py

minor

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/cluster/_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 9a62ed9c639ef..58c2bb32c466f 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -272,7 +272,7 @@ def spectral_clustering(
         embedding.  k-means can be applied and is a popular choice. But it can
         also be sensitive to initialization. Discretization is another
         approach which is less sensitive to random initialization [3]_.
-        The newest cluster_qr method [5]_ directly extracts clusters from eigenvectors
+        The cluster_qr method [5]_ directly extracts clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
         has no tuning parameters and runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.

From 006bc964a2ec0b0734ff3045a72551acf07dc60c Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:28:56 -0400
Subject: [PATCH 78/90] Update sklearn/cluster/_spectral.py

minor

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/cluster/_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 58c2bb32c466f..4cf194d51b6a7 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -464,7 +464,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
         popular choice, but it can be sensitive to initialization.
         Discretization is another approach which is less sensitive to random
         initialization [3]_.
-        The newest cluster_qr method [5]_ directly extract clusters from eigenvectors
+        The cluster_qr method [5]_ directly extracts clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
         has no tuning parameters and runs no iterations, yet may outperform
         k-means and discretization in terms of both quality and speed.

From f367dbe4b254105765dcb55ab36afac1afdf1225 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:29:50 -0400
Subject: [PATCH 79/90] Update sklearn/cluster/_spectral.py

remove comments

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/cluster/_spectral.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 4cf194d51b6a7..93432293f3dc6 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -9,12 +9,8 @@
 
 import numpy as np
 
-# Required in cluster_qr
-from scipy.linalg import qr, svd
-
-# Required in discretize
+from scipy.linalg import LinAlgError, qr, svd
 from scipy.sparse import csc_matrix
-from scipy.linalg import LinAlgError
 
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array

From 1904c87392a64b9ed3ba975c55e14d434788be48 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:38:56 -0400
Subject: [PATCH 80/90] Update plot_coin_segmentation.py

an author added as suggested
---
 examples/cluster/plot_coin_segmentation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index e599c986646e0..cf916df3167c2 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -20,7 +20,9 @@
   that directly determines the partition in the embedding space.
 """
 
-# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung
+# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
+#         Brian Cheung
+#         Andrew Knyazev <Andrew.Knyazev@ucdenver.edu>
 # License: BSD 3 clause
 
 import time

From 17c4b108e33cc902720126a7806f50d4fa43d1b5 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 21:51:39 -0400
Subject: [PATCH 81/90] Update _spectral.py

doi + author
---
 sklearn/cluster/_spectral.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 93432293f3dc6..88386595cd2e7 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
 """Algorithms for spectral clustering"""
 
-# Author: Gael Varoquaux gael.varoquaux@normalesup.org
+# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
 #         Brian Cheung
 #         Wei LI <kuantkid@gmail.com>
+#         Andrew Knyazev <Andrew.Knyazev@ucdenver.edu>
 # License: BSD 3 clause
 import warnings
 
@@ -42,7 +43,7 @@ def cluster_qr(vectors):
         ----------
         .. [1] `Simple, direct, and efficient multi-way spectral clustering, 2019
             Anil Damle, Victor Minden, Lexing Ying
-            <https://doi.org/10.1093/imaiai/iay008>`_
+            <:doi:`10.1093/imaiai/iay008`>`_
 
     """
 
@@ -305,11 +306,11 @@ def spectral_clustering(
            Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001.
            A. V. Knyazev
            SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.
-           <https://epubs.siam.org/doi/pdf/10.1137/S1064827500366124>`_
+           <:doi:`10.1137/S1064827500366124`>`_
 
     .. [5] `Simple, direct, and efficient multi-way spectral clustering, 2019
            Anil Damle, Victor Minden, Lexing Ying
-           <https://doi.org/10.1093/imaiai/iay008>`_
+           <:doi:`10.1093/imaiai/iay008`>`_
 
     Notes
     -----

From 30cf92ffbe08a7b0bb027742066709466e77d477 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 22:01:21 -0400
Subject: [PATCH 82/90] Update clustering.rst

added a reference to cluster_qr
---
 doc/modules/clustering.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index cb21c21572c7b..89162dbb98254 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -522,6 +522,12 @@ below.
 |coin_kmeans|                          |coin_discretize|                  |coin_cluster_qr|
 ================================  ================================  ================================
 
+.. topic:: References:
+
+ * `"Simple, direct, and efficient multi-way spectral clustering"
+    <:doi:`10.1093/imaiai/iay008`>`_
+    Anil Damle, Victor Minden, Lexing Ying, 2019
+
 Spectral Clustering Graphs
 --------------------------
 

From 01367117146fc243aafef120d3490c2ac94034a3 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 22:35:16 -0400
Subject: [PATCH 83/90] Update _spectral.py

lobpcg with amg references added as requested
---
 sklearn/cluster/_spectral.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 88386595cd2e7..cbe9f1d7f66dd 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -237,7 +237,7 @@ def spectral_clustering(
         See [4]_ for more details regarding ``'lobpcg'``.
         Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional
         Algebraic MultiGrid preconditioning and requires pyamg to be installed.
-        It can be faster on very large sparse problems, but may be unstable.
+        It can be faster on very large sparse problems [6]_ and [7]_.
 
     random_state : int, RandomState instance, default=None
         A pseudo random number generator used for the initialization
@@ -303,7 +303,7 @@ def spectral_clustering(
            <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_
 
     .. [4] `Toward the Optimal Preconditioned Eigensolver:
-           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001.
+           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001
            A. V. Knyazev
            SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.
            <:doi:`10.1137/S1064827500366124`>`_
@@ -312,6 +312,16 @@ def spectral_clustering(
            Anil Damle, Victor Minden, Lexing Ying
            <:doi:`10.1093/imaiai/iay008`>`_
 
+    .. [6] :doi:`Multiscale Spectral Image Segmentation Multiscale preconditioning
+           for computing eigenvalues of graph Laplacians in image segmentation, 2006
+           Andrew Knyazev
+           <10.13140/RG.2.2.35280.02565>`
+
+    .. [7] :doi:`Preconditioned spectral clustering for stochastic block partition
+           streaming graph challenge (Preliminary version at arXiv.)
+           David Zhuzhunashvili, Andrew Knyazev
+           <10.1109/HPEC.2017.8091045>`
+
     Notes
     -----
     The graph should contain only one connected component, elsewhere

From 13b266bdda9080cda88d3c182b1ac1ba7d8130cb Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Mon, 25 Oct 2021 22:58:12 -0400
Subject: [PATCH 84/90] Update clustering.rst

' error fix
---
 doc/modules/clustering.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 89162dbb98254..442cc35a16e77 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -524,8 +524,7 @@ below.
 
 .. topic:: References:
 
- * `"Simple, direct, and efficient multi-way spectral clustering"
-    <:doi:`10.1093/imaiai/iay008`>`_
+ * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>`
     Anil Damle, Victor Minden, Lexing Ying, 2019
 
 Spectral Clustering Graphs

From 64f5dc15154f991bdaf70505b68fd36531a5b174 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Tue, 26 Oct 2021 00:28:50 -0400
Subject: [PATCH 85/90] Update clustering.rst

added the ``"discretize"`` reference
---
 doc/modules/clustering.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 442cc35a16e77..ac4807e052f66 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -523,6 +523,10 @@ below.
 ================================  ================================  ================================
 
 .. topic:: References:
+
+ * `"Multiclass spectral clustering"
+   <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_
+   Stella X. Yu, Jianbo Shi, 2003
 
  * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>`
     Anil Damle, Victor Minden, Lexing Ying, 2019

From 545f8d965f54db5ab776bf311e9961baf21cd6e6 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 29 Oct 2021 13:01:40 -0400
Subject: [PATCH 86/90] Apply suggestions from code review

misc editing

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/cluster/_spectral.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index cbe9f1d7f66dd..f96a11c177c8a 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -37,7 +37,7 @@ def cluster_qr(vectors):
         Returns
         -------
         labels : array of integers, shape: n_samples
-            The labels of the clusters.
+            The cluster labels of vectors.
 
         References
         ----------
@@ -48,7 +48,7 @@ def cluster_qr(vectors):
     """
 
     k = vectors.shape[1]
-    piv = qr(vectors.T, pivoting=True)[2]
+    _, _, piv = qr(vectors.T, pivoting=True)
     ut, _, v = svd(vectors[piv[:k], :].T)
     vectors = abs(np.dot(vectors, np.dot(ut, v.conj())))
     return vectors.argmax(axis=1)
@@ -271,7 +271,7 @@ def spectral_clustering(
         approach which is less sensitive to random initialization [3]_.
         The cluster_qr method [5]_ directly extracts clusters from eigenvectors
         in spectral clustering. In contrast to k-means and discretization, cluster_qr
-        has no tuning parameters and runs no iterations, yet may outperform
+        has no tuning parameters and is not an iterative method, yet may outperform
         k-means and discretization in terms of both quality and speed.
 
         .. versionchanged:: 1.1
@@ -569,7 +569,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
 
     .. [5] `Simple, direct, and efficient multi-way spectral clustering, 2019
            Anil Damle, Victor Minden, Lexing Ying
-           <https://doi.org/10.1093/imaiai/iay008>`_
+           <:doi:`10.1093/imaiai/iay008`>`_
 
     Examples
     --------

From 69717220a1dc2e0fbc6c24b665dd24bbd4dd4d73 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 29 Oct 2021 14:15:05 -0400
Subject: [PATCH 87/90] Apply suggestions from code review

previous missed suggestions finally committed

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/cluster/tests/test_spectral.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 3925ac7c25684..1ef71ddfc3b48 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -197,17 +197,18 @@ def histogram(x, y, **kwargs):
         sp.fit(X)
 
 
-@pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_cluster_qr(dtype):
-    # Test cluster_qr for fixed data different dtypes return the same lables
+def test_cluster_qr():
+    # cluster_qr for fixed data but different dtypes must return the same labels.
     random_state = np.random.RandomState(seed=8)
-    data = random_state.randn(10, 5).astype(dtype)
-    labels = cluster_qr(data)
-    assert np.array_equal(labels, np.array([2, 1, 3, 3, 2, 4, 1, 3, 4, 0]))
+    X_64 = random_state.randn(10, 5).astype(np.float64)
+    X_32 = random_state.randn(10, 5).astype(np.float32)
+    labels_64 = cluster_qr(X_64)
+    labels_32 = cluster_qr(X_32)
+    assert np.array_equal(labels_64, labels_32)
 
 
 def test_cluster_qr_permutation_invariance():
-    # Test that cluster_qr is invariant to sample permutation
+    # cluster_qr must be invariant to sample permutation.
     random_state = np.random.RandomState(seed=8)
     n_samples, n_components = 100, 5
     data = random_state.randn(n_samples, n_components)

From f59971123b5bf3f9905c7076219cc00a9ef0954e Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 29 Oct 2021 14:58:17 -0400
Subject: [PATCH 88/90] Update test_spectral.py

as proposed in https://github.com/scikit-learn/scikit-learn/pull/21148#discussion_r726948340
---
 sklearn/cluster/tests/test_spectral.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 1ef71ddfc3b48..c7d4d02a1daac 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -198,13 +198,22 @@ def histogram(x, y, **kwargs):
 
 
 def test_cluster_qr():
-    # cluster_qr for fixed data but different dtypes must return the same labels.
+    # cluster_qr by itself should not be used for clusteing generic data
+    # other than the rows of the eigenvectors within spectral clustering,
+    # but cluster_qr must still preserve the labels for different dtypes
+    # of the generic fixed input even if the labels may be meaningless.
     random_state = np.random.RandomState(seed=8)
-    X_64 = random_state.randn(10, 5).astype(np.float64)
-    X_32 = random_state.randn(10, 5).astype(np.float32)
-    labels_64 = cluster_qr(X_64)
-    labels_32 = cluster_qr(X_32)
-    assert np.array_equal(labels_64, labels_32)
+    n_samples, n_components = 10, 5
+    data = random_state.randn(n_samples, n_components)
+    labels_float64 = cluster_qr(data.astype(np.float64))
+    # Each sample is assigned a cluster identifier
+    assert labels_float64.shape == (n_samples,)
+    assert labels_float64.dtype == np.int64
+    # All components should be covered by the assignment
+    assert np.array_equal(np.unique(labels_float64), np.arange(n_components))
+    # Single precision data should yield the same cluster assignments
+    labels_float32 = cluster_qr(data.astype(np.float32))
+    assert np.array_equal(labels_float64, labels_float32)
 
 
 def test_cluster_qr_permutation_invariance():

From 4be590cc75823194b521979e9fabe1e07d4c8739 Mon Sep 17 00:00:00 2001
From: Andrew Knyazev <andrew.knyazev@ucdenver.edu>
Date: Fri, 29 Oct 2021 15:43:53 -0400
Subject: [PATCH 89/90] Update test_spectral.py

Removed `assert labels_float64.dtype == np.int64`: argmax returns the platform's index integer type, which is int32 on 32-bit systems, so the assertion naturally failed there.
---
 sklearn/cluster/tests/test_spectral.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index c7d4d02a1daac..fecb5dfcd7014 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -208,7 +208,6 @@ def test_cluster_qr():
     labels_float64 = cluster_qr(data.astype(np.float64))
     # Each sample is assigned a cluster identifier
     assert labels_float64.shape == (n_samples,)
-    assert labels_float64.dtype == np.int64
     # All components should be covered by the assignment
     assert np.array_equal(np.unique(labels_float64), np.arange(n_components))
     # Single precision data should yield the same cluster assignments

From ff3547680f537d6665e43b1e6f1962b7c5f21f58 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 2 Nov 2021 11:31:48 +0100
Subject: [PATCH 90/90] Typo in comment

---
 sklearn/cluster/tests/test_spectral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index fecb5dfcd7014..07dd4b64514ac 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -198,7 +198,7 @@ def histogram(x, y, **kwargs):
 
 
 def test_cluster_qr():
-    # cluster_qr by itself should not be used for clusteing generic data
+    # cluster_qr by itself should not be used for clustering generic data
     # other than the rows of the eigenvectors within spectral clustering,
     # but cluster_qr must still preserve the labels for different dtypes
     # of the generic fixed input even if the labels may be meaningless.