Commit 2771657: autopep8 and minor formatting
1 parent 73a54d0

6 files changed, +52 -40 lines

examples/cluster/plot_coin_segmentation.py (3 additions, 2 deletions)

```diff
@@ -15,13 +15,14 @@
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
   using a kmeans algorithm
 * with 'clusterQR' will cluster samples in the embedding space
-  using a clusterQR algorithm,
+  using a clusterQR algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space.
 """
 print(__doc__)

 # Author: Gael Varoquaux <[email protected]>, Brian Cheung
+#         Andrew Knyazev added clusterQR
 # License: BSD 3 clause

 import time
@@ -63,7 +64,7 @@
 # compute and visualize the resulting regions

 # it often helps the spectral clustering to compute a few extra eigenvectors
-N_REGIONS_PLUS = 3
+N_REGIONS_PLUS = 3

 for assign_labels in ('kmeans', 'discretize', 'clusterQR'):
     t0 = time.time()
```
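The example's loop times all three label-assignment strategies on the coins image. A minimal self-contained sketch of the same comparison on a toy image follows; it assumes this branch (which adds the 'clusterQR' option) is installed, since stock scikit-learn accepts only 'kmeans' and 'discretize':

```python
# Sketch only: 'clusterQR' assumes this branch; the graph setup is a
# simplified stand-in for the example's rescaled coins image.
import time
import numpy as np
from sklearn.feature_extraction import image
from sklearn.cluster import spectral_clustering

rng = np.random.RandomState(0)
img = rng.rand(20, 20)  # toy image in place of coins()
graph = image.img_to_graph(img)  # gradient-based affinity graph
graph.data = np.exp(-graph.data / graph.data.std())

for assign_labels in ('kmeans', 'discretize', 'clusterQR'):
    t0 = time.time()
    labels = spectral_clustering(graph, n_clusters=4, eigen_solver='arpack',
                                 assign_labels=assign_labels, random_state=42)
    print('%s: %.2fs' % (assign_labels, time.time() - t0))
```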

sklearn/cluster/spectral.py (16 additions, 13 deletions)

```diff
@@ -4,14 +4,13 @@
 # Author: Gael Varoquaux [email protected]
 #         Brian Cheung
-# Modified by Andrew Knyazev to add clusterQR
+# Modified by Andrew Knyazev to add clusterQR
 # License: BSD 3 clause
 import warnings

 import numpy as np

 from scipy.linalg import qr, svd
-from numpy import argmax

 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array
@@ -21,24 +20,25 @@
 from ..manifold import spectral_embedding
 from .k_means_ import k_means

+
 def clusterQR(vectors):
-    """Search for a partition matrix (clustering) which is closest to the
-    eigenvector embedding.
+    """Search for a partition matrix (clustering) which is
+    closest to the eigenvector embedding.

     Parameters
     ----------
     vectors : array-like, shape: (n_samples, n_clusters)
         The embedding space of the samples.
-
+
     Returns
     -------
     labels : array of integers, shape: n_samples
         The labels of the clusters.

     References
     ----------
-    https://github.com/asdamle/QR-spectral-clustering
-    https://arxiv.org/abs/1708.07481
+    https://github.com/asdamle/QR-spectral-clustering
+    https://arxiv.org/abs/1708.07481

     Notes
     -----
@@ -49,10 +49,12 @@ def clusterQR(vectors):
     k = vectors.shape[1]
     piv = qr(vectors.T.conj(), pivoting=True)[2]
     piv = piv[0:k]
-    Ut, Vt = svd(vectors[piv,:].T.conj())[0], svd(vectors[piv,:].T.conj())[2].T.conj()
-    vectors = abs(np.dot(vectors, np.dot(Ut,Vt.T.conj())))
+    Ut, Vt = svd(vectors[piv, :].T.conj())[0], \
+        svd(vectors[piv, :].T.conj())[2].T.conj()
+    vectors = abs(np.dot(vectors, np.dot(Ut, Vt.T.conj())))
     return (vectors.argmax(axis=1)).T

+
 def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
                random_state=None):
     """Search for a partition matrix (clustering) which is closest to the
@@ -282,9 +284,10 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     normalized spectral clustering.
     """
     if assign_labels not in ('kmeans', 'discretize', 'clusterQR'):
-        raise ValueError("The 'assign_labels' parameter should be "
-                         "'kmeans', 'discretize', or 'clusterQR' but '%s' was given"
-                         % assign_labels)
+        raise ValueError(
+            "The 'assign_labels' parameter should be "
+            "'kmeans', 'discretize', or 'clusterQR' but '%s' was given" %
+            assign_labels)

     random_state = check_random_state(random_state)
     n_components = n_clusters if n_components is None else n_components
@@ -300,7 +303,7 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     if assign_labels == 'kmeans':
         _, labels, _ = k_means(maps, n_clusters, random_state=random_state,
                                n_init=n_init)
-    elif assign_labels == 'clusterQR':
+    elif assign_labels == 'clusterQR':
         labels = clusterQR(maps)
     else:
         labels = discretize(maps, random_state=random_state)
```
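In words, clusterQR picks n_clusters "representative" samples of the embedding via column-pivoted QR, computes an orthogonal rotation aligning the embedding with those representatives via an SVD, and assigns each sample to its largest rotated coordinate. A self-contained sketch of that step on synthetic data; the function name is ours, not the module's:

```python
import numpy as np
from scipy.linalg import qr, svd

def cluster_qr_sketch(vectors):
    """Labels from a spectral embedding, per Damle et al. (arXiv:1708.07481)."""
    k = vectors.shape[1]
    # Column-pivoted QR on the transposed embedding selects k representative
    # samples, roughly one per cluster.
    piv = qr(vectors.T.conj(), pivoting=True)[2][:k]
    # Orthogonal rotation aligning the embedding with the representatives.
    u, _, vt = svd(vectors[piv, :].T.conj())
    rotated = np.abs(vectors.dot(u).dot(vt))
    return rotated.argmax(axis=1)  # largest rotated coordinate wins

rng = np.random.RandomState(0)
# Fake two-cluster embedding: two nearly orthogonal directions plus noise.
emb = np.repeat(np.eye(2), 10, axis=0) + 0.05 * rng.randn(20, 2)
print(cluster_qr_sketch(emb))  # ten samples of one label, then ten of the other
```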

sklearn/cluster/tests/test_spectral.py (6 additions, 2 deletions)

```diff
@@ -30,7 +30,11 @@


 @pytest.mark.parametrize('eigen_solver', ('arpack', 'lobpcg'))
-@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize', 'clusterQR'))
+@pytest.mark.parametrize(
+    'assign_labels',
+    ('kmeans',
+     'discretize',
+     'clusterQR'))
 def test_spectral_clustering(eigen_solver, assign_labels):
     S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                   [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
@@ -109,7 +113,7 @@ def test_affinities():
     # on OSX and Linux
     X, y = make_blobs(n_samples=20, random_state=0,
                       centers=[[1, 1], [-1, -1]], cluster_std=0.01
-                      )
+                      )
     # nearest neighbors affinity
     sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                             random_state=0)
```
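To exercise just the new parametrized cases on this branch, something like `pytest sklearn/cluster/tests/test_spectral.py -k clusterQR` should select them by substring match.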

sklearn/decomposition/pca.py (6 additions, 6 deletions)

```diff
@@ -7,15 +7,15 @@
 #         Denis A. Engemann <[email protected]>
 #         Michael Eickenberg <[email protected]>
 #         Giorgio Patrini <[email protected]>
-#
+#         Andrew Knyazev added lobpcg
 # License: BSD 3 clause

 from math import log, sqrt
 import numbers

 import numpy as np
 from scipy import linalg
-from scipy.special import gammaln
+# from scipy.special import gammaln
 from scipy.sparse import issparse
 from scipy.sparse.linalg import svds

@@ -356,7 +356,7 @@ def fit_transform(self, X, y=None):
         X_new : array-like, shape (n_samples, n_components)

         """
-        U, S, V = self._fit(X)
+        U, S, _ = self._fit(X)
         U = U[:, :self.n_components_]

         if self.whiten:
@@ -528,9 +528,9 @@ def _fit_truncated(self, X, n_components, svd_solver):
         elif svd_solver == 'lobpcg':
             # sign flipping is done inside
             U, S, V = lobpcg_svd(X, n_components=n_components,
-                                 n_iter=self.iterated_power,
-                                 flip_sign=True,
-                                 random_state=random_state)
+                                 n_iter=self.iterated_power,
+                                 flip_sign=True,
+                                 random_state=random_state)

         self.n_samples_, self.n_features_ = n_samples, n_features
         self.components_ = V
```
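A hedged smoke test of the new solver path; `svd_solver='lobpcg'` exists only on this branch, not in upstream scikit-learn:

```python
# Assumes this branch; upstream PCA rejects svd_solver='lobpcg'.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).randn(300, 40)
pca = PCA(n_components=5, svd_solver='lobpcg', iterated_power=7,
          random_state=0)
Xt = pca.fit_transform(X)
print(Xt.shape)                       # (300, 5)
print(pca.explained_variance_ratio_)  # leading variance fractions
```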

sklearn/decomposition/truncated_svd.py (6 additions, 5 deletions)

```diff
@@ -96,9 +96,9 @@ class TruncatedSVD(BaseEstimator, TransformerMixin):
     0.249...
     >>> print(svd.singular_values_)  # doctest: +ELLIPSIS
     [2.5841... 2.5245... 2.3201... 2.1753... 2.0443...]
-    >>> # to test LOBPCG instead of randomized:
-    >>> svd = TruncatedSVD(algorithm='lobpcg', n_components=5, n_iter=7, random_state=42)
-    >>> svd.fit(X)
+    ## to test LOBPCG instead of randomized:
+    # svd = TruncatedSVD(algorithm='lobpcg', n_components=5)
+    # svd.fit(X)

     See also
     --------
@@ -118,6 +118,7 @@ class TruncatedSVD(BaseEstimator, TransformerMixin):
     class to data once, then keep the instance around to do transformations.

     """
+
     def __init__(self, n_components=2, algorithm="randomized", n_iter=5,
                  random_state=None, tol=0.):
         self.algorithm = algorithm
@@ -175,8 +176,8 @@ def fit_transform(self, X, y=None):
                 raise ValueError("n_components must be < n_features;"
                                  " got %d >= %d" % (k, n_features))
             U, Sigma, VT = lobpcg_svd(X, self.n_components,
-                                      n_iter=self.n_iter,
-                                      random_state=random_state)
+                                      n_iter=self.n_iter,
+                                      random_state=random_state)
         elif self.algorithm == "randomized":
             k = self.n_components
             n_features = X.shape[1]
```
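Since the doctest above is now commented out, an equivalent hedged snippet (again branch-only, mirroring the class's randomized-SVD doctest setup) would be:

```python
# Assumes this branch; upstream TruncatedSVD has no 'lobpcg' algorithm.
from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

X = sparse_random(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(algorithm='lobpcg', n_components=5, n_iter=7,
                   random_state=42)
svd.fit(X)
print(svd.singular_values_)
```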

sklearn/utils/extmath.py (15 additions, 12 deletions)

```diff
@@ -9,6 +9,7 @@
 #          Stefan van der Walt
 #          Kyle Kastner
 #          Giorgio Patrini
+#          Andrew Knyazev added lobpcg
 # License: BSD 3 clause

 from __future__ import division
@@ -26,6 +27,7 @@
 from .validation import check_array
 from scipy.sparse.linalg import lobpcg

+
 @deprecated("sklearn.utils.extmath.norm was deprecated in version 0.19 "
             "and will be removed in 0.21. Use scipy.linalg.norm instead.")
 def norm(x):
@@ -241,7 +243,7 @@ def randomized_range_finder(A, size, n_iter,

     # Perform power iterations with Q to further 'imprint' the top
     # singular vectors of A in Q
-    for i in range(n_iter):
+    for _ in range(n_iter):
         if power_iteration_normalizer == 'none':
             Q = safe_sparse_dot(A, Q)
             Q = safe_sparse_dot(A.T, Q)
@@ -387,9 +389,10 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
     else:
         return U[:, :n_components], s[:n_components], V[:n_components, :]

+
 def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
-               transpose='auto', lobpcg_tol=None,
-               flip_sign=True, random_state=0):
+               transpose='auto', lobpcg_tol=None,
+               flip_sign=True, random_state=0):
     """Computes a truncated SVD using LOBPCG mimicking the randomized SVD setup

     Parameters
@@ -436,14 +439,14 @@ def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
     Notes
     -----
     This algorithm finds a (usually very good) approximate truncated
-    singular value decomposition using LOBPCG with randomization to speed up
+    singular value decomposition using LOBPCG with randomization to speed up
     the computations. It is particularly fast on large matrices on which
     you wish to extract only a small number of components. In order to
     obtain further speed up, `n_iter` can be set <=2 (at the cost of
-    loss of precision). Compared to 'ranomised', the 'lobpcg' option gives
-    more accurate approximations, with the same n_iter, n_components, and
-    n_oversamples, at the slightly increased costs, allows setting
-    the tolerance, and can output the accuracy.
+    loss of precision). Compared to 'randomized', the 'lobpcg' option gives
+    more accurate approximations with the same n_iter, n_components, and
+    n_oversamples, at slightly increased cost, allows setting
+    the tolerance, and can output the accuracy.

     References
     ----------
@@ -475,16 +478,16 @@ def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
     if transpose:
         # this implementation is a bit faster with smaller shape[1]
         M = M.T
-
+
     Q = random_state.normal(size=(M.shape[0], n_random))
     if M.dtype.kind == 'f':
         # Ensure f32 is preserved as f32
         Q = Q.astype(M.dtype, copy=False)

     A = - safe_sparse_dot(M, M.T)
-    # LOBPCG default option largest=True is currently broken, so we go the
+    # LOBPCG default option largest=True is currently broken, so we go for the
     # smallest (negative) of the negative normal matrix A
-    lambdas, Q = lobpcg(A, Q, tol=lobpcg_tol, maxiter=n_iter, largest=False)
+    _, Q = lobpcg(A, Q, tol=lobpcg_tol, maxiter=n_iter, largest=False)

     # project M to the (k + p) dimensional space using the basis vectors
@@ -652,7 +655,7 @@ def cartesian(arrays, out=None):
     if out is None:
         out = np.empty_like(ix, dtype=dtype)

-    for n, arr in enumerate(arrays):
+    for n, _ in enumerate(arrays):
         out[:, n] = arrays[n][ix[:, n]]

     return out
```
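The core trick in lobpcg_svd, sketched standalone: because scipy's lobpcg with largest=True was unreliable at the time, find the smallest eigenpairs of the negated normal matrix -M Mᵀ (equivalently, the largest of M Mᵀ), then project M onto that basis and finish with a small dense SVD. Names and defaults below are ours, not the module's:

```python
import numpy as np
from scipy.linalg import svd
from scipy.sparse.linalg import lobpcg

def lobpcg_svd_sketch(M, n_components, n_oversamples=10, maxiter=7, seed=0):
    rng = np.random.RandomState(seed)
    # Random initial block, oversampled for robustness.
    Q = rng.normal(size=(M.shape[0], n_components + n_oversamples))
    # Smallest eigenpairs of -M M^T are the largest of M M^T.
    _, Q = lobpcg(-M.dot(M.T), Q, maxiter=maxiter, largest=False)
    # Project M onto span(Q); a small dense SVD recovers the triplets.
    Uhat, s, Vt = svd(Q.T.dot(M), full_matrices=False)
    U = Q.dot(Uhat)
    return U[:, :n_components], s[:n_components], Vt[:n_components, :]

# Toy check against the dense SVD on a matrix with decaying spectrum.
rng = np.random.RandomState(42)
U0, _ = np.linalg.qr(rng.randn(200, 20))
V0, _ = np.linalg.qr(rng.randn(50, 20))
M = (U0 * np.logspace(0, -3, 20)).dot(V0.T)
_, s, _ = lobpcg_svd_sketch(M, n_components=5)
print(np.round(s, 4))
print(np.round(svd(M, full_matrices=False)[1][:5], 4))
```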
