Commit 2771657: autopep8 and minor formatting
1 parent 73a54d0

6 files changed, +52 -40 lines

examples/cluster/plot_coin_segmentation.py (3 additions, 2 deletions)

```diff
@@ -15,13 +15,14 @@
 * with 'kmeans' spectral clustering will cluster samples in the embedding space
   using a kmeans algorithm
 * with 'clusterQR' will cluster samples in the embedding space
-  using a clusterQR algorithm,
+  using a clusterQR algorithm,
 * whereas 'discrete' will iteratively search for the closest partition
   space to the embedding space.
 """
 print(__doc__)

 # Author: Gael Varoquaux <[email protected]>, Brian Cheung
+#         Andrew Knyazev added clusterQR
 # License: BSD 3 clause

 import time
@@ -63,7 +64,7 @@
 # compute and visualize the resulting regions

 # it often helps the spectral clustering to compute a few extra eigenvectors
-N_REGIONS_PLUS = 3
+N_REGIONS_PLUS = 3

 for assign_labels in ('kmeans', 'discretize', 'clusterQR'):
     t0 = time.time()
```
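The example's loop times all three label-assignment strategies on the coins image. A minimal self-contained sketch of the same comparison on a toy image follows; it assumes this branch (which adds the 'clusterQR' option) is installed, since stock scikit-learn accepts only 'kmeans' and 'discretize':

```python
# Sketch only: 'clusterQR' assumes this branch; the graph setup is a
# simplified stand-in for the example's rescaled coins image.
import time
import numpy as np
from sklearn.feature_extraction import image
from sklearn.cluster import spectral_clustering

rng = np.random.RandomState(0)
img = rng.rand(20, 20)  # toy image in place of coins()
graph = image.img_to_graph(img)  # gradient-based affinity graph
graph.data = np.exp(-graph.data / graph.data.std())

for assign_labels in ('kmeans', 'discretize', 'clusterQR'):
    t0 = time.time()
    labels = spectral_clustering(graph, n_clusters=4, eigen_solver='arpack',
                                 assign_labels=assign_labels, random_state=42)
    print('%s: %.2fs' % (assign_labels, time.time() - t0))
```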

sklearn/cluster/spectral.py (16 additions, 13 deletions)

```diff
@@ -4,14 +4,13 @@
 # Author: Gael Varoquaux [email protected]
 #         Brian Cheung
-# Modified by Andrew Knyazev to add clusterQR
+# Modified by Andrew Knyazev to add clusterQR
 # License: BSD 3 clause
 import warnings

 import numpy as np

 from scipy.linalg import qr, svd
-from numpy import argmax

 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array
@@ -21,24 +20,25 @@
 from ..manifold import spectral_embedding
 from .k_means_ import k_means

+
 def clusterQR(vectors):
-    """Search for a partition matrix (clustering) which is closest to the
-    eigenvector embedding.
+    """Search for a partition matrix (clustering) which is
+    closest to the eigenvector embedding.

     Parameters
     ----------
     vectors : array-like, shape: (n_samples, n_clusters)
         The embedding space of the samples.
-
+
     Returns
     -------
     labels : array of integers, shape: n_samples
         The labels of the clusters.

     References
     ----------
-    https://github.com/asdamle/QR-spectral-clustering
-    https://arxiv.org/abs/1708.07481
+    https://github.com/asdamle/QR-spectral-clustering
+    https://arxiv.org/abs/1708.07481

     Notes
     -----
@@ -49,10 +49,12 @@ def clusterQR(vectors):
     k = vectors.shape[1]
     piv = qr(vectors.T.conj(), pivoting=True)[2]
     piv = piv[0:k]
-    Ut, Vt = svd(vectors[piv,:].T.conj())[0], svd(vectors[piv,:].T.conj())[2].T.conj()
-    vectors = abs(np.dot(vectors, np.dot(Ut,Vt.T.conj())))
+    Ut, Vt = svd(vectors[piv, :].T.conj())[0], \
+        svd(vectors[piv, :].T.conj())[2].T.conj()
+    vectors = abs(np.dot(vectors, np.dot(Ut, Vt.T.conj())))
     return (vectors.argmax(axis=1)).T

+
 def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
                random_state=None):
     """Search for a partition matrix (clustering) which is closest to the
@@ -282,9 +284,10 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     normalized spectral clustering.
     """
     if assign_labels not in ('kmeans', 'discretize', 'clusterQR'):
-        raise ValueError("The 'assign_labels' parameter should be "
-                         "'kmeans', 'discretize', or 'clusterQR' but '%s' was given"
-                         % assign_labels)
+        raise ValueError(
+            "The 'assign_labels' parameter should be "
+            "'kmeans', 'discretize', or 'clusterQR' but '%s' was given" %
+            assign_labels)

     random_state = check_random_state(random_state)
     n_components = n_clusters if n_components is None else n_components
@@ -300,7 +303,7 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     if assign_labels == 'kmeans':
         _, labels, _ = k_means(maps, n_clusters, random_state=random_state,
                                n_init=n_init)
-    elif assign_labels == 'clusterQR':
+    elif assign_labels == 'clusterQR':
         labels = clusterQR(maps)
     else:
         labels = discretize(maps, random_state=random_state)
```
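In words, clusterQR picks n_clusters "representative" samples of the embedding via column-pivoted QR, computes an orthogonal rotation aligning the embedding with those representatives via an SVD, and assigns each sample to its largest rotated coordinate. A self-contained sketch of that step on synthetic data; the function name is ours, not the module's:

```python
import numpy as np
from scipy.linalg import qr, svd

def cluster_qr_sketch(vectors):
    """Labels from a spectral embedding, per Damle et al. (arXiv:1708.07481)."""
    k = vectors.shape[1]
    # Column-pivoted QR on the transposed embedding selects k representative
    # samples, roughly one per cluster.
    piv = qr(vectors.T.conj(), pivoting=True)[2][:k]
    # Orthogonal rotation aligning the embedding with the representatives.
    u, _, vt = svd(vectors[piv, :].T.conj())
    rotated = np.abs(vectors.dot(u).dot(vt))
    return rotated.argmax(axis=1)  # largest rotated coordinate wins

rng = np.random.RandomState(0)
# Fake two-cluster embedding: two nearly orthogonal directions plus noise.
emb = np.repeat(np.eye(2), 10, axis=0) + 0.05 * rng.randn(20, 2)
print(cluster_qr_sketch(emb))  # ten samples of one label, then ten of the other
```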

sklearn/cluster/tests/test_spectral.py (6 additions, 2 deletions)

```diff
@@ -30,7 +30,11 @@


 @pytest.mark.parametrize('eigen_solver', ('arpack', 'lobpcg'))
-@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize', 'clusterQR'))
+@pytest.mark.parametrize(
+    'assign_labels',
+    ('kmeans',
+     'discretize',
+     'clusterQR'))
 def test_spectral_clustering(eigen_solver, assign_labels):
     S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                   [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
@@ -109,7 +113,7 @@ def test_affinities():
     # on OSX and Linux
     X, y = make_blobs(n_samples=20, random_state=0,
                       centers=[[1, 1], [-1, -1]], cluster_std=0.01
-                      )
+                      )
     # nearest neighbors affinity
     sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                             random_state=0)
```
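To exercise just the new parametrized cases on this branch, something like `pytest sklearn/cluster/tests/test_spectral.py -k clusterQR` should select them by substring match.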

sklearn/decomposition/pca.py (6 additions, 6 deletions)

```diff
@@ -7,15 +7,15 @@
 #         Denis A. Engemann <[email protected]>
 #         Michael Eickenberg <[email protected]>
 #         Giorgio Patrini <[email protected]>
-#
+#         Andrew Knyazev added lobpcg
 # License: BSD 3 clause

 from math import log, sqrt
 import numbers

 import numpy as np
 from scipy import linalg
-from scipy.special import gammaln
+# from scipy.special import gammaln
 from scipy.sparse import issparse
 from scipy.sparse.linalg import svds

@@ -356,7 +356,7 @@ def fit_transform(self, X, y=None):
         X_new : array-like, shape (n_samples, n_components)

         """
-        U, S, V = self._fit(X)
+        U, S, _ = self._fit(X)
         U = U[:, :self.n_components_]

         if self.whiten:
@@ -528,9 +528,9 @@ def _fit_truncated(self, X, n_components, svd_solver):
         elif svd_solver == 'lobpcg':
             # sign flipping is done inside
             U, S, V = lobpcg_svd(X, n_components=n_components,
-                                 n_iter=self.iterated_power,
-                                 flip_sign=True,
-                                 random_state=random_state)
+                                 n_iter=self.iterated_power,
+                                 flip_sign=True,
+                                 random_state=random_state)

         self.n_samples_, self.n_features_ = n_samples, n_features
         self.components_ = V
```
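A hedged smoke test of the new solver path; `svd_solver='lobpcg'` exists only on this branch, not in upstream scikit-learn:

```python
# Assumes this branch; upstream PCA rejects svd_solver='lobpcg'.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).randn(300, 40)
pca = PCA(n_components=5, svd_solver='lobpcg', iterated_power=7,
          random_state=0)
Xt = pca.fit_transform(X)
print(Xt.shape)                       # (300, 5)
print(pca.explained_variance_ratio_)  # leading variance fractions
```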

sklearn/decomposition/truncated_svd.py (6 additions, 5 deletions)

```diff
@@ -96,9 +96,9 @@ class TruncatedSVD(BaseEstimator, TransformerMixin):
     0.249...
     >>> print(svd.singular_values_)  # doctest: +ELLIPSIS
     [2.5841... 2.5245... 2.3201... 2.1753... 2.0443...]
-    >>> # to test LOBPCG instead of randomized:
-    >>> svd = TruncatedSVD(algorithm='lobpcg', n_components=5, n_iter=7, random_state=42)
-    >>> svd.fit(X)
+    ## to test LOBPCG instead of randomized:
+    # svd = TruncatedSVD(algorithm='lobpcg', n_components=5)
+    # svd.fit(X)

     See also
     --------
@@ -118,6 +118,7 @@ class TruncatedSVD(BaseEstimator, TransformerMixin):
     class to data once, then keep the instance around to do transformations.

     """
+
     def __init__(self, n_components=2, algorithm="randomized", n_iter=5,
                  random_state=None, tol=0.):
         self.algorithm = algorithm
@@ -175,8 +176,8 @@ def fit_transform(self, X, y=None):
                 raise ValueError("n_components must be < n_features;"
                                  " got %d >= %d" % (k, n_features))
             U, Sigma, VT = lobpcg_svd(X, self.n_components,
-                                      n_iter=self.n_iter,
-                                      random_state=random_state)
+                                      n_iter=self.n_iter,
+                                      random_state=random_state)
         elif self.algorithm == "randomized":
             k = self.n_components
             n_features = X.shape[1]
```
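Since the doctest above is now commented out, an equivalent hedged snippet (again branch-only, mirroring the class's randomized-SVD doctest setup) would be:

```python
# Assumes this branch; upstream TruncatedSVD has no 'lobpcg' algorithm.
from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

X = sparse_random(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(algorithm='lobpcg', n_components=5, n_iter=7,
                   random_state=42)
svd.fit(X)
print(svd.singular_values_)
```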

sklearn/utils/extmath.py (15 additions, 12 deletions)

```diff
@@ -9,6 +9,7 @@
 #          Stefan van der Walt
 #          Kyle Kastner
 #          Giorgio Patrini
+#          Andrew Knyazev added lobpcg
 # License: BSD 3 clause

 from __future__ import division
@@ -26,6 +27,7 @@
 from .validation import check_array
 from scipy.sparse.linalg import lobpcg

+
 @deprecated("sklearn.utils.extmath.norm was deprecated in version 0.19 "
             "and will be removed in 0.21. Use scipy.linalg.norm instead.")
 def norm(x):
@@ -241,7 +243,7 @@ def randomized_range_finder(A, size, n_iter,

     # Perform power iterations with Q to further 'imprint' the top
     # singular vectors of A in Q
-    for i in range(n_iter):
+    for _ in range(n_iter):
         if power_iteration_normalizer == 'none':
             Q = safe_sparse_dot(A, Q)
             Q = safe_sparse_dot(A.T, Q)
@@ -387,9 +389,10 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto',
     else:
         return U[:, :n_components], s[:n_components], V[:n_components, :]

+
 def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
-               transpose='auto', lobpcg_tol=None,
-               flip_sign=True, random_state=0):
+               transpose='auto', lobpcg_tol=None,
+               flip_sign=True, random_state=0):
     """Computes a truncated SVD using LOBPCG mimicking the randomized SVD setup

     Parameters
@@ -436,14 +439,14 @@ def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
     Notes
     -----
     This algorithm finds a (usually very good) approximate truncated
-    singular value decomposition using LOBPCG with randomization to speed up
+    singular value decomposition using LOBPCG with randomization to speed up
     the computations. It is particularly fast on large matrices on which
     you wish to extract only a small number of components. In order to
     obtain further speed up, `n_iter` can be set <=2 (at the cost of
-    loss of precision). Compared to 'ranomised', the 'lobpcg' option gives
-    more accurate approximations, with the same n_iter, n_components, and
-    n_oversamples, at the slightly increased costs, allows setting
-    the tolerance, and can output the accuracy.
+    loss of precision). Compared to 'randomized', the 'lobpcg' option gives
+    more accurate approximations with the same n_iter, n_components, and
+    n_oversamples, at slightly increased cost, allows setting
+    the tolerance, and can output the accuracy.

     References
     ----------
@@ -475,16 +478,16 @@ def lobpcg_svd(M, n_components, n_oversamples=10, n_iter='auto',
     if transpose:
         # this implementation is a bit faster with smaller shape[1]
         M = M.T
-
+
     Q = random_state.normal(size=(M.shape[0], n_random))
     if M.dtype.kind == 'f':
         # Ensure f32 is preserved as f32
         Q = Q.astype(M.dtype, copy=False)

     A = - safe_sparse_dot(M, M.T)
-    # LOBPCG default option largest=True is currently broken, so we go the
+    # LOBPCG default option largest=True is currently broken, so we go for the
     # smallest (negative) of the negative normal matrix A
-    lambdas, Q = lobpcg(A, Q, tol=lobpcg_tol, maxiter=n_iter, largest=False)
+    _, Q = lobpcg(A, Q, tol=lobpcg_tol, maxiter=n_iter, largest=False)

     # project M to the (k + p) dimensional space using the basis vectors
@@ -652,7 +655,7 @@ def cartesian(arrays, out=None):
     if out is None:
         out = np.empty_like(ix, dtype=dtype)

-    for n, arr in enumerate(arrays):
+    for n, _ in enumerate(arrays):
         out[:, n] = arrays[n][ix[:, n]]

     return out
```
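The core trick in lobpcg_svd, sketched standalone: because scipy's lobpcg with largest=True was unreliable at the time, find the smallest eigenpairs of the negated normal matrix -M Mᵀ (equivalently, the largest of M Mᵀ), then project M onto that basis and finish with a small dense SVD. Names and defaults below are ours, not the module's:

```python
import numpy as np
from scipy.linalg import svd
from scipy.sparse.linalg import lobpcg

def lobpcg_svd_sketch(M, n_components, n_oversamples=10, maxiter=7, seed=0):
    rng = np.random.RandomState(seed)
    # Random initial block, oversampled for robustness.
    Q = rng.normal(size=(M.shape[0], n_components + n_oversamples))
    # Smallest eigenpairs of -M M^T are the largest of M M^T.
    _, Q = lobpcg(-M.dot(M.T), Q, maxiter=maxiter, largest=False)
    # Project M onto span(Q); a small dense SVD recovers the triplets.
    Uhat, s, Vt = svd(Q.T.dot(M), full_matrices=False)
    U = Q.dot(Uhat)
    return U[:, :n_components], s[:n_components], Vt[:n_components, :]

# Toy check against the dense SVD on a matrix with decaying spectrum.
rng = np.random.RandomState(42)
U0, _ = np.linalg.qr(rng.randn(200, 20))
V0, _ = np.linalg.qr(rng.randn(50, 20))
M = (U0 * np.logspace(0, -3, 20)).dot(V0.T)
_, s, _ = lobpcg_svd_sketch(M, n_components=5)
print(np.round(s, 4))
print(np.round(svd(M, full_matrices=False)[1][:5], 4))
```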
