Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit b561299

Browse files
Chipe1 authored and norvig committed
* Fixed truncated_svd
* Moved SVD algorithm to learning.py
1 parent d435758 commit b561299

File tree

4 files changed

+89
-55
lines changed

4 files changed

+89
-55
lines changed

learning.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
55
dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
66
weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
7-
open_data, sigmoid_derivative, probability
7+
open_data, sigmoid_derivative, probability, norm, matrix_multiplication
88
)
99

1010
import copy
@@ -377,6 +377,66 @@ def predict(example):
377377
# ______________________________________________________________________________
378378

379379

380+
def truncated_svd(X, num_val=2, max_iter=1000):
    """Compute the `num_val` largest singular values and vectors of matrix X.

    Works by power iteration on the symmetric block matrix
    A = [[0, X], [X.T, 0]], whose nonzero eigenvalues are +/- the singular
    values of X; already-found vectors are deflated away before and during
    each iteration.

    Args:
        X: an m x n matrix given as a list of lists of numbers.
        num_val: how many singular triples to extract.
        max_iter: maximum power-iteration steps per singular value.

    Returns:
        (eivec_m, eivec_n, eivals): left singular vectors (each of length m),
        right singular vectors (each of length n), and the corresponding
        singular-value magnitudes, in the order found (largest first).

    NOTE(review): relies on `norm`, `dotproduct` and `matrix_multiplication`
    from utils, and on the module-level `random` import; the random start
    vector makes signs (and near-degenerate ordering) nondeterministic.
    """
    eps = 1e-10  # convergence tolerance on the change of the iterate

    def normalize_vec(vec, p=2):
        """Normalize the first m entries and the last n entries separately,
        so both the left and the right vector candidates have unit p-norm."""
        vec_m, vec_n = vec[:m], vec[m:]
        norm_m = norm(vec_m, p)
        norm_n = norm(vec_n, p)
        return [x / norm_m for x in vec_m] + [x / norm_n for x in vec_n]

    def remove_component(vec):
        """Deflation: project out every previously found singular vector."""
        vec_m, vec_n = vec[:m], vec[m:]
        for v in eivec_m:
            coeff = dotproduct(vec_m, v)
            vec_m = [x1 - coeff * x2 for x1, x2 in zip(vec_m, v)]
        for v in eivec_n:
            coeff = dotproduct(vec_n, v)
            vec_n = [x1 - coeff * x2 for x1, x2 in zip(vec_n, v)]
        return vec_m + vec_n

    m, n = len(X), len(X[0])
    # Build the symmetric (m+n) x (m+n) block matrix [[0, X], [X.T, 0]].
    A = [[0] * (m + n) for _ in range(m + n)]
    for i in range(m):
        for j in range(n):
            A[i][m + j] = A[m + j][i] = X[i][j]

    eivec_m = []
    eivec_n = []
    eivals = []

    for _ in range(num_val):
        # Random start vector, deflated and block-normalized.
        Y = [random.random() for _ in range(m + n)]
        Y = normalize_vec(remove_component(Y))

        for _ in range(max_iter):
            old_Y = Y
            Y = [row[0] for row in matrix_multiplication(A, [[y] for y in Y])]
            Y = normalize_vec(remove_component(Y))
            # Converged when the iterate stops moving.
            if norm([y1 - y2 for y1, y2 in zip(old_Y, Y)]) <= eps:
                break

        projected = [row[0] for row in matrix_multiplication(A, [[y] for y in Y])]
        # Eigenvalue magnitude estimated as ||A y||_1 / ||y||_1.
        eivals.append(norm(projected, 1) / norm(Y, 1))
        eivec_m.append(Y[:m])
        eivec_n.append(Y[m:])
    return (eivec_m, eivec_n, eivals)
436+
437+
# ______________________________________________________________________________
438+
439+
380440
class DecisionFork:
381441
"""A fork of a decision tree holds an attribute to test, and a dict
382442
of branches, one for each of the attribute's values."""

tests/test_learning.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,34 @@ def test_k_nearest_neighbors():
115115
assert kNN([7.5, 4, 6, 2]) == "virginica"
116116

117117

118+
def test_truncated_svd():
    """truncated_svd returns singular-value magnitudes; signs depend on the
    random start vector, so every comparison goes through abs()."""
    # Diagonal matrix: singular values are the |diagonal| entries.
    test_mat = [[17, 0],
                [0, 11]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival[0]), 17)
    assert isclose(abs(eival[1]), 11)

    # Negative entry: magnitudes still come out largest-first.
    test_mat = [[17, 0],
                [0, -34]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival[0]), 34)
    assert isclose(abs(eival[1]), 17)

    # Classic rectangular SVD example: singular values 3, sqrt(5), 2, 0.
    test_mat = [[1, 0, 0, 0, 2],
                [0, 0, 3, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 2, 0, 0, 0]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival[0]), 3)
    assert isclose(abs(eival[1]), 5 ** 0.5)

    # Wide matrix with mixed signs: singular values 5 and 3.
    test_mat = [[3, 2, 2],
                [2, 3, -2]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival[0]), 5)
    assert isclose(abs(eival[1]), 3)
144+
145+
118146
def test_decision_tree_learner():
119147
iris = DataSet(name="iris")
120148
dTL = DecisionTreeLearner(iris)

tests/test_utils.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -161,25 +161,6 @@ def test_gaussian():
161161
assert gaussian(3,1,3) == 0.3989422804014327
162162

163163

164-
def test_truncated_svd():
    """Check the dominant singular value on small matrices.

    BUG FIX: `eival` is computed as a ratio of norms and is therefore always
    nonnegative, so the old expectation `isclose(eival, -34)` could never
    hold. Compare singular-value *magnitudes* via abs() instead.
    """
    # Diagonal matrix: dominant singular value is 17.
    test_mat = [[17, 0],
                [0, 11]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival), 17)

    # Negative entry: the dominant singular value is |-34| = 34.
    test_mat = [[17, 0],
                [0, -34]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival), 34)

    # Rectangular example: dominant singular value is 3.
    test_mat = [[1, 0, 0, 0, 2],
                [0, 0, 3, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 2, 0, 0, 0]]
    _, _, eival = truncated_svd(test_mat)
    assert isclose(abs(eival), 3)
181-
182-
183164
def test_sigmoid_derivative():
184165
value = 1
185166
assert sigmoid_derivative(value) == 0

utils.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -275,41 +275,6 @@ def gaussian(mean, st_dev, x):
275275
return 1/(math.sqrt(2*math.pi)*st_dev)*math.e**(-0.5*(float(x-mean)/st_dev)**2)
276276

277277

278-
def truncated_svd(X, max_iter=1000):
    """Compute the dominant singular value and vectors of matrix X by power
    iteration on the symmetric block matrix A = [[0, X], [X.T, 0]].

    Args:
        X: an m x n matrix given as a list of lists of numbers.
        max_iter: maximum number of power-iteration steps.

    Returns:
        (eivec_m, eivec_n, eival): unit left singular vector (length m),
        unit right singular vector (length n), and the dominant
        singular-value magnitude.

    NOTE(review): relies on `norm` and `matrix_multiplication` from this
    module and on the module-level `random` import; the random start vector
    makes the sign of the returned vectors nondeterministic.
    """
    def normalize_vec(vec, p=2):
        """Return vec scaled to unit p-norm."""
        s = norm(vec, p)
        return [x / s for x in vec]

    m, n = len(X), len(X[0])
    A = [[0] * (n + m) for _ in range(n + m)]
    for i in range(m):
        for j in range(n):
            # BUG FIX: X belongs in the off-diagonal blocks so that
            # A = [[0, X], [X.T, 0]]. The old assignment
            # `A[i][j] = A[m + j][n + i] = X[i][j]` built a different
            # (generally non-symmetric-block) matrix, so the iteration did
            # not converge to X's singular vectors.
            A[i][m + j] = A[m + j][i] = X[i][j]

    Y = [random.random() for _ in range(n + m)]
    Y = normalize_vec(Y)
    for _ in range(max_iter):
        old_Y = Y
        Y = [row[0] for row in matrix_multiplication(A, [[y] for y in Y])]
        Y = normalize_vec(Y)
        # Converged when the iterate stops moving.
        if norm([a - b for a, b in zip(old_Y, Y)]) <= 1e-10:
            break

    projected = [row[0] for row in matrix_multiplication(A, [[y] for y in Y])]
    # Singular-value magnitude estimated as ||A y||_1 / ||y||_1 (always >= 0).
    eival = norm(projected, 1) / norm(Y, 1)
    # BUG FIX: the first m entries of Y form the left singular vector and the
    # remaining n entries the right one; the old code sliced at n, mixing the
    # two blocks whenever m != n.
    eivec_m = normalize_vec(Y[:m])
    eivec_n = normalize_vec(Y[m:])

    return (eivec_m, eivec_n, eival)
311-
312-
313278
try: # math.isclose was added in Python 3.5; but we might be in 3.4
314279
from math import isclose
315280
except ImportError:

0 commit comments

Comments
 (0)