Thanks to visit codestin.com
Credit goes to github.com

Skip to content

SVD #592

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 24, 2017
Merged

SVD #592

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
open_data, sigmoid_derivative, probability
open_data, sigmoid_derivative, probability, norm, matrix_multiplication
)

import copy
Expand Down Expand Up @@ -377,6 +377,66 @@ def predict(example):
# ______________________________________________________________________________


def truncated_svd(X, num_val=2, max_iter=1000):
    """Compute the num_val leading components of the SVD of X by power iteration.

    The m x n matrix X (a non-empty list of m rows, each of length n) is
    embedded in the symmetric (m + n) x (m + n) matrix A = [[0, X], [X^T, 0]]
    and power iteration is run on A. Every iterate has length m + n; its
    first m entries and its last n entries are rescaled to unit norm
    separately after each step. Once a component has been found, its vectors
    are projected out of all later iterates so that the next run finds the
    next component.

    X        -- the matrix, as a list of equal-length rows
    num_val  -- number of components to compute
    max_iter -- upper bound on power-iteration steps per component

    Returns a tuple (eivec_m, eivec_n, eivals) of three lists with num_val
    entries each: the length-m vectors, the length-n vectors and the
    corresponding values.

    If a part of the iterate collapses to all zeros (for example when X is
    the zero matrix), that part is returned as zeros and the value is
    reported as 0 instead of raising ZeroDivisionError.

    NOTE(review): the value is the ratio norm(A v, 1) / norm(v, 1); depending
    on how norm(..., 1) treats signs it may come out negative, so callers
    should compare magnitudes.
    """

    def normalize_vec(vec, order=2):
        """Rescale the first m and the remaining n entries of vec separately."""

        def unit(part):
            length = norm(part, order)
            # A zero-length part has no direction to preserve; dividing by
            # its norm would raise ZeroDivisionError.
            if length == 0:
                return list(part)
            return [x / length for x in part]

        return unit(vec[:m]) + unit(vec[m:])

    def remove_component(vec):
        """Subtract from vec its components along the vectors found so far."""
        part_m = vec[:m]
        part_n = vec[m:]
        for eivec in eivec_m:
            coeff = dotproduct(part_m, eivec)
            part_m = [x1 - coeff * x2 for x1, x2 in zip(part_m, eivec)]
        for eivec in eivec_n:
            coeff = dotproduct(part_n, eivec)
            part_n = [x1 - coeff * x2 for x1, x2 in zip(part_n, eivec)]
        return part_m + part_n

    def apply_A(vec):
        """Return A * vec as a flat list."""
        product = matrix_multiplication(A, [[x] for x in vec])
        return [row[0] for row in product]

    m, n = len(X), len(X[0])

    # Off-diagonal blocks hold X (top right) and its transpose (bottom left).
    A = [[0 for _ in range(n + m)] for _ in range(n + m)]
    for i in range(m):
        for j in range(n):
            A[i][m + j] = A[m + j][i] = X[i][j]

    eivec_m = []
    eivec_n = []
    eivals = []

    for _ in range(num_val):
        # Random start, made orthogonal to the components already extracted.
        vec = [random.random() for _ in range(m + n)]
        vec = normalize_vec(remove_component(vec))

        for _ in range(max_iter):
            previous = vec
            vec = normalize_vec(remove_component(apply_A(vec)))
            # check for convergence
            if norm([x1 - x2 for x1, x2 in zip(previous, vec)]) <= 1e-10:
                break

        projected = apply_A(vec)
        # An all-zero iterate means there is nothing left to extract.
        eivals.append(norm(projected, 1) / norm(vec, 1) if any(vec) else 0)
        eivec_m.append(vec[:m])
        eivec_n.append(vec[m:])
    return (eivec_m, eivec_n, eivals)

# ______________________________________________________________________________


class DecisionFork:
"""A fork of a decision tree holds an attribute to test, and a dict
of branches, one for each of the attribute's values."""
Expand Down
28 changes: 28 additions & 0 deletions tests/test_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,34 @@ def test_k_nearest_neighbors():
assert kNN([7.5, 4, 6, 2]) == "virginica"


def test_truncated_svd():
    # Each case pairs a matrix with the expected magnitudes of the two
    # leading values, in the order truncated_svd returns them.
    cases = [
        ([[17, 0],
          [0, 11]],
         (17, 11)),
        ([[17, 0],
          [0, -34]],
         (34, 17)),
        ([[1, 0, 0, 0, 2],
          [0, 0, 3, 0, 0],
          [0, 0, 0, 0, 0],
          [0, 2, 0, 0, 0]],
         (3, 5**0.5)),
        ([[3, 2, 2],
          [2, 3, -2]],
         (5, 3)),
    ]

    for test_mat, (largest, second) in cases:
        _, _, eival = truncated_svd(test_mat)
        # Only magnitudes are compared; the sign of a value is not checked.
        assert isclose(abs(eival[0]), largest)
        assert isclose(abs(eival[1]), second)


def test_decision_tree_learner():
iris = DataSet(name="iris")
dTL = DecisionTreeLearner(iris)
Expand Down
19 changes: 0 additions & 19 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,25 +161,6 @@ def test_gaussian():
assert gaussian(3,1,3) == 0.3989422804014327


def test_truncated_svd():
    # Each case pairs a matrix with the single value truncated_svd should
    # report for it (sign included).
    cases = [
        ([[17, 0],
          [0, 11]],
         17),
        ([[17, 0],
          [0, -34]],
         -34),
        ([[1, 0, 0, 0, 2],
          [0, 0, 3, 0, 0],
          [0, 0, 0, 0, 0],
          [0, 2, 0, 0, 0]],
         3),
    ]

    for test_mat, expected in cases:
        _, _, eival = truncated_svd(test_mat)
        assert isclose(eival, expected)


def test_sigmoid_derivative():
value = 1
assert sigmoid_derivative(value) == 0
Expand Down
35 changes: 0 additions & 35 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,41 +275,6 @@ def gaussian(mean, st_dev, x):
return 1/(math.sqrt(2*math.pi)*st_dev)*math.e**(-0.5*(float(x-mean)/st_dev)**2)


def truncated_svd(X, max_iter=1000):
    """Compute the first component of the SVD of X by power iteration.

    The m x n matrix X (a non-empty list of m rows, each of length n) is
    embedded in the symmetric (m + n) x (m + n) matrix A = [[0, X], [X^T, 0]]
    and power iteration is run on A. The iterate has length m + n; its first
    m entries and its last n entries are rescaled to unit norm separately
    after every step, which turns the iteration into the alternating updates
    u <- X v, v <- X^T u.

    X        -- the matrix, as a list of equal-length rows
    max_iter -- upper bound on power-iteration steps

    Returns a tuple (eivec_m, eivec_n, eival): the length-m vector, the
    length-n vector and the corresponding value.

    If a part of the iterate collapses to all zeros (for example when X is
    the zero matrix), that part is returned as zeros and the value is
    reported as 0 instead of raising ZeroDivisionError.

    NOTE(review): the value is the ratio norm(A v, 1) / norm(v, 1); depending
    on how norm(..., 1) treats signs it may come out negative.
    """

    def normalize_vec(vec, order=2):
        """Return vec rescaled to unit norm (unchanged if its norm is zero)."""
        length = norm(vec, order)
        # A zero-length vector has no direction to preserve; dividing by its
        # norm would raise ZeroDivisionError.
        if length == 0:
            return list(vec)
        return [x / length for x in vec]

    def normalize_parts(vec):
        """Rescale the first m and the remaining n entries of vec separately."""
        return normalize_vec(vec[:m]) + normalize_vec(vec[m:])

    def apply_A(vec):
        """Return A * vec as a flat list."""
        product = matrix_multiplication(A, [[x] for x in vec])
        return [row[0] for row in product]

    m, n = len(X), len(X[0])

    # Off-diagonal blocks hold X (top right) and its transpose (bottom left);
    # placing them on the diagonal would not produce a symmetric matrix whose
    # eigenvalues are tied to the singular values of X.
    A = [[0 for _ in range(n + m)] for _ in range(n + m)]
    for i in range(m):
        for j in range(n):
            A[i][m + j] = A[m + j][i] = X[i][j]

    vec = normalize_parts([random.random() for _ in range(n + m)])
    for _ in range(max_iter):
        previous = vec
        vec = normalize_parts(apply_A(vec))
        # check for convergence
        if norm([x1 - x2 for x1, x2 in zip(previous, vec)]) <= 1e-10:
            break

    projected = apply_A(vec)
    # An all-zero iterate means X maps everything to zero.
    eival = norm(projected, 1) / norm(vec, 1) if any(vec) else 0
    # The iterate is laid out as [m-part, n-part], matching the rows of A.
    eivec_m = vec[:m]
    eivec_n = vec[m:]

    return (eivec_m, eivec_n, eival)


try: # math.isclose was added in Python 3.5; but we might be in 3.4
from math import isclose
except ImportError:
Expand Down