Last active
June 30, 2018 11:57
-
-
Save aligusnet/a9d4738810d225a933a303d3746df5a8 to your computer and use it in GitHub Desktop.
SVD using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Principal component analysis (PCA) | |
>>> np.random.seed(111) | |
>>> m = np.array([[3, 2], [2, 6]]) | |
>>> frobenius_norm(m) | |
7.280109889280518 | |
>>> power_iter(m, np.ones((2,1))) | |
array([[0.52999894], | |
[0.8479983 ]]) | |
>>> power_iteration(m) | |
array([[0.4472136 ], | |
[0.89442719]]) | |
>>> eigen_value(m, np.array([[0.4472136 ], [0.89442719]])) | |
7.000000015653793 | |
>>> next_matrix(m, np.array([[0.4472136 ], [0.89442719]]), 7) | |
array([[ 1.59999997, -0.80000003], | |
[-0.80000003, 0.40000001]]) | |
>>> x, v = eigens(m) | |
>>> x | |
array([[ 0.4472136 , 0.89442719], | |
[ 0.89442719, -0.4472136 ]]) | |
>>> v | |
array([7., 2.]) | |
>>> a = np.array([[1, 2], \ | |
[2, 1], \ | |
[3, 4], \ | |
[4, 3]]) | |
>>> x, v = eigens(a.T.dot(a)) | |
>>> x | |
array([[ 0.70710678, -0.70710678], | |
[ 0.70710678, 0.70710678]]) | |
>>> v | |
array([58., 2.]) | |
>>> movies = np.array([[1, 1, 1, 0, 0], \ | |
[3, 3, 3, 0, 0], \ | |
[4, 4, 4, 0, 0], \ | |
[5, 5, 5, 0, 0], \ | |
[0, 0, 0, 4, 4], \ | |
[0, 0, 0, 5, 5], \ | |
[0, 0, 0, 2, 2]]) | |
>>> u, sigma, vT = svd(movies) | |
>>> sigma | |
array([[12.36931688, 0. ], | |
[ 0. , 9.48683298]]) | |
>>> movies_restored = u.dot(sigma).dot(vT) | |
>>> equal(movies, movies_restored, eps = 1e-8) | |
True | |
>>> movies2 = np.array([[1, 1, 1, 0, 0], \ | |
[3, 3, 3, 0, 0], \ | |
[4, 4, 4, 0, 0], \ | |
[5, 5, 5, 0, 0], \ | |
[0, 2, 0, 4, 4], \ | |
[0, 0, 0, 5, 5], \ | |
[0, 1, 0, 2, 2]]) | |
>>> u, sigma, vT = svd(movies2) | |
>>> sigma | |
array([[12.48101469, 0. , 0. ], | |
[ 0. , 9.50861406, 0. ], | |
[ 0. , 0. , 1.34555971]]) | |
>>> movies2_restored = u.dot(sigma).dot(vT) | |
>>> abs(frobenius_norm(movies2_restored) - frobenius_norm(movies2)) < 1e-10 | |
True | |
""" | |
import numpy as np | |
def svd(m): | |
mmT = m.dot(m.T) | |
mTm = m.T.dot(m) | |
u, sigma2 = eigens(mmT) | |
sigma = np.sqrt(sigma2) | |
v, _ = eigens(mTm) | |
return u, np.diag(sigma), v.T | |
def eigens(m, eps = 1e-10): | |
vectors = [] | |
values = [] | |
for _ in range(m.shape[0]): | |
x = power_iteration(m, eps = eps) | |
lam = eigen_value(m, x) | |
if np.abs(lam) < eps: | |
break | |
vectors.append(x.flatten()) | |
values.append(lam) | |
m = next_matrix(m, x, lam) | |
return np.array(vectors).T, np.array(values) | |
def eigen_value(m, x): | |
return (x.T.dot(m).dot(x))[0][0] | |
def next_matrix(m, x, lam): | |
return m - lam * x.dot(x.T) | |
def power_iteration(m, num_iters = 1000, eps = 1e-10): | |
v = np.random.rand(m.shape[0], 1) | |
for _ in range(num_iters): | |
v_next = power_iter(m, v) | |
if equal(v, v_next, eps): | |
return v_next | |
v = v_next | |
return v | |
def power_iter(m, v): | |
v_next = m.dot(v) | |
return v_next/frobenius_norm(v_next) | |
def frobenius_norm(a): | |
return np.sqrt(np.sum(a * a)) | |
def equal(lhs, rhs, eps = 1e-10): | |
""" | |
>>> equal(np.array([11, 10]), np.array([11, 11])) | |
False | |
>>> equal(np.array([[1, 2], [3, 4]]), np.array([[1, 2], [3, 4]])) | |
True | |
""" | |
return np.all(np.abs(lhs - rhs) < eps) | |
if __name__ == "__main__": | |
import doctest | |
doctest.testmod() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment