Skip to content

Instantly share code, notes, and snippets.

@j20232
Last active January 13, 2021 13:37
Show Gist options
  • Save j20232/ba587107c7b34923d64bdec533c2fc1a to your computer and use it in GitHub Desktop.
import numpy as np
def first_order_cp(X, rank, la=0.01, eta=0.01, eps=1e-5):
"""L2-regularized cp decomposition with first order alternating gradient descent
Args:
X (np.ndarray): IxJxK matrix
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
la (float, optional): coefficient for regularization. Defaults to 0.01.
eta (float, optional): learning rate. Defaults to 0.01.
eps (float, optional): accuracy. Defaults to 1e-5.
"""
I, J, K = X.shape
def f(X, U, V, W):
recon = np.einsum("ir,jr,kr->ijk", U, V, W)
first = 0.5 * np.sum((X - recon) ** 2)
second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2) + np.sum(W ** 2))
return first + second
def update_U(X, U, V, W):
for i in range(I):
theta_i = np.einsum("jr,jk,kr->r", V, X[i], W)
U[i] = U[i] - eta * (((V.T @ V) * (W.T @ W) + la * np.identity(rank)) @ U[i] - theta_i)
return U
def update_V(X, U, V, W):
for j in range(J):
theta_j = np.einsum("ir,ik,kr->r", U, X[:, j], W)
V[j] = V[j] - eta * (((U.T @ U) * (W.T @ W) + la * np.identity(rank)) @ V[j] - theta_j)
return V
def update_W(X, U, V, W):
for k in range(K):
theta_k = np.einsum("ir,ij,jr->r", U, X[:, :, k], V)
W[k] = W[k] - eta * (((U.T @ U) * (V.T @ V) + la * np.identity(rank)) @ W[k] - theta_k)
return W
U = np.random.rand(I, rank)
V = np.random.rand(J, rank)
W = np.random.rand(K, rank)
while True:
g = f(X, U, V, W)
U = update_U(X, U, V, W)
V = update_V(X, U, V, W)
W = update_W(X, U, V, W)
if abs(g - f(X, U, V, W)) / f(X, U, V, W) < eps:
break
return U, V, W
def main():
    # Demo: decompose a random 20x10x5 tensor at rank 3 and report the fit.
    I, J, K = 20, 10, 5
    rank = 3
    X = np.random.rand(I, J, K)
    U, V, W = first_order_cp(X, rank)
    recon = np.einsum("ir,jr,kr->ijk", U, V, W)
    loss = np.sum((X - recon) ** 2)
    print("Loss: ", loss)  # \approx 0
    print("U: ", U.shape)  # (I, rank)
    print("V: ", V.shape)  # (J, rank)
    print("W: ", W.shape)  # (K, rank)


if __name__ == "__main__":
    main()
import numpy as np
def second_order_cp(X, rank, la=0.01, eta=0.01, eps=1e-5):
"""L2-regularized cp decomposition with second order alternating gradient descent
Args:
X (np.ndarray): IxJxK matrix
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
la (float, optional): coefficient for regularization. Defaults to 0.01.
eta (float, optional): learning rate. Defaults to 0.01.
eps (float, optional): accuracy. Defaults to 1e-5.
"""
I, J, K = X.shape
def f(X, U, V, W):
recon = np.einsum("ir,jr,kr->ijk", U, V, W)
first = 0.5 * np.sum((X - recon) ** 2)
second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2) + np.sum(W ** 2))
return first + second
def update_U(X, U, V, W):
for i in range(I):
theta_i = np.einsum("jr,jk,kr->r", V, X[i], W)
U[i] = (1 - eta) * U[i] + eta * (np.linalg.inv((V.T @ V) * (W.T @ W) + la * np.identity(rank)) @ theta_i)
return U
def update_V(X, U, V, W):
for j in range(J):
theta_j = np.einsum("ir,ik,kr->r", U, X[:, j], W)
V[j] = (1 - eta) * V[j] + eta * (np.linalg.inv((U.T @ U) * (W.T @ W) + la * np.identity(rank)) @ theta_j)
return V
def update_W(X, U, V, W):
for k in range(K):
theta_k = np.einsum("ir,ij,jr->r", U, X[:, :, k], V)
W[k] = (1 - eta) * W[k] + eta * (np.linalg.inv((U.T @ U) * (V.T @ V) + la * np.identity(rank)) @ theta_k)
return W
U = np.random.rand(I, rank)
V = np.random.rand(J, rank)
W = np.random.rand(K, rank)
while True:
g = f(X, U, V, W)
U = update_U(X, U, V, W)
V = update_V(X, U, V, W)
W = update_W(X, U, V, W)
if abs(g - f(X, U, V, W)) / f(X, U, V, W) < eps:
break
return U, V, W
def main():
    # Demo: decompose a random 20x10x5 tensor at rank 3 and report the fit.
    I, J, K = 20, 10, 5
    rank = 3
    X = np.random.rand(I, J, K)
    U, V, W = second_order_cp(X, rank)
    recon = np.einsum("ir,jr,kr->ijk", U, V, W)
    loss = np.sum((X - recon) ** 2)
    print("Loss: ", loss)  # \approx 0
    print("U: ", U.shape)  # (I, rank)
    print("V: ", V.shape)  # (J, rank)
    print("W: ", W.shape)  # (K, rank)


if __name__ == "__main__":
    main()
import numpy as np
def first_order_agd(X, rank, la=0.01, eta=0.01, eps=1e-5):
"""L2-regularized matrix decomposition with first order alternating gradient descent
Args:
X (np.ndarray): IxJ matrix (I: n_samples, J: n_features)
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
la (float, optional): coefficient for regularization. Defaults to 0.01.
eta (float, optional): learning rate. Defaults to 0.01.
eps (float, optional): accuracy. Defaults to 1e-5.
"""
def f(X, U, V):
first = 0.5 * np.sum((X - U @ V.T) ** 2)
second = 0.5 * la * np.sum((np.sum(U ** 2, axis=0) + np.sum(V ** 2, axis=0)))
return first + second
def grad_fu(X, U, V):
return - X @ V + U @ (V.T @ V + la * np.identity(V.shape[1]))
def grad_fv(X, U, V):
return - X.T @ U + V @ (U.T @ U + la * np.identity(U.shape[1]))
n_samples, n_features = X.shape
U = np.random.rand(n_samples, rank)
V = np.random.rand(n_features, rank)
while True:
g = f(X, U, V)
U = U - eta * grad_fu(X, U, V)
V = V - eta * grad_fv(X, U, V)
if abs(g - f(X, U, V)) / f(X, U, V) < eps:
break
return U, V
def second_order_agd(X, rank, la=0.01, eta=0.01, eps=1e-5):
"""L2-regularized matrix decomposition with second order alternating gradient descent
Args:
X (np.ndarray): IxJ matrix (I: n_samples, J: n_features)
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
la (float, optional): coefficient for regularization. Defaults to 0.01.
eta (float, optional): learning rate. Defaults to 0.01.
eps (float, optional): accuracy. Defaults to 1e-5.
"""
def f(X, U, V):
first = 0.5 * np.sum((X - U @ V.T) ** 2)
second = 0.5 * la * np.sum((np.sum(U ** 2, axis=0) + np.sum(V ** 2, axis=0)))
return first + second
n_samples, n_features = X.shape
U = np.random.rand(n_samples, rank)
V = np.random.rand(n_features, rank)
while True:
g = f(X, U, V)
U = (1 - eta) * U + eta * X @ V @ np.linalg.inv(V.T @ V + la * np.identity(V.shape[1]))
V = (1 - eta) * V + eta * X.T @ U @ np.linalg.inv(U.T @ U + la * np.identity(U.shape[1]))
if abs(g - f(X, U, V)) / f(X, U, V) < eps:
break
return U, V
def main():
    """Demo: factorize a random 100x10 matrix at rank 5 with both solvers."""
    n_samples = 100
    n_features = 10
    rank = 5
    X = np.random.rand(n_samples, n_features)
    U, V = first_order_agd(X, rank)
    # Fix: sum the *squared* residuals. The original np.sum(X - U @ V.T)
    # let positive and negative errors cancel, so it could print ~0 even
    # for a poor reconstruction.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # small when the factorization fits well
    print("U: ", U.shape)  # (n_samples, rank)
    print("V: ", V.shape)  # (n_features, rank)
    U, V = second_order_agd(X, rank)
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # small when the factorization fits well
    print("U: ", U.shape)  # (n_samples, rank)
    print("V: ", V.shape)  # (n_features, rank)


if __name__ == "__main__":
    main()
import numpy as np
def non_negative_pgd(X, rank, la=0.01, eta=0.01, eps=1e-5):
"""L2-regularized non-negativematrix decomposition with second order alternating projected gradient descent
Args:
X (np.ndarray): IxJ matrix (I: n_samples, J: n_features)
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
la (float, optional): coefficient for regularization. Defaults to 0.01.
eta (float, optional): learning rate. Defaults to 0.01.
eps (float, optional): accuracy. Defaults to 1e-5.
"""
def f(X, U, V):
first = 0.5 * np.sum((X - U @ V.T) ** 2)
second = 0.5 * la * np.sum((np.sum(U ** 2, axis=0) + np.sum(V ** 2, axis=0)))
return first + second
n_samples, n_features = X.shape
U = np.random.rand(n_samples, rank)
V = np.random.rand(n_features, rank)
while True:
g = f(X, U, V)
U = (1 - eta) * U + eta * X @ V @ np.linalg.inv(V.T @ V + la * np.identity(V.shape[1]))
V = (1 - eta) * V + eta * X.T @ U @ np.linalg.inv(U.T @ U + la * np.identity(U.shape[1]))
U = np.clip(U, 0, None)
V = np.clip(V, 0, None)
if abs(g - f(X, U, V)) / f(X, U, V) < eps:
break
return U, V
def main():
    """Demo: non-negative factorization of a random 10x5 matrix at rank 3."""
    n_samples = 10
    n_features = 5
    rank = 3
    X = np.random.rand(n_samples, n_features)
    U, V = non_negative_pgd(X, rank)
    # Fix: sum the *squared* residuals. The original np.sum(X - U @ V.T)
    # let positive and negative errors cancel, hiding a poor fit.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # small when the factorization fits well
    print("U: ", np.min(U))  # >= 0: non-negativity of U holds
    print("V: ", np.min(V))  # >= 0: non-negativity of V holds


if __name__ == "__main__":
    main()
import numpy as np
def element_sgd(X, rank, T=500, la=0.01, a=1.0, b=1.0):
"""L2-regularized decomposition with stochastic gradient descent
Args:
X (np.ndarray): IxJ matrix (I: n_samples, J: n_features)
rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
T (int, optional): iteration. Defaults to 500.
la (float, optional): coefficient for regularization. Defaults to 0.01.
a (float, optional): factor for learning rate. Defaults to 1.0.
b (float, optional): factor for learning rate. Defaults to 1.0.
"""
n_samples, n_features = X.shape
U = np.random.rand(n_samples, rank)
V = np.random.rand(n_features, rank)
for t in range(T):
i = np.random.randint(0, n_samples)
j = np.random.randint(0, n_features)
eta = 1.0 / (a * np.sqrt(t) + b)
grad_fu = -X[i, j] * V[j] + U[i] * (V[j].T @ V[j] + la / n_features)
grad_fv = -X[i, j] * U[i] + V[j] * (U[i].T @ U[i] + la / n_samples)
U[i] = U[i] - eta * grad_fu
V[j] = V[j] - eta * grad_fv
return U, V
def main():
    """Demo: SGD factorization of a random 10x5 matrix at rank 3."""
    n_samples = 10
    n_features = 5
    rank = 3
    X = np.random.rand(n_samples, n_features)
    U, V = element_sgd(X, rank)
    # Fix: sum the *squared* residuals. The original np.sum(X - U @ V.T)
    # let positive and negative errors cancel, hiding a poor fit.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # small when the factorization fits well


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment