Last active
January 13, 2021 13:37
-
-
Save j20232/ba587107c7b34923d64bdec533c2fc1a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def first_order_cp(X, rank, la=0.01, eta=0.01, eps=1e-5):
    """L2-regularized CP decomposition with first order alternating gradient descent.

    Args:
        X (np.ndarray): IxJxK tensor to decompose.
        rank (int): target rank; factors have shapes (I, rank), (J, rank), (K, rank).
        la (float, optional): coefficient for L2 regularization. Defaults to 0.01.
        eta (float, optional): learning rate. Defaults to 0.01.
        eps (float, optional): relative-decrease tolerance used to stop. Defaults to 1e-5.

    Returns:
        tuple: factor matrices (U, V, W).
    """
    I, J, K = X.shape

    def f(U, V, W):
        # Objective: 0.5*||X - [[U,V,W]]||^2 + 0.5*la*(||U||^2 + ||V||^2 + ||W||^2).
        recon = np.einsum("ir,jr,kr->ijk", U, V, W)
        first = 0.5 * np.sum((X - recon) ** 2)
        second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2) + np.sum(W ** 2))
        return first + second

    def update_U(U, V, W):
        # The matrix (V^T V) * (W^T W) + la*I is identical for every row of U:
        # build it once per sweep instead of once per row.
        G = (V.T @ V) * (W.T @ W) + la * np.identity(rank)
        for i in range(I):
            theta_i = np.einsum("jr,jk,kr->r", V, X[i], W)
            U[i] = U[i] - eta * (G @ U[i] - theta_i)
        return U

    def update_V(U, V, W):
        G = (U.T @ U) * (W.T @ W) + la * np.identity(rank)
        for j in range(J):
            theta_j = np.einsum("ir,ik,kr->r", U, X[:, j], W)
            V[j] = V[j] - eta * (G @ V[j] - theta_j)
        return V

    def update_W(U, V, W):
        G = (U.T @ U) * (V.T @ V) + la * np.identity(rank)
        for k in range(K):
            theta_k = np.einsum("ir,ij,jr->r", U, X[:, :, k], V)
            W[k] = W[k] - eta * (G @ W[k] - theta_k)
        return W

    U = np.random.rand(I, rank)
    V = np.random.rand(J, rank)
    W = np.random.rand(K, rank)
    prev = f(U, V, W)
    while True:
        U = update_U(U, V, W)
        V = update_V(U, V, W)
        W = update_W(U, V, W)
        cur = f(U, V, W)
        # Original evaluated f() three times per sweep; cache it instead.
        if abs(prev - cur) / cur < eps:
            break
        prev = cur
    return U, V, W
def main():
    """Demo: decompose a random 20x10x5 tensor at rank 3 and report the fit."""
    dims = (20, 10, 5)
    rank = 3
    X = np.random.rand(*dims)
    U, V, W = first_order_cp(X, rank)
    approx = np.einsum("ir,jr,kr->ijk", U, V, W)
    print("Loss: ", np.sum((X - approx) ** 2))  # \approx 0
    print("U: ", U.shape)  # (I, rank)
    print("V: ", V.shape)  # (J, rank)
    print("W: ", W.shape)  # (K, rank)
# Entry point guard: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def second_order_cp(X, rank, la=0.01, eta=0.01, eps=1e-5):
    """L2-regularized CP decomposition with second order alternating gradient descent.

    Each sweep moves every factor row toward its ridge-regression optimum,
    damped by the learning rate eta.

    Args:
        X (np.ndarray): IxJxK tensor to decompose.
        rank (int): target rank; factors have shapes (I, rank), (J, rank), (K, rank).
        la (float, optional): coefficient for L2 regularization. Defaults to 0.01.
        eta (float, optional): damping factor for the update. Defaults to 0.01.
        eps (float, optional): relative-decrease tolerance used to stop. Defaults to 1e-5.

    Returns:
        tuple: factor matrices (U, V, W).
    """
    I, J, K = X.shape

    def f(U, V, W):
        # Objective: 0.5*||X - [[U,V,W]]||^2 + 0.5*la*(||U||^2 + ||V||^2 + ||W||^2).
        recon = np.einsum("ir,jr,kr->ijk", U, V, W)
        first = 0.5 * np.sum((X - recon) ** 2)
        second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2) + np.sum(W ** 2))
        return first + second

    def update_U(U, V, W):
        # The system matrix is the same for all rows: build it once per sweep,
        # and use solve() instead of forming the explicit inverse (cheaper,
        # numerically more stable).
        H = (V.T @ V) * (W.T @ W) + la * np.identity(rank)
        for i in range(I):
            theta_i = np.einsum("jr,jk,kr->r", V, X[i], W)
            U[i] = (1 - eta) * U[i] + eta * np.linalg.solve(H, theta_i)
        return U

    def update_V(U, V, W):
        H = (U.T @ U) * (W.T @ W) + la * np.identity(rank)
        for j in range(J):
            theta_j = np.einsum("ir,ik,kr->r", U, X[:, j], W)
            V[j] = (1 - eta) * V[j] + eta * np.linalg.solve(H, theta_j)
        return V

    def update_W(U, V, W):
        H = (U.T @ U) * (V.T @ V) + la * np.identity(rank)
        for k in range(K):
            theta_k = np.einsum("ir,ij,jr->r", U, X[:, :, k], V)
            W[k] = (1 - eta) * W[k] + eta * np.linalg.solve(H, theta_k)
        return W

    U = np.random.rand(I, rank)
    V = np.random.rand(J, rank)
    W = np.random.rand(K, rank)
    prev = f(U, V, W)
    while True:
        U = update_U(U, V, W)
        V = update_V(U, V, W)
        W = update_W(U, V, W)
        cur = f(U, V, W)
        # Original evaluated f() three times per sweep; cache it instead.
        if abs(prev - cur) / cur < eps:
            break
        prev = cur
    return U, V, W
def main():
    """Demo: decompose a random 20x10x5 tensor at rank 3 and report the fit."""
    dims = (20, 10, 5)
    rank = 3
    X = np.random.rand(*dims)
    U, V, W = second_order_cp(X, rank)
    approx = np.einsum("ir,jr,kr->ijk", U, V, W)
    print("Loss: ", np.sum((X - approx) ** 2))  # \approx 0
    print("U: ", U.shape)  # (I, rank)
    print("V: ", V.shape)  # (J, rank)
    print("W: ", W.shape)  # (K, rank)
# Entry point guard: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def first_order_agd(X, rank, la=0.01, eta=0.01, eps=1e-5):
    """L2-regularized matrix decomposition with first order alternating gradient descent.

    Args:
        X (np.ndarray): IxJ matrix (I: n_samples, J: n_features).
        rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J]).
        la (float, optional): coefficient for regularization. Defaults to 0.01.
        eta (float, optional): learning rate. Defaults to 0.01.
        eps (float, optional): relative-decrease tolerance used to stop. Defaults to 1e-5.

    Returns:
        tuple: factors U (n_samples, rank) and V (n_features, rank).
    """
    def f(U, V):
        # Objective: 0.5*||X - U V^T||^2 + 0.5*la*(||U||^2 + ||V||^2).
        first = 0.5 * np.sum((X - U @ V.T) ** 2)
        second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2))
        return first + second

    def grad_fu(U, V):
        return -X @ V + U @ (V.T @ V + la * np.identity(rank))

    def grad_fv(U, V):
        return -X.T @ U + V @ (U.T @ U + la * np.identity(rank))

    n_samples, n_features = X.shape
    U = np.random.rand(n_samples, rank)
    V = np.random.rand(n_features, rank)
    prev = f(U, V)
    while True:
        U = U - eta * grad_fu(U, V)
        V = V - eta * grad_fv(U, V)
        cur = f(U, V)
        # Original evaluated f() three times per iteration; cache it instead.
        if abs(prev - cur) / cur < eps:
            break
        prev = cur
    return U, V
def second_order_agd(X, rank, la=0.01, eta=0.01, eps=1e-5):
    """L2-regularized matrix decomposition with second order alternating gradient descent.

    Each step moves a factor toward its ridge-regression optimum, damped by eta.

    Args:
        X (np.ndarray): IxJ matrix (I: n_samples, J: n_features).
        rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J]).
        la (float, optional): coefficient for regularization. Defaults to 0.01.
        eta (float, optional): damping factor for the update. Defaults to 0.01.
        eps (float, optional): relative-decrease tolerance used to stop. Defaults to 1e-5.

    Returns:
        tuple: factors U (n_samples, rank) and V (n_features, rank).
    """
    def f(U, V):
        # Objective: 0.5*||X - U V^T||^2 + 0.5*la*(||U||^2 + ||V||^2).
        first = 0.5 * np.sum((X - U @ V.T) ** 2)
        second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2))
        return first + second

    n_samples, n_features = X.shape
    U = np.random.rand(n_samples, rank)
    V = np.random.rand(n_features, rank)
    reg = la * np.identity(rank)
    prev = f(U, V)
    while True:
        # (X V)(V^T V + la I)^-1 computed via solve(); the system matrix is
        # symmetric, so the transpose trick below is exact and avoids the
        # explicit (slower, less stable) matrix inverse.
        U = (1 - eta) * U + eta * np.linalg.solve(V.T @ V + reg, (X @ V).T).T
        V = (1 - eta) * V + eta * np.linalg.solve(U.T @ U + reg, (X.T @ U).T).T
        cur = f(U, V)
        # Original evaluated f() three times per iteration; cache it instead.
        if abs(prev - cur) / cur < eps:
            break
        prev = cur
    return U, V
def main():
    """Smoke-test both matrix-decomposition solvers on random data."""
    n_samples = 100
    n_features = 10
    rank = 5
    X = np.random.rand(n_samples, n_features)
    U, V = first_order_agd(X, rank)
    # BUG FIX: sum of *squared* residuals. The original summed signed residuals
    # (np.sum(X - U @ V.T)), which cancel out and can report ~0 for a bad fit.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # \approx 0
    print("U: ", U.shape)  # (n_samples, rank)
    print("V: ", V.shape)  # (n_features, rank)
    U, V = second_order_agd(X, rank)
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # \approx 0
    print("U: ", U.shape)  # (n_samples, rank)
    print("V: ", V.shape)  # (n_features, rank)
# Entry point guard: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def non_negative_pgd(X, rank, la=0.01, eta=0.01, eps=1e-5):
    """L2-regularized non-negative matrix decomposition with second order alternating projected gradient descent.

    A damped second-order step is followed by projection onto the non-negative
    orthant (clip at 0).

    Args:
        X (np.ndarray): IxJ matrix (I: n_samples, J: n_features).
        rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J]).
        la (float, optional): coefficient for regularization. Defaults to 0.01.
        eta (float, optional): damping factor for the update. Defaults to 0.01.
        eps (float, optional): relative-decrease tolerance used to stop. Defaults to 1e-5.

    Returns:
        tuple: non-negative factors U (n_samples, rank) and V (n_features, rank).
    """
    def f(U, V):
        # Objective: 0.5*||X - U V^T||^2 + 0.5*la*(||U||^2 + ||V||^2).
        first = 0.5 * np.sum((X - U @ V.T) ** 2)
        second = 0.5 * la * (np.sum(U ** 2) + np.sum(V ** 2))
        return first + second

    n_samples, n_features = X.shape
    U = np.random.rand(n_samples, rank)
    V = np.random.rand(n_features, rank)
    reg = la * np.identity(rank)
    prev = f(U, V)
    while True:
        # solve() with the symmetric system matrix replaces the explicit inverse.
        U = (1 - eta) * U + eta * np.linalg.solve(V.T @ V + reg, (X @ V).T).T
        V = (1 - eta) * V + eta * np.linalg.solve(U.T @ U + reg, (X.T @ U).T).T
        # Project onto the feasible (non-negative) set.
        U = np.clip(U, 0, None)
        V = np.clip(V, 0, None)
        cur = f(U, V)
        # Original evaluated f() three times per iteration; cache it instead.
        if abs(prev - cur) / cur < eps:
            break
        prev = cur
    return U, V
def main():
    """Smoke-test the non-negative projected-gradient decomposition."""
    n_samples = 10
    n_features = 5
    rank = 3
    X = np.random.rand(n_samples, n_features)
    U, V = non_negative_pgd(X, rank)
    # BUG FIX: sum of *squared* residuals. The original summed signed residuals
    # (np.sum(X - U @ V.T)), which cancel out and can report ~0 for a bad fit.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # \approx 0
    print("U: ", np.min(U))  # smallest entry of U; must be >= 0
    print("V: ", np.min(V))  # smallest entry of V; must be >= 0
# Entry point guard: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def element_sgd(X, rank, T=500, la=0.01, a=1.0, b=1.0):
    """L2-regularized decomposition with stochastic gradient descent.

    One uniformly sampled entry (i, j) of X is used per step to update row i
    of U and row j of V with a decaying learning rate.

    Args:
        X (np.ndarray): IxJ matrix (I: n_samples, J: n_features)
        rank (int): rank for decomposed matrices ([I, J] -> [I, rank], [rank, J])
        T (int, optional): iteration. Defaults to 500.
        la (float, optional): coefficient for regularization. Defaults to 0.01.
        a (float, optional): factor for learning rate. Defaults to 1.0.
        b (float, optional): factor for learning rate. Defaults to 1.0.
    """
    rows, cols = X.shape
    U = np.random.rand(rows, rank)
    V = np.random.rand(cols, rank)
    for step in range(T):
        # Sample one observed entry (i, j) uniformly at random.
        i = np.random.randint(0, rows)
        j = np.random.randint(0, cols)
        # Decaying step size: eta_t = 1 / (a * sqrt(t) + b).
        lr = 1.0 / (a * np.sqrt(step) + b)
        # Stochastic gradients of the regularized squared error at entry (i, j).
        du = -X[i, j] * V[j] + U[i] * (V[j].T @ V[j] + la / cols)
        dv = -X[i, j] * U[i] + V[j] * (U[i].T @ U[i] + la / rows)
        U[i] = U[i] - lr * du
        V[j] = V[j] - lr * dv
    return U, V
def main():
    """Smoke-test the element-wise SGD decomposition."""
    n_samples = 10
    n_features = 5
    rank = 3
    X = np.random.rand(n_samples, n_features)
    U, V = element_sgd(X, rank)
    # BUG FIX: sum of *squared* residuals. The original summed signed residuals
    # (np.sum(X - U @ V.T)), which cancel out and can report ~0 for a bad fit.
    loss = np.sum((X - U @ V.T) ** 2)
    print("Loss: ", loss)  # small after enough SGD steps
# Entry point guard: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment