Created
June 29, 2015 21:15
-
-
Save maxjustus/0b87c0df16db64e365c2 to your computer and use it in GitHub Desktop.
Non-negative matrix factorization in Lua
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Implements the algorithm described here http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/ | |
-- by translating http://www.quuxlabs.com/wp-content/uploads/2010/09/mf.py_.txt into the equivalent lua code (I hope) | |
-- | |
-- INPUT: | |
-- R : a matrix to be factorized, dimension N x M | |
-- P : an initial matrix of dimension N x K | |
-- Q : an initial matrix of dimension M x K | |
-- K : the number of latent features | |
-- steps : the maximum number of steps to perform the optimisation | |
-- alpha : the learning rate | |
-- beta : the regularization parameter | |
-- OUTPUT: | |
-- the final matrices P and Q | |
function matrix_factorization(R, P, Q, K, steps, alpha, beta) | |
steps = steps or 5000 | |
alpha = alpha or 0.0002 | |
beta = beta or 0.02 | |
for step = 1, steps do | |
for i = 1, #R do | |
for j = 1, #R[i] do | |
if R[i][j] > 0 then | |
local eij = R[i][j] - dot_product(P[i], column(Q, j)) | |
for k = 1, K do | |
P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k]) | |
Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j]) | |
end | |
end | |
end | |
end | |
local e = 0 | |
for i = 1, #R do | |
for j = 1, #R[i] do | |
if R[i][j] > 0 then | |
e = e + math.pow(R[i][j] - dot_product(P[i], column(Q, j)), 2) | |
for k = 1, K do | |
e = e + (beta / 2) * (math.pow(P[i][k], 2) + math.pow(Q[k][j], 2)) | |
end | |
end | |
end | |
end | |
if e < 0.001 then | |
break | |
end | |
end | |
Qt = {} | |
for i = 1, #Q[1] do | |
Qt[i] = {} | |
for j = 1, #Q do | |
Qt[i][j] = Q[j][i] | |
end | |
end | |
return P, transpose(Q) | |
end | |
function column(t, index) | |
col = {} | |
for i = 1, #t do | |
col[i] = t[i][index] | |
end | |
return col | |
end | |
function transpose(matrix) | |
res = {} | |
for i = 1, #matrix[1] do | |
res[i] = {} | |
for j = 1, #matrix do | |
res[i][j] = matrix[j][i] | |
end | |
end | |
return res | |
end | |
function random_matrix(N, M) | |
local matrix = {} | |
for i=1, N do | |
matrix[i] = {} | |
for mi=1, M do | |
matrix[i][mi] = math.random() | |
end | |
end | |
return matrix | |
end | |
function dot_product(a, b) | |
local dp = 0 | |
for i = 1, #a do | |
dp = dp + a[i] * b[i] | |
end | |
return dp | |
end | |
-- Print contents of `tbl`, with indentation. | |
-- `indent` sets the initial level of indentation. | |
function tprint (tbl, indent) | |
if not indent then indent = 0 end | |
for k, v in pairs(tbl) do | |
formatting = string.rep(" ", indent) .. k .. ": " | |
if type(v) == "table" then | |
print(formatting) | |
tprint(v, indent+1) | |
elseif type(v) == 'boolean' then | |
print(formatting .. tostring(v)) | |
else | |
print(formatting .. v) | |
end | |
end | |
end | |
R = { | |
{5,3,0,1}, | |
{4,0,0,1}, | |
{1,1,0,5}, | |
{1,0,0,4}, | |
{0,1,5,4}, | |
} | |
N = #R | |
M = #R[1] | |
K = 2 | |
P = random_matrix(N,K) | |
Q = random_matrix(K,M) | |
nP, nQ = matrix_factorization(R, P, Q, K, 5000) | |
tprint(nP) | |
tprint(nQ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment