-
-
Save vgoklani/8739c4d090ccf24b9cce to your computer and use it in GitHub Desktop.
Low Rank approximation using SGD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| # | |
| # Created by Albert Au Yeung (2010) | |
| # | |
| # An implementation of matrix factorization | |
| # http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/ | |
| # | |
try:
    import numpy
except ImportError:
    # Catch only a missing/broken numpy install (not KeyboardInterrupt or
    # unrelated errors), and terminate with a non-zero status: SystemExit
    # with a string argument prints it to stderr and exits with status 1.
    raise SystemExit("This implementation requires the numpy module.")
| ############################################################################### | |
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorize R into P and Q (R ~ P . Q^T) by stochastic gradient descent.

    Only entries of R that are strictly greater than zero are treated as
    observed; all other entries are ignored by both the updates and the
    error computation.

    @INPUT:
        R     : a matrix to be factorized, dimension N x M
        P     : an initial matrix of dimension N x K
        Q     : an initial matrix of dimension M x K
        K     : the number of latent features (the first K columns of P and
                Q are updated; normally K equals their column count)
        steps : the maximum number of steps to perform the optimisation
        alpha : the learning rate
        beta  : the regularization parameter
    @OUTPUT:
        the final matrices P (N x K) and Q (M x K) as float arrays.  The
        arrays passed in are not modified.
    """
    R = numpy.asarray(R)
    # Work on float copies: the caller's initial matrices stay untouched and
    # integer-typed inputs no longer truncate every gradient step to zero.
    P = numpy.array(P, dtype=float)
    Q = numpy.array(Q, dtype=float).T  # stored as K x M during optimisation

    # Coordinates of the observed entries, in row-major order (the same order
    # the nested i/j loops would visit them).
    rows, cols = numpy.nonzero(R > 0)
    ratings = R[rows, cols]
    half_beta = beta / 2.0  # float division even if beta is an int

    for _ in range(steps):
        for i, j, r_ij in zip(rows, cols, ratings):
            # Snapshot both factor vectors so P and Q are updated from the
            # same pre-step values (a proper simultaneous gradient step).
            p_i = P[i, :K].copy()
            q_j = Q[:K, j].copy()
            eij = r_ij - numpy.dot(P[i, :], Q[:, j])
            P[i, :K] += alpha * (2 * eij * q_j - beta * p_i)
            Q[:K, j] += alpha * (2 * eij * p_i - beta * q_j)

        # Regularized squared error summed over the observed entries.
        residuals = ratings - numpy.sum(P[rows, :] * Q[:, cols].T, axis=1)
        penalty = numpy.sum(P[rows, :K] ** 2) + numpy.sum(Q[:K, cols] ** 2)
        e = numpy.sum(residuals ** 2) + half_beta * penalty
        if e < 0.001:
            break
    return P, Q.T
| ############################################################################### | |
if __name__ == "__main__":
    # Demo: factorize a small 5 x 4 matrix with K = 2 latent features and
    # print the reconstructed matrix.  Zeros are entries the factorization
    # ignores (matrix_factorization only uses entries > 0).
    R = numpy.array([
        [5, 3, 0, 1],
        [4, 0, 0, 1],
        [1, 1, 0, 5],
        [1, 0, 0, 4],
        [0, 1, 5, 4],
    ])

    N, M = R.shape
    K = 2

    # Random initial factors in [0, 1).
    P = numpy.random.rand(N, K)
    Q = numpy.random.rand(M, K)

    nP, nQ = matrix_factorization(R, P, Q, K)
    nR = numpy.dot(nP, nQ.T)
    # Function-call form prints identically on Python 2 and Python 3.
    print(nR)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment