Created
August 23, 2011 22:09
-
-
Save marcelcaraciolo/1166730 to your computer and use it in GitHub Desktop.
SVD Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy import linalg
import numpy as np
from scipy.spatial.distance import cosine

# SVD example: project users into a 2-D latent space, find the existing
# user closest to a new user (Marcel) and recommend what that user rated
# but Marcel has not.

# Users whose cosine similarity to Marcel is below this value are ignored.
SIMILARITY_CUTOFF = 0.9

# Let's define the ratings matrix: one row per item, one column per user.
user_ids = np.array(['Amanda', 'Anna', 'Bruno', 'Ricardo'])
item_ids = np.array(['Back to The Future', 'Conan',
                     'Lord of the Rings', 'Star Wars'])
# Plain ndarrays are used (not np.matrix) so that every projected user is
# a 1-D vector, which is what scipy's cosine() expects.
matrix = np.array([
    # Amanda, Anna, Bruno, Ricardo
    [3, 4, 3, 1],  # Back to The Future
    [1, 3, 2, 6],  # Conan
    [2, 4, 1, 5],  # Lord of The Rings
    [3, 3, 5, 2],  # Star Wars
])

# Compute the SVD decomposition: matrix = u * diag(s) * vt
u, s, vt = linalg.svd(matrix)

# Now let's get the decomposed matrices (2-rank)
# The first and second columns of u (4x2): items in the latent space
u2 = u[:, 0:2]
# The first and second columns of vt transposed (4x2): users in the
# latent space (kept for reference, not used below)
vt2 = vt.T[:, 0:2]
# Finally the first two singular values on a diagonal (2x2)
eig2 = np.diag(s[0:2])

# Maps a ratings vector (one rating per item) to its 2-D coordinates:
# ratings * u2 * inv(eig2)
fold_in = np.dot(u2, linalg.inv(eig2))

# Now let's come with Marcel, the new user (0 means "not rated").
marcel = np.array([0, 3, 0, 4])
marcel_2D = np.dot(marcel, fold_in)

# Compute the cosine similarity between Marcel and
# every other user in our 2-D space
# Perfect similarity = 1.0   No similarity = 0.0
# matrix.T has one row per user, so users_2D is (number of users) x 2.
users_2D = np.dot(matrix.T, fold_in)
users_sim = np.array([1 - cosine(marcel_2D, user_2D)
                      for user_2D in users_2D])

# Remove all users who fall below the cosine similarity cutoff
most_similar = np.where(users_sim >= SIMILARITY_CUTOFF)[0]
most_similar_scores = users_sim[most_similar]
most_similar_users = user_ids[most_similar]
for user_id, score in zip(most_similar_users, most_similar_scores):
    print("%s x Marcel: %.2f" % (user_id, score))

# For computing the recommendations we will use the strategy:
#  1) Select the most similar user
#  2) Compare all the items rated by this user against your own and select
#     the items that you have not yet rated
#  3) Return the ratings for items I have not yet seen,
#     but the most similar user has rated.
if most_similar.size == 0:
    # Nobody reached the cutoff: there is no user to take ratings from.
    most_similar_user = None
    print("No user is similar enough to Marcel to recommend anything.")
else:
    # Indices into the filtered arrays, best score first.
    sorted_scores = np.argsort(most_similar_scores)[::-1]
    best_user_index = most_similar[sorted_scores[0]]
    most_similar_user = user_ids[best_user_index]

    # Users are COLUMNS of the ratings matrix, so take the user's column.
    best_user_ratings = matrix[:, best_user_index]
    # Items the similar user rated and Marcel has not rated yet.
    recommendable_items = np.logical_and(best_user_ratings > 0, marcel == 0)

    print("%s recommends for Marcel:" % most_similar_user)
    # If there are any recommendations.
    if np.any(recommendable_items):
        preferences = best_user_ratings[recommendable_items]
        # Separate name so the full item_ids list is not overwritten.
        recommended_item_ids = item_ids[recommendable_items]
        for item_id, rating in zip(recommended_item_ids, preferences):
            print('I gave for the movie %s the rating %d' % (item_id, rating))
    else:
        print("All the movies you also watched.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment