Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 23, 2011 22:09
Show Gist options
  • Save marcelcaraciolo/1166730 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1166730 to your computer and use it in GitHub Desktop.
SVD Example
from scipy import linalg
import numpy as np
from scipy.spatial.distance import cosine
# Collaborative-filtering walkthrough: factor a small movie x user ratings
# matrix with SVD, project a new user (Marcel) into the 2-D latent space,
# find the existing user most similar to him, and recommend the movies that
# user has rated but Marcel has not.

# Let's define the matrix: one row per movie, one column per user.
# A rating of 0 means "not rated".
user_ids = np.array(['Amanda', 'Anna', 'Bruno', 'Ricardo'])
item_ids = np.array(['Back to The Future', 'Conan',
                     'Lord of the Rings', 'Star Wars'])
# A plain ndarray is used instead of np.matrix: np.matrix is discouraged by
# NumPy, and its always-2-D rows are rejected by scipy's cosine(), which
# requires 1-D vectors.
matrix = np.array([
    # Amanda, Anna, Bruno, Ricardo
    [3, 4, 3, 1],  # Back to The Future
    [1, 3, 2, 6],  # Conan
    [2, 4, 1, 5],  # Lord of the Rings
    [3, 3, 5, 2],  # Star Wars
])

# Minimum cosine similarity for a user to be considered similar to Marcel.
SIMILARITY_CUTOFF = 0.9

# Compute the SVD decomposition: matrix = u . diag(s) . vt
u, s, vt = linalg.svd(matrix)

# Now let's get the decomposed matrices (rank 2).
# The first and second columns of u (4x2): the movie axes of the 2-D space.
u2 = u[:, 0:2]
# The first and second columns of vt transposed (4x2).
vt2 = vt.T[:, 0:2]
# The two largest singular values on a diagonal (2x2). The name is
# historical; these are singular values, not eigenvalues.
eig2 = np.diag(s[0:2])
# Inverted once here because every projection below needs it.
inv_eig2 = linalg.inv(eig2)

# Now let's come with Marcel, the new user (his rating for each movie).
marcel = np.array([0, 3, 0, 4])
# Project Marcel's ratings into the 2-D space: ratings . u2 . eig2^-1
marcel_2D = marcel.dot(u2).dot(inv_eig2)

# Compute the cosine similarity between Marcel and
# every other user in our 2-D space.
# Perfect similarity = 1.0, no similarity = 0.0
# matrix.T has one row per user, so this projects all users at once (4x2).
users_2D = matrix.T.dot(u2).dot(inv_eig2)
# scipy's cosine() is a distance, so similarity = 1 - distance.
users_sim = np.array([1 - cosine(marcel_2D, user_2D)
                      for user_2D in users_2D])

# Remove all users who fall below the cosine similarity cutoff.
most_similar = np.where(users_sim >= SIMILARITY_CUTOFF)
most_similar_scores = users_sim[most_similar]
most_similar_users = user_ids[most_similar]
for user_id, score in zip(most_similar_users, most_similar_scores):
    print("%s x Marcel: %.2f" % (user_id, score))

# For computing the recommendations we will use the strategy:
# 1) Select the most similar user.
# 2) Compare all the items rated by this user against Marcel's own and
#    select the items that Marcel has not yet rated.
# 3) Return the ratings for the items Marcel has not yet seen,
#    but the most similar user has rated.
if most_similar_users.size == 0:
    # Nobody passed the cutoff, so there is no user to recommend from.
    print("No user reaches the %.2f similarity cutoff." % SIMILARITY_CUTOFF)
else:
    # Indices into the filtered arrays, highest similarity first.
    sorted_scores = np.argsort(most_similar_scores)[::-1]
    most_similar_user = most_similar_users[sorted_scores][0]

    # Users are the COLUMNS of the matrix, so take that user's column to get
    # one rating per movie (indexing rows here would return a movie instead).
    similar_user_ratings = matrix[:, user_ids == most_similar_user].ravel()
    # Recommendable = rated by the similar user AND not yet rated by Marcel.
    recommendable_items = (similar_user_ratings != 0) & (marcel == 0)

    print(most_similar_user + ' recommends for Marcel:')
    if np.any(recommendable_items):
        preferences = similar_user_ratings[recommendable_items]
        # Kept separate so the full item_ids catalogue is not overwritten.
        recommended_item_ids = item_ids[recommendable_items]
        for title, rating in zip(recommended_item_ids, preferences):
            print('I gave for the movie %s the rating %d' % (title, rating))
    else:
        print("All the movies you also watched.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment