marcelcaraciolo · August 23, 2011 22:09
diff --git a/svd_example.py b/svd_example.py
 from scipy import linalg
 import numpy as np
 from scipy.spatial.distance import cosine

 # SVD-based collaborative filtering demo: every user is projected into a
 # 2-D latent space, the known user closest to a new user (Marcel) is
 # selected, and that user's ratings for the items Marcel has not rated yet
 # are printed as recommendations.

 # Minimum cosine similarity for a user to count as "similar" to Marcel.
 SIMILARITY_CUTOFF = 0.9

 # Ratings matrix layout: one ROW per item, one COLUMN per user.
 user_ids = np.array(['Amanda', 'Anna', 'Bruno', 'Ricardo'])
 item_ids = np.array(['Back to The Future', 'Conan',
                      'Lord of the Rings', 'Star Wars'])

 # Plain ndarrays are used instead of np.matrix so every projected vector
 # below is 1-D, which is what scipy's cosine() expects.
 matrix = np.array([
     # Amanda, Anna, Bruno, Ricardo
     [3, 4, 3, 1],  # Back to The Future
     [1, 3, 2, 6],  # Conan
     [2, 4, 1, 5],  # Lord of The Rings
     [3, 3, 5, 2],  # Star Wars
 ])

 # Compute the SVD decomposition: matrix = u * diag(s) * vt
 u, s, vt = linalg.svd(matrix)

 # Now let's get the decomposed matrices (rank 2)
 # The first and second columns of u (4x2)
 u2 = u[:, 0:2]
 # The first and second columns of vt transposed (4x2); not used by the
 # rest of the script, kept so the full rank-2 factorisation is available.
 vt2 = vt.T[:, 0:2]
 # The two largest singular values as a 2x2 diagonal matrix
 eig2 = np.diag(s[0:2])
 # Inverted once here instead of once per projected user.
 eig2_inv = linalg.inv(eig2)

 # Now let's come with Marcel, the new user (one rating per item,
 # 0 = not rated yet).
 marcel = np.array([0, 3, 0, 4])
 marcel_2D = np.dot(np.dot(marcel, u2), eig2_inv)

 # Compute the cosine similarity between Marcel and every other user in
 # our 2-D space (matrix.T iterates over users, i.e. over the columns).
 # Perfect similarity = 1.0  No Similarity = 0.0
 users_2D = [np.dot(np.dot(ratings, u2), eig2_inv) for ratings in matrix.T]
 users_sim = np.array([1 - cosine(marcel_2D, user_2D)
                       for user_2D in users_2D])

 # Keep only the users at or above the cosine similarity cutoff.
 most_similar = np.where(users_sim >= SIMILARITY_CUTOFF)
 most_similar_scores = users_sim[most_similar]
 most_similar_users = user_ids[most_similar]

 for user_id, score in zip(most_similar_users, most_similar_scores):
     print("%s x Marcel: %.2f" % (user_id, score))

 # For computing the recommendations we will use the strategy:
 # 1) Select the most similar user
 # 2) Compare all the items rated by this user against your own and select
 #     the items that you have not yet rated
 # 3) Return the ratings for items I have not yet seen,
 #     but the most similar user has rated.
 most_similar_user = None
 if most_similar_users.size == 0:
     # Nobody reached the cutoff: say so instead of failing on an empty
     # selection.
     print("No user is similar enough to Marcel to recommend anything.")
 else:
     most_similar_user = most_similar_users[np.argmax(most_similar_scores)]

     # Users are columns, so the similar user's ratings are a column of
     # the matrix (indexing rows here would return one item's ratings).
     similar_user_ratings = matrix[:, user_ids == most_similar_user].ravel()
     # Recommend only what the similar user rated AND Marcel has not; an
     # XOR would also pick items Marcel rated but the other user did not.
     recommendable_items = np.logical_and(similar_user_ratings != 0,
                                          marcel == 0)

     print(most_similar_user + ' recommends for Marcel:')

     if np.any(recommendable_items):
         preferences = similar_user_ratings[recommendable_items]
         # A separate name keeps the full item_ids catalogue intact.
         recommended_items = item_ids[recommendable_items]
         for title, rating in zip(recommended_items, preferences):
             print('I gave for the movie %s the rating %d' % (title, rating))
     else:
         print("All the movies you also watched.")
	from scipy import linalg
	import numpy as np
	from scipy.spatial.distance import cosine

	# SVD-based collaborative filtering demo: every user is projected into a
	# 2-D latent space, the known user closest to a new user (Marcel) is
	# selected, and that user's ratings for the items Marcel has not rated yet
	# are printed as recommendations.

	# Minimum cosine similarity for a user to count as "similar" to Marcel.
	SIMILARITY_CUTOFF = 0.9

	# Ratings matrix layout: one ROW per item, one COLUMN per user.
	user_ids = np.array(['Amanda', 'Anna', 'Bruno', 'Ricardo'])
	item_ids = np.array(['Back to The Future', 'Conan',
	                     'Lord of the Rings', 'Star Wars'])

	# Plain ndarrays are used instead of np.matrix so every projected vector
	# below is 1-D, which is what scipy's cosine() expects.
	matrix = np.array([
	    # Amanda, Anna, Bruno, Ricardo
	    [3, 4, 3, 1],  # Back to The Future
	    [1, 3, 2, 6],  # Conan
	    [2, 4, 1, 5],  # Lord of The Rings
	    [3, 3, 5, 2],  # Star Wars
	])

	# Compute the SVD decomposition: matrix = u * diag(s) * vt
	u, s, vt = linalg.svd(matrix)

	# Now let's get the decomposed matrices (rank 2)
	# The first and second columns of u (4x2)
	u2 = u[:, 0:2]
	# The first and second columns of vt transposed (4x2); not used by the
	# rest of the script, kept so the full rank-2 factorisation is available.
	vt2 = vt.T[:, 0:2]
	# The two largest singular values as a 2x2 diagonal matrix
	eig2 = np.diag(s[0:2])
	# Inverted once here instead of once per projected user.
	eig2_inv = linalg.inv(eig2)

	# Now let's come with Marcel, the new user (one rating per item,
	# 0 = not rated yet).
	marcel = np.array([0, 3, 0, 4])
	marcel_2D = np.dot(np.dot(marcel, u2), eig2_inv)

	# Compute the cosine similarity between Marcel and every other user in
	# our 2-D space (matrix.T iterates over users, i.e. over the columns).
	# Perfect similarity = 1.0 No Similarity = 0.0
	users_2D = [np.dot(np.dot(ratings, u2), eig2_inv) for ratings in matrix.T]
	users_sim = np.array([1 - cosine(marcel_2D, user_2D)
	                      for user_2D in users_2D])

	# Keep only the users at or above the cosine similarity cutoff.
	most_similar = np.where(users_sim >= SIMILARITY_CUTOFF)
	most_similar_scores = users_sim[most_similar]
	most_similar_users = user_ids[most_similar]

	for user_id, score in zip(most_similar_users, most_similar_scores):
	    print("%s x Marcel: %.2f" % (user_id, score))

	# For computing the recommendations we will use the strategy:
	# 1) Select the most similar user
	# 2) Compare all the items rated by this user against your own and select
	#    the items that you have not yet rated
	# 3) Return the ratings for items I have not yet seen,
	#    but the most similar user has rated.
	most_similar_user = None
	if most_similar_users.size == 0:
	    # Nobody reached the cutoff: say so instead of failing on an empty
	    # selection.
	    print("No user is similar enough to Marcel to recommend anything.")
	else:
	    most_similar_user = most_similar_users[np.argmax(most_similar_scores)]

	    # Users are columns, so the similar user's ratings are a column of
	    # the matrix (indexing rows here would return one item's ratings).
	    similar_user_ratings = matrix[:, user_ids == most_similar_user].ravel()
	    # Recommend only what the similar user rated AND Marcel has not; an
	    # XOR would also pick items Marcel rated but the other user did not.
	    recommendable_items = np.logical_and(similar_user_ratings != 0,
	                                         marcel == 0)

	    print(most_similar_user + ' recommends for Marcel:')

	    if np.any(recommendable_items):
	        preferences = similar_user_ratings[recommendable_items]
	        # A separate name keeps the full item_ids catalogue intact.
	        recommended_items = item_ids[recommendable_items]
	        for title, rating in zip(recommended_items, preferences):
	            print('I gave for the movie %s the rating %d' % (title, rating))
	    else:
	        print("All the movies you also watched.")