Created
October 18, 2009 12:31
-
-
Save jstorimer/212661 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'gsl'

# SVD-based collaborative filtering demo.
#
# Builds a seasons x users rating matrix, reduces it to a rank-2 space with a
# singular value decomposition, projects a new user (Bob) into that space,
# ranks the existing users by cosine similarity to Bob, and finally prints the
# seasons the most similar user has rated but Bob has not.

# Minimum cosine similarity a user needs to count as "similar" to Bob.
SIMILARITY_CUTOFF = 0.9

# User IDs are 1-based and follow the column order of the rating matrix.
users = { 1 => "Ben", 2 => "Tom", 3 => "John", 4 => "Fred" }

# Rows are seasons, columns are users; 0 means "not rated".
m = GSL::Matrix[
  # Ben, Tom, John, Fred
  [5, 5, 0, 5], # season 1
  [5, 0, 3, 4], # season 2
  [3, 4, 0, 3], # season 3
  [0, 0, 5, 3], # season 4
  [5, 4, 4, 5], # season 5
  [5, 4, 5, 5]  # season 6
]

# Compute the SVD decomposition.
# NOTE(review): rb-gsl documents the second return value as V rather than V^T;
# the name `vt` is kept from the original script -- confirm against the
# installed rb-gsl version.
u, vt, s = m.SV_decomp
s = GSL::Matrix.diagonal(s)

# Take the rank-2 approximation of the matrix:
# - first and second columns of u  (6x2)
# - first and second columns of vt (4x2)
# - the first two singular values, as a 2x2 diagonal block
u2 = GSL::Matrix[u.column(0), u.column(1)]
v2 = GSL::Matrix[vt.column(0), vt.column(1)]
eig2 = GSL::Matrix[s.column(0).to_a.flatten[0, 2], s.column(1).to_a.flatten[0, 2]]

# Here comes Bob, our new user: one rating per season, 0 means "not rated".
bob = GSL::Matrix[[5, 5, 0, 0, 0, 5]]
# Project Bob's ratings into the 2-D space: bob * U2 * S2^-1.
bob_embed = bob * u2 * eig2.inv

# Compute the cosine similarity between Bob and every other user in the 2-D
# space. Bob's vector and norm do not change per user, so build them once.
bob_vector = GSL::Vector[bob_embed[0], bob_embed[1]]
bob_norm = bob_embed.norm
user_sim = {}
count = 1
v2.each_row do |row|
  user_sim[count] = bob_vector.dot(row) / (row.norm * bob_norm)
  # user_sim[count] = (bob_embed[0].dot(row)) / (row.norm * bob_embed.norm)
  count += 1
end

# Drop all users who fall below the cosine similarity cutoff and sort the rest
# by similarity, most similar first. Result is an array of [id, similarity].
similar_users = user_sim.reject { |_id, sim| sim < SIMILARITY_CUTOFF }
                        .sort_by { |_id, sim| -sim }
# The block parameters are deliberately not named `u`, which would shadow (and
# on Ruby 1.8 overwrite) the SVD matrix `u` above.
similar_users.each do |id, sim|
  printf "%s (ID: %d, Similarity: %0.3f) \n", users[id], id, sim
end

# We'll use a simple strategy in this case:
# 1) Select the most similar user
# 2) Compare all items rated by this user against your own and select items
#    that you have not yet rated
# 3) Return the ratings for items I have not yet seen, but the most similar
#    user has rated
if similar_users.empty?
  # Without this guard `similar_users[0][0]` raises NoMethodError on nil.
  printf "\n No user reaches the %0.2f similarity cutoff, nothing to recommend. \n", SIMILARITY_CUTOFF
else
  best_id = similar_users.first.first
  # User IDs are 1-based, matrix columns are 0-based.
  similar_users_items = m.column(best_id - 1).transpose.to_a.flatten
  my_items = bob.transpose.to_v

  # Season number (1-based) => the similar user's rating, for seasons Bob has
  # not rated but the similar user has.
  not_seen_yet = {}
  my_items.each_index do |i|
    not_seen_yet[i + 1] = similar_users_items[i] if my_items[i] == 0 && similar_users_items[i] != 0
  end

  printf "\n %s recommends: \n", users[best_id]
  not_seen_yet.sort_by { |_season, rating| -rating }.each do |season, rating|
    printf "\tSeason %d .. I gave it a rating of %d \n", season, rating
  end
  print "We've seen all the same seasons, bugger!" if not_seen_yet.empty?
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment