Created
December 9, 2009 04:40
-
-
Save jgagner/252267 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark'

# Sample 9-element binary vectors for the similarity demo.
# d1 and d3 have no set positions in common; d2 overlaps both of them.
d1 = [1, 1, 1, 1, 0, 0, 0, 0, 0]
d2 = [1, 0, 1, 0, 1, 1, 0, 0, 0]
d3 = [0, 0, 0, 0, 1, 0, 1, 1, 1]
# Computes the cosine similarity of two numeric vectors: their dot product
# divided by the product of their Euclidean lengths.
#
# vector1 - first vector (the script passes Arrays of 0/1 Integers).
# vector2 - second vector, compared position by position with vector1.
#
# Returns a Float. If either vector has zero length (empty, or every entry
# zero) the cosine is undefined; 0.0 is returned instead of the NaN that
# 0.0 / 0.0 would produce.
def calc_sim(vector1, vector2)
  numerator = dot_product(vector1, vector2)
  denominator = normalize(vector1) * normalize(vector2)
  return 0.0 if denominator.zero?

  numerator / denominator
end
# Computes the dot product of two equal-length numeric vectors.
#
# vector1 - first vector; must respond to size and each_with_index.
# vector2 - second vector; must respond to size and [] with an Integer index.
#
# Returns the sum of the element-wise products as a Float (0.0 for two
# empty vectors).
# Raises ArgumentError if the vectors differ in length. Previously a longer
# vector2 was silently truncated and a shorter one failed with a
# nil-coercion TypeError.
def dot_product(vector1, vector2)
  unless vector1.size == vector2.size
    raise ArgumentError,
          "vector length mismatch: #{vector1.size} vs #{vector2.size}"
  end

  # Start from 0.0 so the result is a Float even for Integer inputs.
  vector1.each_with_index.inject(0.0) do |total, (item, index)|
    total + item * vector2[index]
  end
end
# Computes the Euclidean (L2) norm of a vector: the square root of the sum
# of its squared entries. Despite the name, it returns the vector's
# magnitude, not a normalized copy of the vector.
#
# vector - an enumerable of numbers.
#
# Returns the magnitude as a Float (0.0 for an empty vector).
def normalize(vector)
  sum_of_squares = vector.inject(0.0) { |total, entry| total + entry**2 }
  Math.sqrt(sum_of_squares)
end
# Print each sample vector, then the cosine similarity of selected pairs
# (including d1 against itself).
puts "d1= #{d1.inspect}"
puts "d2= #{d2.inspect}"
puts "d3= #{d3.inspect}"
puts "d1 to d2 #{calc_sim(d1, d2)}"
puts "d1 to d3 #{calc_sim(d1, d3)}"
puts "d2 to d3 #{calc_sim(d2, d3)}"
puts "d1 to d1 #{calc_sim(d1, d1)}"
# Benchmark calc_sim on two large random binary vectors, in both argument
# orders, and print the resulting similarities.
puts "doing bigass vector"
vector_size = 100_000
# rand(2) yields 0 or 1 with equal probability, the same distribution as the
# earlier rand(100) % 2.
bigass1 = Array.new(vector_size) { rand(2) }
bigass2 = Array.new(vector_size) { rand(2) }
puts "two #{bigass1.count} entry vectors created. comparing"

# Assigned inside the benchmark blocks; declared here so they remain visible
# after the blocks finish.
result1 = nil
result2 = nil
# bmbm runs a rehearsal pass before the measured pass, so each block
# executes twice.
Benchmark.bmbm do |x|
  x.report("bigass1 with bigass2") { result1 = calc_sim(bigass1, bigass2) }
  x.report("bigass2 with bigass1") { result2 = calc_sim(bigass2, bigass1) }
end
puts "done. ba1 to ba2 #{result1} ba2 to ba1 #{result2}"
# Output
# ruby cosine_similarity.rb
# d1= [1, 1, 1, 1, 0, 0, 0, 0, 0]
# d2= [1, 0, 1, 0, 1, 1, 0, 0, 0]
# d3= [0, 0, 0, 0, 1, 0, 1, 1, 1]
# d1 to d2 0.5
# d1 to d3 0.0
# d2 to d3 0.25
# d1 to d1 1.0
# doing bigass vector
# two 100000 entry vectors created. comparing
# Rehearsal --------------------------------------------------------
# bigass1 with bigass2 0.270000 0.000000 0.270000 ( 0.271032)
# bigass2 with bigass1 0.270000 0.000000 0.270000 ( 0.273375)
# ----------------------------------------------- total: 0.540000sec
#
# user system total real
# bigass1 with bigass2 0.270000 0.000000 0.270000 ( 0.268135)
# bigass2 with bigass1 0.270000 0.010000 0.280000 ( 0.268873)
# done. ba1 to ba2 0.500861220264164 ba2 to ba1 0.500861220264164
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment