Created
December 12, 2013 07:41
-
-
Save edisonqkj/7924482 to your computer and use it in GitHub Desktop.
Hashing for Cosine Similarity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% Hashing for Cosine Similarity
% This M-file was written as a learning exercise for Locality Sensitive Hashing.
% Enjoy! Questions and bug reports are welcome.
%
% Time: 2013/12/12
% Released by: edisonqkj
% E-mail: [email protected]
% References:
% http://www.cs.jhu.edu/~vandurme/papers/VanDurmeLallACL10-slides.pdf
% https://gist.github.com/greeness/94a3d425009be0f94751
%
function [h,t]=LSH(dimension,num_plane,isdraw)
%LSH  Compare hashed and exact angular similarity of two random samples.
%
%   [h,t] = LSH(dimension,num_plane,isdraw) draws two random samples of
%   length DIMENSION and NUM_PLANE random projection vectors (all with
%   randn), computes one signature bit per projection vector for each
%   sample, and returns
%       h - fraction of signature bits on which the two samples agree
%       t - angular similarity 1 - theta/pi computed from the samples
%
%   If ISDRAW is true, the first two components of every projection
%   vector are plotted, coloured by the pair of signature bits:
%       Green: sample1's bit is 1 and sample2's bit is 0
%       Blue:  sample2's bit is 1 and sample1's bit is 0
%       Red:   both bits are 1
%       Black: both bits are 0
%   The two samples are drawn as thick green / blue segments from the
%   origin. Drawing is meant for DIMENSION == 2.
%
%   Omitted or empty arguments default to dimension = 2,
%   num_plane = 2^10 and isdraw = false.

% Fill in defaults so the function can be called with fewer arguments.
if nargin<1 || isempty(dimension)
    dimension=2;
end
if nargin<2 || isempty(num_plane)
    num_plane=2^10;
end
if nargin<3 || isempty(isdraw)
    isdraw=false;
end

% Kept from the original implementation: clears the command window.
clc;

%% Main
sample1=randn(1,dimension);
sample2=randn(1,dimension);
proj_plane=randn(num_plane,dimension);

res1=signature(sample1,proj_plane);
res2=signature(sample2,proj_plane);

h=hash_similarity(res1,res2);
t=angular_similarity(sample1,sample2);
% disp(['Hash Similarity ' 'True Similarity ' 'Diff ']);
% disp([num2str(h) ' ' num2str(t) ' ' num2str(abs(h-t))]);

%% Draw 2D similarity results
if isdraw
    if dimension<2
        % The plot indexes the second component of every vector.
        error('LSH:dimension', ...
            'Drawing requires dimension >= 2 (got %d).',dimension);
    elseif dimension>2
        warning('LSH:dimension', ...
            'Only the first two of %d components are drawn.',dimension);
    end

    x=proj_plane(:,1);
    y=proj_plane(:,2);

    % One mask / marker style / legend label per signature combination.
    masks={res1==1 & res2==0, ...   % only sample1 has bit 1
           res2==1 & res1==0, ...   % only sample2 has bit 1
           res2==1 & res1==1, ...   % both have bit 1
           res2==0 & res1==0};      % both have bit 0
    styles={'g.','b.','r.','k.'};
    legend_info={'Sample1','Sample2','Shared','None'};

    handles=cell(1,numel(masks));
    for k=1:numel(masks)
        if any(masks{k})
            handles{k}=plot(x(masks{k}),y(masks{k}),styles{k});
        else
            % Empty category: plot an invisible NaN point so that a line
            % object exists and the legend entry keeps its own colour.
            handles{k}=plot(NaN,NaN,styles{k});
        end
        % Turned on after the first plot so that, as before, the first
        % plot call is the one that starts the picture.
        hold on;
    end

    % plot sample1,2
    plot([0,sample1(1)],[0,sample1(2)],'g-','LineWidth',3);
    plot([0,sample2(1)],[0,sample2(2)],'b-','LineWidth',3);
    plot(sample1(1),sample1(2),'go','MarkerSize',12);
    plot(sample2(1),sample2(2),'bo','MarkerSize',12);
    hold off;
    grid on;

    % Pass the handles explicitly so labels cannot shift onto other objects.
    legend([handles{:}],legend_info);
    title(['Hash Similarity: ' num2str(h) ' True Similarity: ' num2str(t)]);
end
end
function [res]=signature(sample,planes)
%SIGNATURE  Sign bits of a sample's dot products with a set of vectors.
%
%   res = signature(sample,planes) takes a 1-by-d row vector SAMPLE and
%   an n-by-d matrix PLANES (one vector per row) and returns a 1-by-n
%   row vector RES where res(i) is 1 if sample*planes(i,:)' >= 0 and 0
%   otherwise. When PLANES has no rows, RES is an empty 1-by-0 vector
%   (the loop-based version left RES unassigned in that case).

% A single matrix product yields every dot product at once.
res=double(sample*planes'>=0);
end
function [res]=hash_similarity(v1,v2)
%HASH_SIMILARITY  Share of positions at which two signatures agree.
%
%   res = hash_similarity(v1,v2) compares V1 and V2 element by element
%   and divides the number of equal entries by the number of columns
%   of V1.

agree=(v1==v2);        % true where the two signatures match
n_cols=size(v1,2);     % normalising count taken from v1
res=sum(agree)/n_cols;
end
function [res]=angular_similarity(v1,v2)
%ANGULAR_SIMILARITY  Angular similarity 1 - theta/pi of two row vectors.
%
%   res = angular_similarity(v1,v2) computes the angle THETA between the
%   row vectors V1 and V2 from their normalised dot product and returns
%   1 - theta/pi: 1 for vectors pointing the same way, 0.5 for
%   orthogonal vectors and 0 for opposite vectors. If either vector has
%   zero length the result is NaN.

inner=v1*v2';                 % named "inner" to avoid shadowing builtin dot
len1=sqrt(sum(v1.*v1));
len2=sqrt(sum(v2.*v2));
cosine=inner/(len1*len2);

% Rounding can push the cosine slightly outside [-1,1] for (anti)parallel
% vectors, and acos would then return a complex number. Clamp it back.
% Logical indexing is used instead of min/max so that NaN stays NaN.
cosine(cosine>1)=1;
cosine(cosine<-1)=-1;

theta=acos(cosine);
res=1-theta/pi;
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment