Skip to content

Instantly share code, notes, and snippets.

@nicolasbrugneaux
Last active September 21, 2015 14:41
Show Gist options
  • Save nicolasbrugneaux/ec104e633da5822beff3 to your computer and use it in GitHub Desktop.
Save nicolasbrugneaux/ec104e633da5822beff3 to your computer and use it in GitHub Desktop.
fuzzy-string-matching-using-cosine-similarity
// http://blog.nishtahir.com/2015/09/19/fuzzy-string-matching-using-cosine-similarity/
'use strict';
/**
 * Lazily yields every value of `set1` that `set2` also contains.
 *
 * Values come out in the iteration order of `set1.values()`; nothing is
 * collected up front, so the caller may stop consuming at any time.
 *
 * @param {{ values: function(): Iterable<*> }} set1 - Collection whose values are walked.
 * @param {{ has: function(*): boolean }} set2 - Collection used for the membership test.
 * @yields {*} Each value of `set1` for which `set2.has( value )` is truthy.
 */
Set.intersection = function*( set1, set2 )
{
    for ( const candidate of set1.values() )
    {
        // Guard clause: skip anything the second collection does not hold.
        if ( !set2.has( candidate ) )
        {
            continue;
        }

        yield candidate;
    }
};
/**
 * Counts how many times each term occurs in `terms`.
 *
 * @param {Iterable<*>} terms - Any iterable of terms. A string is iterated
 *     character by character, so each character is treated as one term.
 * @returns {Map<*, number>} Map from term to its occurrence count, with keys
 *     in order of first appearance.
 */
const getTermFrequencyMap = ( terms ) =>
{
    const frequencies = new Map();

    for ( const term of terms )
    {
        const seen = frequencies.get( term );

        // A term not encountered before starts at 1; otherwise bump its count.
        frequencies.set( term, seen === undefined ? 1 : seen + 1 );
    }

    return frequencies;
};
/**
 * Computes the cosine similarity of two term sequences.
 *
 * Each input is turned into a term-frequency vector with
 * `getTermFrequencyMap`; the result is the dot product of the two vectors
 * divided by the product of their Euclidean magnitudes.
 *
 * @param {Iterable<*>} text1 - First sequence of terms (a string is compared
 *     character by character).
 * @param {Iterable<*>} text2 - Second sequence of terms.
 * @returns {number} Similarity score: 0 when the inputs share no term, 1 when
 *     their frequency vectors point in the same direction. Returns 0 when
 *     either input contains no terms.
 */
const cosineSimilarity = ( text1, text2 ) =>
{
    const a = getTermFrequencyMap( text1 );
    const b = getTermFrequencyMap( text2 );

    // Squared magnitudes only need the counts, so walk the values directly.
    let magnitudeA = 0;
    for ( const count of a.values() )
    {
        magnitudeA += count * count;
    }

    let magnitudeB = 0;
    for ( const count of b.values() )
    {
        magnitudeB += count * count;
    }

    // An empty input has a zero-length vector; dividing by it would give
    // NaN (0 / 0), so report "no similarity" explicitly instead.
    if ( magnitudeA === 0 || magnitudeB === 0 )
    {
        return 0;
    }

    // Only terms present in both inputs contribute to the dot product.
    const s1 = new Set( a.keys() );
    const s2 = new Set( b.keys() );
    let dotProduct = 0;
    for ( const term of Set.intersection( s1, s2 ) )
    {
        dotProduct += a.get( term ) * b.get( term );
    }

    return dotProduct / Math.sqrt( magnitudeA * magnitudeB );
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment