Last active
May 23, 2018 15:52
-
-
Save berdosi/a27f980b24261bf34e43c30948cd36e4 to your computer and use it in GitHub Desktop.
Calculate the Jaccard similarity index between two strings. Strings are treated as sets of words, and duplicate words are removed. (https://en.wikipedia.org/wiki/Jaccard_index)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Calculate the Jaccard similarity index between two strings.
 *
 * Each string is treated as a set of whitespace-separated words: duplicate
 * words count once, and empty tokens produced by leading/trailing whitespace
 * are ignored. The index is |A ∩ B| / |A ∪ B|.
 *
 * @param {string} item - First string to compare.
 * @param {string} otherItem - Second string to compare.
 * @returns {number} A value in [0, 1]; 1 means the two word sets are equal.
 *   When neither string contains any word, 1 is returned (two empty sets are
 *   considered identical) instead of the NaN that 0 / 0 would produce.
 */
function getJaccardSimilarity(item, otherItem) {
  // A Set gives whole-word, O(1) membership tests and de-duplicates for free.
  // The filter drops the '' tokens that split() yields for an empty string or
  // for leading/trailing whitespace, so they are never counted as words.
  const toWordSet = (text) => new Set(text.split(/\s+/).filter((word) => word !== ''));

  const words = toWordSet(item);
  const otherWords = toWordSet(otherItem);
  const union = new Set([...words, ...otherWords]);

  // Guard against division by zero when both inputs have no words.
  if (union.size === 0) {
    return 1;
  }

  let intersectionSize = 0;
  for (const word of words) {
    if (otherWords.has(word)) {
      intersectionSize += 1;
    }
  }

  return intersectionSize / union.size;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment