Skip to content

Instantly share code, notes, and snippets.

@jimfleming
Created August 15, 2012 15:44
Show Gist options
  • Save jimfleming/3361107 to your computer and use it in GitHub Desktop.
Save jimfleming/3361107 to your computer and use it in GitHub Desktop.
Fuzzy string comparison
# Based on: http://seatgeek.com/blog/dev/fuzzywuzzy-fuzzy-string-matching-in-python
_ = require 'underscore'
token_regex = /\w+/g
trim_regex = /^\s+|\s+$/g
levenshtein_dist = (a, b) ->
throw new TypeError('a is empty') unless _.isString(a)
throw new TypeError('b is empty') unless _.isString(b)
lensum = a.length + b.length
d = [e = 0]
while a[e]
c = [f = 0]
while b[++f]
g = d[f] = (if e then 1 + Math.min(d[--f], c[f] - (a[e - 1] is b[f]), c[++f] = d[f]) else f)
e++
return (lensum - g) / lensum
token_set_ratio = (s1, s2) ->
throw new TypeError('s1 is empty') unless _.isString(s1)
throw new TypeError('s2 is empty') unless _.isString(s2)
tokens1 = s1.match token_regex
tokens2 = s2.match token_regex
intersection = _.intersection tokens1, tokens2
diff_1to2 = _.difference tokens1, tokens2
diff_2to1 = _.difference tokens2, tokens1
sorted_sect = _.sortBy intersection, (word) ->
return word
.join ' '
.replace trim_regex, ''
sorted_1to2 = _.sortBy diff_1to2, (word) ->
return word
.join ' '
.replace trim_regex, ''
sorted_2to1 = _.sortBy diff_2to1, (word) ->
return word
.join ' '
.replace trim_regex, ''
combined_1to2 = "#{sorted_sect} #{sorted_1to2}".replace trim_regex, ''
combined_2to1 = "#{sorted_sect} #{sorted_2to1}".replace trim_regex, ''
pairwise = [
levenshtein_dist sorted_sect, combined_1to2
levenshtein_dist sorted_sect, combined_2to1
levenshtein_dist combined_1to2, combined_2to1
]
return _.max pairwise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment