Created
January 10, 2016 00:51
-
-
Save Sammons/c618751890d46735cbe4 to your computer and use it in GitHub Desktop.
Thinking about heuristic scoring for words against strings.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// In-memory stand-in for a message store; each entry is scored against a
// search phrase by the demo at the bottom of the file. Some entries contain
// misspellings (presumably deliberate, to exercise fuzzy matching), so the
// strings must be kept exactly as written.
var pretend_db = [
  'hello guys lets hang out soon',
  'hello ben',
  'what is up?',
  'hey when are you coming to the bay?',
  'yo',
  'when are we getting together?',
  'hig',
  'oh noes',
  'whenever yuo thnk is gud',
  'whatever',
  'yeah'
];
/**
 * Heuristically scores how well the words of `search` match the words of `data`.
 *
 * Both strings are split on runs of whitespace. Every search word is compared
 * with every data word, character by character, and the per-pair counts are
 * summed into one totals object.
 *
 * @param {string} search - Query text.
 * @param {string} data - Candidate text to score against the query.
 * @returns {{seqMatches: number, matches: number}} `matches` counts search
 *   characters that occur anywhere in a data word; `seqMatches` counts those
 *   whose first occurrence in the data word lies to the right of every
 *   earlier sequential match within the same word pair.
 */
function fuzzy_score(search, data) {
  var WHITESPACE = /\s+/;

  // Splits one word into an array of single characters.
  function toChars(token) {
    return token.split('');
  }

  // Adds two stats objects without mutating either of them.
  function addStats(total, cur) {
    return {
      seqMatches: total.seqMatches + cur.seqMatches,
      matches: total.matches + cur.matches
    };
  }

  // Scores a single search word against a single data word.
  function scorePair(searchChars, dataChars) {
    // Start below the first valid index so that a hit at position 0 is
    // counted as an in-order match too.
    var lastHighest = -1;
    var matches = 0;
    var seqMatches = 0;
    searchChars.forEach(function(c) {
      // indexOf reports the first occurrence only, so a repeated character
      // is compared at the same position each time.
      var index = dataChars.indexOf(c);
      if (index < 0) {
        return;
      }
      matches += 1;
      if (index > lastHighest) {
        lastHighest = index;
        seqMatches += 1;
      }
    });
    return { seqMatches: seqMatches, matches: matches };
  }

  var dataTokensChars = data.split(WHITESPACE).map(toChars);

  return search.split(WHITESPACE).map(toChars).map(function(searchChars) {
    return dataTokensChars.map(function(dataChars) {
      return scorePair(searchChars, dataChars);
    }).reduce(addStats, { seqMatches: 0, matches: 0 });
  }).reduce(addStats, { seqMatches: 0, matches: 0 });
}
// Demo: score every pretend_db entry against a misspelled query and print the
// stats next to the entry. forEach is used because the callback exists only
// for its logging side effect and returns nothing worth collecting.
pretend_db.forEach(function(data) {
  console.log(fuzzy_score("whenver you think", data), data);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment