Created
June 8, 2012 14:51
-
-
Save darcy/2896009 to your computer and use it in GitHub Desktop.
MongoDB Trigram Search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// THIS SHOULD WORK AS COPY/PASTE into mongo console | |
var games = ["here is a test", "small test", "another game"]; | |
var query = 'here is testing a test'; | |
////////////////////////////////////////////// | |
// 1. util functions | |
var intersect = function(arr1, arr2) { | |
var r = [], o = {}, l = arr2.length, i, v; | |
for (i = 0; i < l; i++) { | |
o[arr2[i]] = true; | |
} | |
l = arr1.length; | |
for (i = 0; i < l; i++) { | |
v = arr1[i]; | |
if (v in o) { | |
r.push(v); | |
} | |
} | |
return r; | |
}; | |
var trigramize = function(s) { | |
//normalize string | |
var w = ' '+s.toLowerCase().trim(); | |
//trigramize it | |
var tg = []; | |
for (var i=0; i<Math.min(s.length,20); i++) { | |
tg.push(w.substring(i,i+3)); | |
} | |
// get the unique trigrams | |
// (http://www.shamasis.net/2009/09/fast-algorithm-to-find-unique-items-in-javascript-array/) | |
var o = {}, i, l = tg.length, r = []; | |
for(i=0; i<l;i+=1) o[tg[i]] = tg[i]; | |
for(i in o) r.push(o[i]); | |
return r; | |
} | |
// 2. create games | |
db.foo.drop(); | |
games.forEach(function(g) { | |
db.foo.save({name : g, trigrams : trigramize(g) }); | |
}); | |
// 3. search function | |
var search = function(s) { | |
var map = function() { | |
emit(this, { trigram_match : intersect(this.trigrams, trigrams).length }); | |
}; | |
var reduce=function(key, values) { | |
return values; | |
} | |
var trigram_query = trigramize(s); | |
var res_name = ObjectId().toString(); | |
var res = db.foo.mapReduce(map,reduce,{ | |
out: res_name, // we could make this inline so our results aren't global, however then you don't have collection functions like sort or limit, etc | |
query: {trigrams : {$in :trigram_query}}, // filter out only trigram 'hits' | |
scope : { | |
trigrams : trigram_query, // pass our query param to map/reduce | |
intersect : intersect // pass the intersect function helper | |
} | |
}); | |
// res.find(); | |
var results = db[res_name].find().sort({value: -1}).limit(20).toArray(); //map(function(t) { return t._id}); | |
db[res_name].drop(); | |
return results; | |
} | |
search(query); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment