Created
January 3, 2011 00:06
-
-
Save sriranggd/762951 to your computer and use it in GitHub Desktop.
MongoDB Map-Reduce script for aggregating toppers for every group under different categories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I have a "users" collection with documents like this : | |
{ | |
name : "Jack" | |
group: "Group-1" | |
total : 121 | |
subScores : { | |
cat1 : 38 | |
cat2 : 42 | |
cat3 : 41 | |
} | |
} | |
total is the user's overall score (the sum of the category scores)
subScores are different category based scores | |
Now I want to list out the following on a weekly basis: | |
1) Top 10 users from each group | |
2) Top 10 users from each group in each category | |
I am thinking of having the results for both of these operations in a | |
single collection in this format : | |
{ | |
group : "group-1" | |
sub : total | |
names : ["jack", "john", "foo", "bar".......] | |
} | |
{ | |
group : "group-1" | |
sub : cat1 | |
names : ["jill", "paul", "foo", "bar".......] | |
} | |
or in an aggregated form like this : | |
{ | |
group : "group-1" | |
toppers : { | |
total : ["jack", "john", "foo", "bar".......] | |
cat1 : ["jill", "paul", "foo", "bar".......] | |
....... | |
....... | |
} | |
} | |
Note : The final results of the Map-Reduce are different. They are like this : | |
total_toppers collection has documents like this : | |
{ | |
_id : "group-1" | |
value : { | |
students : [ | |
{ name : "Jack", score : 157 } | |
{ name : "Jill", score : 154 } | |
........... | |
........... | |
........... | |
........... | |
] | |
} | |
} | |
The subject_toppers collection has documents of exactly the same shape, except that the _id field has values of the form "group-1::cat1".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Pool of school identifiers that generated students are assigned to.
var schools = [
    "school-1",
    "school-2",
    "school-3",
    "school-4",
    "school-5",
    "school-6",
    "school-7",
    "school-8",
    "school-9",
    "school-0"
];

/**
 * Picks one entry of `schools` at random.
 *
 * @returns {string} A school identifier taken from the `schools` pool.
 */
function getSchool() {
    // Scale by the real pool size rather than a literal 10, so that adding or
    // removing schools can never produce `undefined` or unreachable entries.
    return schools[Math.floor(Math.random() * schools.length)];
}
/**
 * Builds a random 8-character name made of ASCII letters.
 *
 * @returns {string} Eight characters drawn from A-Z and a-z.
 */
function getName() {
    // Full 52-letter alphabet. The earlier literal had "T" twice, and was
    // missing "Y" and "j", so those two letters could never appear.
    var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    var nameLength = 8;
    var name = '';
    for (var i = 0; i < nameLength; i++) {
        name += chars.charAt(Math.floor(Math.random() * chars.length));
    }
    return name;
}
/**
 * Generates three random subject scores plus their sum.
 *
 * @returns {Array} [score1, score2, score3, total], where each score is an
 *                  integer from 1 to 50 and total is the sum of the three.
 */
function getScores() {
    var SUBJECT_COUNT = 3;
    var result = [];
    var total = 0;
    for (var k = 0; k < SUBJECT_COUNT; k++) {
        var score = Math.floor(Math.random() * 50 + 1);
        result.push(score);
        total += score;
    }
    // The last slot carries the total of the preceding subject scores.
    result.push(total);
    return result;
}
// Fill map_reduce.students with 50000 randomly generated student documents,
// printing a timestamp (Date.now()) before and after the run.
var mydb = db.getSisterDB("map_reduce");
print("Begin : " + Date.now() + "\n");
for (var i = 0; i < 50000; i++) {
    // Declared with `var` so the loop does not rely on implicit globals.
    var s = getScores();   // [math, sci, eng, total]
    var a = {
        name : getName(),
        school : getSchool(),
        total : s[3],
        subScores : {
            math : s[0],
            sci : s[1],
            eng : s[2]
        }
    };
    mydb.students.insert(a);
}
print("End : " + Date.now() + "\n");
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Map step for overall toppers: emits the current document (`this`) under its
 * school, wrapped in the same {students: [...]} shape that `reduce` returns.
 */
function totalMap() {
    var entry = {name : this.name, score : this.total};
    emit(this.school, {students : [entry]});
}
/**
 * Map step for per-subject toppers: for every key in the current document's
 * `subScores`, emits one entry under the key "<school>::<subject>", using the
 * same {students: [...]} shape that `reduce` returns.
 */
function subjectMap() {
    var scores = this.subScores;
    for (var subject in scores) {
        var key = this.school + "::" + subject;
        var entry = {name : this.name, score : scores[subject]};
        emit(key, {students : [entry]});
    }
}
/**
 * Reduce step shared by both map functions: merges the student lists of all
 * incoming values and keeps the 10 highest-scoring students.
 *
 * The return value has the same shape as the emitted values, so the output of
 * one call can be passed back in as an element of `values` in a later call.
 *
 * @param {string} school  The emitted key (not used by the computation).
 * @param {Array}  values  Objects of the form {students: [{name, score}, ...]}.
 * @returns {Object} {students: [...]} with at most 10 entries, sorted by
 *                   descending score.
 */
function reduce(school, values) {
    // Everything is declared locally: the function must be self-contained and
    // must not leak or depend on globals.
    var MAX_TOPPERS = 10;
    var byScoreDesc = function (a, b) { return b.score - a.score; };
    var toppers = [];

    for (var i = 0; i < values.length; i++) {
        var students = values[i].students;
        for (var j = 0; j < students.length; j++) {
            var candidate = students[j];
            if (toppers.length < MAX_TOPPERS) {
                toppers.push(candidate);
            } else if (candidate.score > toppers[MAX_TOPPERS - 1].score) {
                // The list is full: replace the current lowest entry.
                toppers[MAX_TOPPERS - 1] = candidate;
            } else {
                // Rejected candidate: the list is unchanged, so skip the sort.
                continue;
            }
            // Keep the list ordered so the last slot is always the lowest score.
            toppers.sort(byScoreDesc);
        }
    }

    // Cannot return an array at the top level, so wrap it in an object.
    return {students : toppers};
}
// Run both map-reduce jobs over map_reduce.students and report the elapsed time:
//   totalMap   -> total_toppers   (one result per school)
//   subjectMap -> subject_toppers (one result per "<school>::<subject>")
var mydb = db.getSisterDB("map_reduce");
var start = Date.now();
var end = 0;
var res;
print("Begin : " + start);
try {
    // NOTE(review): depending on the shell version, runCommand may report a
    // failed command through the result's `ok` field rather than by throwing;
    // confirm before relying on the "Success" line below.
    res = mydb.runCommand({mapReduce : "students", map : totalMap, reduce : reduce, verbose : true, out : "total_toppers"});
    // Print each result before `res` is reused, so the first job's outcome
    // is not silently discarded.
    printjson(res);
    res = mydb.runCommand({mapReduce : "students", map : subjectMap, reduce : reduce, verbose : true, out : "subject_toppers"});
    printjson(res);
    end = Date.now();
    print("Success : End : " + end);
} catch (ex) {
    printjson(ex);
    end = Date.now();
    print("With exception : End : " + end);
}
print("Time taken in milliseconds = " + (end - start));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment