Skip to content

Instantly share code, notes, and snippets.

@sriranggd
Created January 3, 2011 00:06
Show Gist options
  • Save sriranggd/762951 to your computer and use it in GitHub Desktop.
Save sriranggd/762951 to your computer and use it in GitHub Desktop.
MongoDB Map-Reduce script for aggregating toppers for every group under different categories
I have a "users" collection with documents like this :
{
name : "Jack"
group: "Group-1"
total : 121
subScores : {
cat1 : 38
cat2 : 42
cat3 : 41
}
}
total is the user's overall score (the sum of the category scores)
subScores are different category based scores
Now I want to list out the following on a weekly basis:
1) Top 10 users from each group
2) Top 10 users from each group in each category
I am thinking of having the results for both of these operations in a
single collection in this format :
{
group : "group-1"
sub : total
names : ["jack", "john", "foo", "bar".......]
}
{
group : "group-1"
sub : cat1
names : ["jill", "paul", "foo", "bar".......]
}
or in an aggregated form like this :
{
group : "group-1"
toppers : {
total : ["jack", "john", "foo", "bar".......]
cat1 : ["jill", "paul", "foo", "bar".......]
.......
.......
}
}
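Note : The actual output of the Map-Reduce scripts below differs from the formats proposed above (the scripts also use a "students" collection with a "school" field in place of "users" and "group"). The results look like this :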
total_toppers collection has documents like this :
{
_id : "group-1"
value : {
students : [
{ name : "Jack", score : 157 }
{ name : "Jill", score : 154 }
...........
...........
...........
...........
]
}
}
The subject_toppers collection has documents of exactly the same shape, except that the _id field has values of the form "group-1::cat1"
// Pool of school identifiers that generated student documents are assigned to.
var schools = [
    "school-1",
    "school-2",
    "school-3",
    "school-4",
    "school-5",
    "school-6",
    "school-7",
    "school-8",
    "school-9",
    "school-0"
];

/**
 * Picks one entry of `schools` at random.
 *
 * @returns {string} A randomly chosen element of the `schools` array.
 */
function getSchool() {
    // Scale by the actual pool size instead of a literal 10, so that adding or
    // removing schools can neither skip entries nor index past the end.
    return schools[Math.floor(Math.random() * schools.length)];
}
/**
 * Builds a random 8-character name made of ASCII letters.
 *
 * @returns {string} Eight characters, each drawn independently from A-Z / a-z.
 */
function getName() {
    // Full 52-letter alphabet. The previous literal contained "XTZ" (duplicate
    // T, no Y) and skipped lowercase "j", so two letters could never appear.
    var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    var nameLength = 8;
    var name = '';
    for (var i = 0; i < nameLength; i++) {
        name += chars.charAt(Math.floor(Math.random() * chars.length));
    }
    return name;
}
/**
 * Generates three random subject scores plus their sum.
 *
 * @returns {number[]} A four-element array: three integer scores, each in the
 *     range 1..50, followed by the total of those three.
 */
function getScores() {
    // One draw: an integer from 1 to 50 inclusive.
    var drawScore = function () {
        return Math.floor(Math.random() * 50 + 1);
    };
    var scores = [drawScore(), drawScore(), drawScore()];
    scores.push(scores[0] + scores[1] + scores[2]);
    return scores;
}
// Populate the "students" collection of the "map_reduce" database with 50000
// randomly generated documents, printing a timestamp before and after.
var mydb = db.getSisterDB("map_reduce");
print("Begin : " + Date.now() + "\n");
for (var i = 0; i < 50000; i++) {
    // Declared with `var` so the loop does not create implicit globals.
    // getScores() returns [score1, score2, score3, total].
    var s = getScores();
    var a = {
        name : getName(),
        school : getSchool(),
        total : s[3],
        subScores : {
            math : s[0],
            sci : s[1],
            eng : s[2]
        }
    };
    mydb.students.insert(a);
}
print("End : " + Date.now() + "\n");
/**
 * Map function for the overall-score job. Invoked with `this` bound to a
 * student document; emits one single-student list keyed by the school.
 */
function totalMap() {
    var entry = { name : this.name, score : this.total };
    // The value is wrapped as {students: [...]} so it has the same shape as
    // what reduce() returns.
    emit(this.school, { students : [entry] });
}
/**
 * Map function for the per-subject job. Invoked with `this` bound to a student
 * document; for every key of `this.subScores` it emits a single-student list
 * under the key "<school>::<subject>".
 */
function subjectMap() {
    var scores = this.subScores;
    for (var subject in scores) {
        var entry = { name : this.name, score : scores[subject] };
        emit(this.school + "::" + subject, { students : [entry] });
    }
}
/**
 * Reduce function shared by both jobs: merges several student lists into a
 * single list holding at most the ten highest-scoring students.
 *
 * The return value has the same {students: [...]} shape as each input value,
 * so the output of one call can be passed back in as an input of another.
 *
 * @param {string} school - The key being reduced (not used by the computation).
 * @param {Array<{students: Array<{name: string, score: number}>}>} values -
 *     Values emitted for this key, or results of earlier reduce calls.
 * @returns {{students: Array<{name: string, score: number}>}} The retained
 *     students, sorted by score in descending order.
 */
function reduce(school, values) {
    var LIMIT = 10;
    var byScoreDesc = function (a, b) { return b.score - a.score; };

    // All working variables are declared locally; assigning to undeclared
    // names would create globals (and throws in strict mode).
    var toppers = [];
    for (var i = 0; i < values.length; i++) {
        var students = values[i].students;
        for (var j = 0; j < students.length; j++) {
            var candidate = students[j];
            if (toppers.length < LIMIT) {
                toppers.push(candidate);
            } else if (candidate.score > toppers[LIMIT - 1].score) {
                // toppers is kept sorted, so the last slot is the current minimum.
                toppers[LIMIT - 1] = candidate;
            } else {
                // Candidate rejected: the list is unchanged and still sorted.
                continue;
            }
            toppers.sort(byScoreDesc);
        }
    }
    // Cannot return an array at the top level, so had to make it an object
    return {students : toppers};
}
// Run both map-reduce jobs against the "students" collection of the
// "map_reduce" database and report how long they took.
var mydb = db.getSisterDB("map_reduce");
var start = Date.now();
var end = 0;
print("Begin : " + start);
try {
    var jobs = [
        { map : totalMap, out : "total_toppers" },
        { map : subjectMap, out : "subject_toppers" }
    ];
    var failedJob = null;
    for (var k = 0; k < jobs.length && failedJob === null; k++) {
        var res = mydb.runCommand({mapReduce : "students", map : jobs[k].map, reduce : reduce, verbose : true, out : jobs[k].out});
        // Print every job's result, not only the last one.
        printjson(res);
        // NOTE(review): runCommand may report a failure through a result
        // document with ok: 0 rather than by throwing - confirm for the shell
        // version in use. Checking `ok` keeps a failed job from being
        // reported as a success.
        if (!res || !res.ok) {
            failedJob = jobs[k].out;
        }
    }
    end = Date.now();
    if (failedJob === null) {
        print("Success : End : " + end);
    } else {
        print("Failed (" + failedJob + ") : End : " + end);
    }
} catch (ex) {
    printjson(ex);
    end = Date.now();
    print("With exception : End : " + end);
}
print("Time taken in milliseconds = " + (end - start));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment