Last active
February 12, 2019 16:42
-
-
Save kocolosk/a8b72d575ea6acc8279ac8aba908df26 to your computer and use it in GitHub Desktop.
A view that retrieves the top 10 partitions by document count (the result also includes up to two extra "boundary" partitions as an implementation artifact)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function (doc) { | |
var partition = doc._id.slice(0, doc._id.indexOf(':')) | |
emit(partition, 1); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function (keys, values, rereduce) { | |
var topTenPlusBoundaryKeys = function(partitions) { | |
// preserve boundary keys because we may not have the correct count for them yet | |
// not that it matters, but the array is reversed so these labels are correct | |
var first = partitions.pop(); | |
var last = partitions.shift(); | |
// sort the remaining entries by value | |
partitions.sort(function(p1, p2) { return p2.count - p1.count; }); | |
// return the top ten partitions, plus the boundary partitions, all sorted | |
var topTen = partitions.slice(0, 10); | |
if(first) { topTen.push(first) }; | |
if(last) { topTen.push(last) }; | |
topTen.sort(function(p1, p2) { return p2.count - p1.count; }); | |
return topTen; | |
}; | |
if (rereduce) { | |
// account for boundary keys by summing over each partition | |
var totals = values.reduce(function(acc, currentVals) { | |
currentVals.forEach(function(elem) { | |
if(acc[elem.partition]) { | |
acc[elem.partition] += elem.count; | |
} else { | |
acc[elem.partition] = elem.count; | |
} | |
}); | |
return acc; | |
}, {}); | |
// convert back into an Array with expected structure | |
var reduced = []; | |
for (var elem in totals) { | |
reduced.push({partition: elem, count: totals[elem]}) | |
}; | |
// sort in reverse order just to stay consistent with rereduce=false | |
// again, all that's required is to find the boundary keys | |
reduced.sort(function(p1, p2) { | |
if(p2.partition < p1.partition) { | |
return -1; | |
} | |
if(p1.partition < p2.partition) { | |
return 1; | |
} | |
return 0; | |
}); | |
return topTenPlusBoundaryKeys(reduced); | |
} | |
else { | |
// compute the number of index entries per partition | |
var reduced = keys.reduce(function(output, currentKey, index) { | |
if(currentKey[0] == output[0].partition) { | |
output[0].count += values[index] | |
} | |
else { | |
output.unshift({partition: currentKey[0], count: values[index]}) | |
} | |
return output; | |
}, [{partition: keys[0][0], count: 0}]); | |
return topTenPlusBoundaryKeys(reduced); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment