Last active
October 28, 2020 16:36
-
-
Save IceCreamYou/51f3f38937f9bdf999db to your computer and use it in GitHub Desktop.
Put numbers into buckets either by bucket size or range size.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Utility method to round numbers to a given number of decimal places.
 *
 * Can be called on a number with one argument (the number of places), or
 * directly on the prototype with two arguments (the value, then the places).
 * A negative number of places rounds to the left of the decimal point.
 * The number of places is truncated to an integer; a missing or falsy value
 * is treated as 0.
 *
 * Usage:
 *   3.5.round(0) // 4
 *   Math.random().round(4) // e.g. 0.8179
 *   var a = 5532; a.round(-2) // 5500
 *   Number.prototype.round(12345.6, -1) // 12350
 *   32..round(-1) // 30 (two dots required since the first one is a decimal)
 *
 * @param {Number} v The number of places (one-argument form) or the value to
 *   round (two-argument form).
 * @param {Number} [a] The number of places when `v` is the value to round.
 *
 * @return {Number} The rounded value.
 */
Number.prototype.round = function(v, a) {
  if (typeof a === 'undefined') {
    // One-argument form: the receiver is the value, the argument is the places.
    a = v;
    v = this;
  }
  var places = a | 0; // undefined, NaN and fractions collapse to an integer
  if (places < 0) {
    // Negative powers of ten (e.g. 1e-5) are not exactly representable, so
    // dividing by them can produce results like 99999.99999999999. Scale with
    // the exact positive power instead.
    var scale = Math.pow(10, -places);
    return Math.round(v / scale) * scale;
  }
  var m = Math.pow(10, places);
  return Math.round(v * m) / m;
};
/**
 * Put numbers into buckets that have equal-size ranges.
 *
 * The range [min, max] is split into `bucketCount` intervals of equal width.
 * Each bucket includes its lower bound but not its upper bound, except the
 * top bucket, which also includes `max`. Values outside [min, max] (and NaN)
 * are left out of the result.
 *
 * @param {Number[]} data The data to bucket.
 * @param {Number} bucketCount The number of buckets to use. Expected to be a
 *   positive integer.
 * @param {Number} [min] The minimum allowed data value. Defaults to the smallest value passed.
 * @param {Number} [max] The maximum allowed data value. Defaults to the largest value passed.
 *
 * @return {Number[][]} An array of buckets of numbers.
 */
function bucketNumbersLinearly(data, bucketCount, min, max) {
  var i = 0, l = data.length;
  // If min and/or max are not given, set them to the lowest and highest data
  // values. Each bound is defaulted independently of the other.
  var findMin = typeof min === 'undefined',
      findMax = typeof max === 'undefined';
  if (findMin) min = Infinity;
  if (findMax) max = -Infinity;
  if (findMin || findMax) {
    for (i = 0; i < l; i++) {
      if (findMin && data[i] < min) min = data[i];
      if (findMax && data[i] > max) max = data[i];
    }
  }
  var inc = (max - min) / bucketCount,
      buckets = new Array(bucketCount);
  // Initialize buckets
  for (i = 0; i < bucketCount; i++) {
    buckets[i] = [];
  }
  // Put the numbers into buckets
  for (i = 0; i < l; i++) {
    var value = data[i];
    // Skip anything outside the allowed range; it has no bucket to go in.
    // Written as a negated conjunction so NaN is skipped too.
    if (!(value >= min && value <= max)) continue;
    // Buckets include the lower bound but not the higher bound, except the top bucket
    if (value === max) {
      buckets[bucketCount - 1].push(value);
    }
    else {
      // Clamp in case floating-point division lands a value just below `max`
      // exactly on the (nonexistent) bucket after the last one.
      var index = Math.min(Math.floor((value - min) / inc), bucketCount - 1);
      buckets[index].push(value);
    }
  }
  return buckets;
}
/**
 * Put numbers into equal-sized buckets.
 *
 * The data is sorted in ascending order (on a copy; the caller's array is
 * left untouched) and cut into `bucketCount` consecutive slices. Every value
 * ends up in exactly one bucket; when the data does not divide evenly, bucket
 * sizes differ by at most one.
 *
 * @param {Number[]} data The data to bucket.
 * @param {Number} bucketCount The number of buckets to use.
 *
 * @return {Object} An object with two properties, `buckets` and `averages`.
 *   The `buckets` property holds an array of buckets of numbers, and the
 *   `averages` property holds an array of the average value of each bucket.
 *   The average of an empty bucket is NaN.
 */
function populationBuckets(data, bucketCount) {
  // Sort a copy so the caller's array keeps its original order.
  var sorted = data.slice().sort(function(a, b) {
    return a - b;
  });
  var total = sorted.length,
      buckets = new Array(bucketCount),
      averages = new Array(bucketCount);
  for (var i = 0; i < bucketCount; i++) {
    // Proportional boundaries spread any remainder across the buckets instead
    // of dropping the trailing values.
    var start = Math.floor(i * total / bucketCount),
        end = Math.floor((i + 1) * total / bucketCount),
        subset = sorted.slice(start, end),
        sum = 0;
    for (var j = 0, bl = subset.length; j < bl; j++) {
      sum += subset[j];
    }
    buckets[i] = subset;
    averages[i] = sum / subset.length;
  }
  return { buckets: buckets, averages: averages };
}
/**
 * Takes an array of buckets of numbers and returns an array of summary statistics for each bucket.
 *
 * Relies on `Number.prototype.round` being defined to round the statistics to
 * four decimal places.
 *
 * @param {Number[][]} buckets The buckets to summarize.
 *
 * @return {Object[]} One object per bucket with the properties `bucket`
 *   (1-based position), `min`, `max`, `avg` and `count`. For an empty bucket
 *   `min`, `max` and `avg` are all NaN and `count` is 0.
 */
function showResults(buckets) {
  var results = [];
  for (var i = 0, l = buckets.length; i < l; i++) {
    var bucket = buckets[i],
        bl = bucket.length,
        minV = Infinity,
        maxV = -Infinity,
        sum = 0;
    for (var j = 0; j < bl; j++) {
      if (bucket[j] < minV) {
        minV = bucket[j];
      }
      if (bucket[j] > maxV) {
        maxV = bucket[j];
      }
      sum += bucket[j];
    }
    if (bl === 0) {
      // An empty bucket has no extremes; don't leak the loop sentinels
      // (Infinity / -Infinity) into the report. The average is already NaN (0 / 0).
      minV = NaN;
      maxV = NaN;
    }
    results.push({bucket: i+1, min: minV.round(4), max: maxV.round(4), avg: (sum / bl).round(4), count: bl}); // min, max, avg, count
  }
  return results;
}
/**
 * Builds an array of `numValues` pseudo-random numbers drawn from the
 * interval starting at `min` and spanning up to `max`.
 *
 * The values do not need to be uniformly distributed over a fixed range for
 * the bucketing functions to work; a uniform spread simply makes the output
 * easy to check by eye.
 *
 * @param {Number} numValues How many numbers to generate.
 * @param {Number} [min] Lower end of the range. Defaults to 0.
 * @param {Number} [max] Upper end of the range. Defaults to 1.
 *
 * @return {Number[]} The generated numbers.
 */
function getTestData(numValues, min, max) {
  var upper = (typeof max === 'undefined') ? 1 : max;
  var lower = (typeof min === 'undefined') ? 0 : min;
  var span = upper - lower;
  var values = new Array(numValues);
  var index = 0;
  while (index < numValues) {
    values[index] = Math.random() * span + lower;
    index++;
  }
  return values;
}
// Test that numbers are bucketed as expected. Displays more nicely on Firefox.
// Fall back to console.log where console.table is unavailable, and bind the
// chosen method so it keeps `console` as its receiver when called via `table`.
var table = (console.table || console.log).bind(console);
console.log('Linear');
table(showResults(bucketNumbersLinearly(getTestData(8000), 8, 0, 1)));
console.log('Population');
table(showResults(populationBuckets(getTestData(8000), 8).buckets));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment