Skip to content

Instantly share code, notes, and snippets.

@srikumarks
Last active July 28, 2020 15:32
Show Gist options
  • Save srikumarks/9a3908afefc948633cfaf773c858779d to your computer and use it in GitHub Desktop.
Save srikumarks/9a3908afefc948633cfaf773c858779d to your computer and use it in GitHub Desktop.
Freedman-Diaconis rule of thumb for the number of bins in a histogram
// metric = array of real numbers (like > 100 or something)
// IQR = inter-quartile range
// Estimate the number of histogram bins for `metric` as
// ceil((max - min) / binWidth(metric)).
//
// metric      - array of numbers.
// defaultBins - optional bin count to fall back on; 10 is used when it is
//               omitted or falsy.
//
// Returns the fallback when `metric` is empty or when the computed bin width
// is no larger than the average spacing (max - min) / metric.length.
function numBins(metric, defaultBins) {
    var fallback = defaultBins || 10;
    // With no data there is no range or IQR; without this guard the
    // arithmetic below would produce NaN.
    if (metric.length === 0) {
        return fallback;
    }
    // Find the extremes with a loop. Math.max.apply(Math, metric) passes every
    // element as a separate argument and throws a RangeError on large arrays.
    var ulim = metric[0], llim = metric[0];
    for (var i = 1; i < metric.length; i++) {
        if (metric[i] > ulim) { ulim = metric[i]; }
        if (metric[i] < llim) { llim = metric[i]; }
    }
    var h = binWidth(metric);
    if (h <= (ulim - llim) / metric.length) {
        return fallback; // Fix num bins if binWidth yields too small a value.
    }
    return Math.ceil((ulim - llim) / h);
}
// Bin width for `metric`: 2 * IQR * n^(-1/3), where n is metric.length.
function binWidth(metric) {
    var spread = iqr(metric);
    var shrink = Math.pow(metric.length, -1/3);
    return 2 * spread * shrink;
}
// Inter-quartile range of `metric`.
// Works on a numerically sorted copy (the input array is left untouched) and
// takes the elements at indices floor(n / 4) and floor(3n / 4) as Q1 and Q3.
function iqr(metric) {
    function ascending(a, b) {
        return a - b;
    }
    var ordered = metric.slice();
    ordered.sort(ascending);
    var n = ordered.length;
    var lowerIndex = Math.floor(n / 4);
    var upperIndex = Math.floor(n * 3 / 4);
    return ordered[upperIndex] - ordered[lowerIndex];
}
@swateek
Copy link

swateek commented Jul 28, 2017

I used this as a test script for your code:

// Build the sample 0, 1, ..., 1865 and print the bin count computed for it.
var metric = [];
var i = 0;
while (i < 1866) {
    metric.push(i);
    i++;
}
console.log(numBins(metric, 10));

@swateek
Copy link

swateek commented Jul 28, 2017

A python implementation of the same:

import math

def numBins(metric, defaultBins=None):
	"""Return the number of histogram bins for ``metric``.

	The count is ``ceil((max - min) / binWidth(metric))``.

	Args:
		metric: Sequence of numbers.
		defaultBins: Bin count to fall back on. 10 is used when it is
			omitted or falsy.

	Returns:
		The bin count as an int. The fallback is returned when ``metric``
		is empty or when the computed bin width is no larger than the
		average spacing ``(max - min) / len(metric)``.
	"""
	fallback = defaultBins or 10
	# len() rather than truthiness so array-like inputs are accepted too.
	if len(metric) == 0:
		return fallback
	h = binWidth(metric)
	ulim = max(metric)
	llim = min(metric)
	# float() forces true division; on Python 2 integer data would otherwise
	# be floor-divided and the threshold truncated.
	if h <= (ulim - llim) / float(len(metric)):
		return fallback
	return int(math.ceil((ulim - llim) / h))

def binWidth(metric):
	"""Return the bin width ``2 * IQR * n ** (-1/3)`` for ``metric``.

	``n`` is ``len(metric)`` and the IQR comes from ``iqr(metric)``.
	"""
	# Exact -1/3 exponent instead of the -0.333 approximation. Written with
	# float literals because -1/3 floors to -1 on Python 2.
	return 2 * iqr(metric) * (len(metric) ** (-1.0 / 3.0))

def comparator(a, b):
	"""Return ``a - b``, usable as a comparison function for ascending order.

	The result is negative when ``a`` is smaller than ``b``, zero when they
	are equal and positive when ``a`` is larger.
	"""
	difference = a - b
	return difference

def iqr(metric):
	"""Return the inter-quartile range of ``metric``.

	Q1 and Q3 are the elements at indices ``n // 4`` and ``3 * n // 4`` of a
	sorted copy of the data. The input is not modified.

	Raises:
		IndexError: If ``metric`` is empty.
	"""
	# sorted() returns the ordered copy. Calling metric[0:].sort() sorts a
	# temporary slice and throws it away, so the quartiles would be read
	# from unsorted data.
	ordered = sorted(metric)
	n = len(ordered)
	q1 = ordered[n // 4]
	q3 = ordered[(n * 3) // 4]
	return q3 - q1


# Demo: build the sample 0, 1, ..., 1865 and print its bin count.
metric = list(range(0, 1866))

# Parenthesised print works on both Python 2 and Python 3.
print(numBins(metric, 10))

@tjbanks
Copy link

tjbanks commented Apr 27, 2020

Python 3 compatible:

import math
from functools import cmp_to_key

def numBins(metric, defaultBins=None):
	"""Return the number of histogram bins for ``metric``.

	The count is ``ceil((max - min) / binWidth(metric))``.

	Args:
		metric: Sequence of numbers.
		defaultBins: Bin count to fall back on. 10 is used when it is
			omitted or falsy.

	Returns:
		The bin count as an int. The fallback is returned when ``metric``
		is empty or when the computed bin width is no larger than the
		average spacing ``(max - min) / len(metric)``.
	"""
	fallback = defaultBins or 10
	# Empty data has no range or quartiles; return the fallback instead of
	# failing further down. len() is used so array-like inputs work too.
	if len(metric) == 0:
		return fallback
	h = binWidth(metric)
	ulim = max(metric)
	llim = min(metric)
	data_range = ulim - llim
	if h <= data_range / len(metric):
		return fallback
	return int(math.ceil(data_range / h))

def binWidth(metric):
	"""Return the bin width ``2 * IQR * n ** (-1/3)`` for ``metric``.

	``n`` is ``len(metric)`` and the IQR comes from ``iqr(metric)``.
	"""
	# Exact -1/3 exponent instead of the -0.333 approximation.
	return 2 * iqr(metric) * (len(metric) ** (-1.0 / 3.0))

def comparator(a, b):
	"""Return ``a - b``, usable as a comparison function for ascending order.

	The result is negative when ``a`` is smaller than ``b``, zero when they
	are equal and positive when ``a`` is larger.
	"""
	difference = a - b
	return difference

def iqr(metric):
	"""Return the inter-quartile range of ``metric``.

	Q1 and Q3 are the elements at indices ``n // 4`` and ``3 * n // 4`` of a
	sorted copy of the data. The input is not modified.

	Raises:
		IndexError: If ``metric`` is empty.
	"""
	# sorted() returns the ordered copy. Calling metric[0:].sort() sorts a
	# temporary slice and throws it away, so the quartiles would be read
	# from unsorted data.
	ordered = sorted(metric)
	n = len(ordered)
	q1 = ordered[n // 4]
	q3 = ordered[(n * 3) // 4]
	return q3 - q1


# Demo: build the sample 0, 1, ..., 1865 and print its bin count.
metric = list(range(1866))

print(numBins(metric, 10))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment