Last active
July 28, 2020 15:32
-
-
Save srikumarks/9a3908afefc948633cfaf773c858779d to your computer and use it in GitHub Desktop.
Freedman–Diaconis rule of thumb for the number of bins of a histogram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// metric = array of real numbers (e.g. more than 100 values or so)
// IQR = interquartile range
// Number of histogram bins for `metric` using the Freedman-Diaconis bin width.
//   metric      - array of numbers.
//   defaultBins - optional fallback bin count; 10 is used when it is falsy.
// Returns the fallback when `metric` is empty or when the computed bin width
// is no larger than (max - min) / metric.length; otherwise returns
// ceil((max - min) / binWidth).
function numBins(metric, defaultBins) {
    var fallback = defaultBins || 10;
    if (metric.length === 0) {
        // No data: the range and IQR are undefined, so use the fallback
        // instead of propagating NaN.
        return fallback;
    }
    var h = binWidth(metric);
    // Scan for the extremes in a loop. Math.max.apply(Math, metric) passes
    // every element as a call argument and can exceed the engine's argument
    // limit on large arrays.
    var ulim = -Infinity;
    var llim = Infinity;
    for (var i = 0; i < metric.length; i++) {
        ulim = Math.max(ulim, metric[i]);
        llim = Math.min(llim, metric[i]);
    }
    var range = ulim - llim;
    if (h <= range / metric.length) {
        return fallback; // Fix num bins if binWidth yields too small a value.
    }
    return Math.ceil(range / h);
}
// Freedman-Diaconis bin width: 2 * IQR * n^(-1/3), where n = metric.length.
function binWidth(metric) {
    var spread = iqr(metric);
    var scale = Math.pow(metric.length, -1 / 3);
    return 2 * spread * scale;
}
// Inter-quartile range of `metric`: the element at index floor(3n/4) minus
// the element at index floor(n/4) of an ascending-sorted copy. The input
// array itself is left untouched.
function iqr(metric) {
    var ascending = function (x, y) { return x - y; };
    var ordered = metric.slice();
    ordered.sort(ascending);
    var n = ordered.length;
    var lowerIdx = Math.floor(n / 4);
    var upperIdx = Math.floor(n * 3 / 4);
    return ordered[upperIdx] - ordered[lowerIdx];
}
A Python 2 implementation of the same:
import math
def numBins(metric, defaultBins=None):
    """Return a histogram bin count for *metric* from the Freedman-Diaconis width.

    Args:
        metric: Sequence of numbers.
        defaultBins: Fallback bin count; 10 is used when it is falsy or omitted.

    Returns:
        The fallback when *metric* is empty or when the computed bin width is
        no larger than ``(max - min) / len(metric)``; otherwise
        ``ceil((max - min) / binWidth)`` as an int.
    """
    fallback = defaultBins or 10
    if len(metric) == 0:
        # No data: there is no range or IQR to work with.
        return fallback
    h = binWidth(metric)
    spread = max(metric) - min(metric)
    # float() forces true division; under Python 2 an int range divided by
    # len() would otherwise truncate (often to 0) and skew the comparison.
    if h <= float(spread) / len(metric):
        return fallback
    return int(math.ceil(spread / h))
def binWidth(metric):
    """Return the Freedman-Diaconis bin width ``2 * IQR * n ** (-1/3)``.

    ``n`` is ``len(metric)``. The exponent is written ``-1.0 / 3`` so it is an
    exact one-third (not the truncated ``-0.333``) and stays a float division
    under Python 2 as well.
    """
    return 2 * iqr(metric) * (len(metric) ** (-1.0 / 3))
def comparator(a, b):
    """Return ``a - b``: negative, zero or positive as *a* is below, equal to or above *b*."""
    difference = a - b
    return difference
def iqr(metric):
    """Return the inter-quartile range of *metric*.

    The quartiles are the elements at indices ``n // 4`` and ``3 * n // 4``
    of an ascending-sorted copy, where ``n`` is ``len(metric)``. The input is
    not modified.

    Raises:
        IndexError: If *metric* is empty.
    """
    # sorted() returns the ordered copy. Sorting a slice in place would
    # discard the result and leave the quartiles read from unsorted data.
    ordered = sorted(metric)
    count = len(ordered)
    q1 = ordered[count // 4]
    q3 = ordered[count * 3 // 4]
    return q3 - q1
# Demo: the integers 0..1865 as sample data, then print the suggested bin count.
metric = list(range(0, 1866))
print(numBins(metric, 10))
Python 3 compatible:
import math
from functools import cmp_to_key
def numBins(metric, defaultBins=None):
    """Return a histogram bin count for *metric* from the Freedman-Diaconis width.

    Args:
        metric: Sequence of numbers.
        defaultBins: Fallback bin count; 10 is used when it is falsy or omitted.

    Returns:
        The fallback when *metric* is empty or when the computed bin width is
        no larger than ``(max - min) / len(metric)``; otherwise
        ``ceil((max - min) / binWidth)`` as an int.
    """
    fallback = defaultBins or 10
    if len(metric) == 0:
        # No data: there is no range or IQR to work with.
        return fallback
    h = binWidth(metric)
    spread = max(metric) - min(metric)
    if h <= spread / len(metric):
        # Bin width too small relative to the data range; use the fallback.
        return fallback
    return int(math.ceil(spread / h))
def binWidth(metric):
    """Return the Freedman-Diaconis bin width ``2 * IQR * n ** (-1/3)``.

    ``n`` is ``len(metric)``. The exponent is an exact one-third and no
    longer the truncated ``-0.333``.
    """
    return 2 * iqr(metric) * (len(metric) ** (-1.0 / 3))
def comparator(a, b):
    """Return ``a - b``: negative, zero or positive as *a* is below, equal to or above *b*."""
    difference = a - b
    return difference
def iqr(metric):
    """Return the inter-quartile range of *metric*.

    The quartiles are the elements at indices ``n // 4`` and ``3 * n // 4``
    of an ascending-sorted copy, where ``n`` is ``len(metric)``. The input is
    not modified.

    Raises:
        IndexError: If *metric* is empty.
    """
    # sorted() returns the ordered copy. Sorting a slice in place would
    # discard the result and leave the quartiles read from unsorted data.
    ordered = sorted(metric)
    count = len(ordered)
    q1 = ordered[count // 4]
    q3 = ordered[count * 3 // 4]
    return q3 - q1
# Demo: the integers 0..1865 as sample data, then print the suggested bin count.
metric = list(range(0, 1866))
print(numBins(metric, 10))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I used this as test script for your code: