Created
November 14, 2016 20:54
-
-
Save willf/91a6cabfc16a3829f6d60467a1814a33 to your computer and use it in GitHub Desktop.
Basic descriptive statistics using Ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The _percentile_sorted function is adapted from similar code
# released under the Academic Free License, 3.0, written by
# Derrick Pallas, which used to be available at http://derrick.pallas.us/ruby-stats/
# It fixes a bug that can occur if the list of numbers is small.
# The other code is more or less from my [first ever gist](https://gist.github.com/willf/187846)
# Returns the p-th percentile of a list of numbers that is already sorted
# in ascending order.
#
# The rank is computed as p/100 * (n + 1) and split into a 1-based integer
# position plus a fractional part; the fractional part linearly interpolates
# between the element at that position and the next one.
#
# @param p [Numeric] the percentile to compute (e.g. 50 for the median)
# @param ns [Array<Numeric>] values sorted in ascending order
# @return [Numeric, nil] the percentile value, or nil when ns is empty
def _percentile_sorted(p, ns)
  return nil if ns.empty?

  rank = p / 100.0 * (ns.size + 1)
  position, fraction = rank.divmod(1)

  # A rank below the first element must clamp to the minimum; indexing with
  # position - 1 == -1 would silently wrap around to the last element.
  return ns.first if position < 1
  # A rank at or past the last element clamps to the maximum (also covers
  # p == 100, where position - 1 would fall off the end of the array).
  return ns.last if position >= ns.size
  return ns[position - 1] if fraction.zero?

  lower = ns[position - 1]
  lower + (fraction * (ns[position] - lower))
end
# This requires an O(n log n) sort of the items in the array,
# plus a one-time pass through the array
# and a one-time pass through the distinct values of the array.
# Computes basic descriptive statistics for a list of numbers.
#
# The input is sorted (into a copy), then walked once to accumulate the
# running mean, the sum of squared deviations (incremental update, so no
# second pass is needed for the variance), the sum, and a value => count
# histogram.
#
# The returned hash has the keys :n, :mean, :variance (population variance,
# i.e. divided by n), :stddev, :sum, :min, :max, :histogram, :modes,
# :median, :percentiles and :quartiles.
#
# For an empty list, :min, :max and :median are nil and :variance/:stddev
# are NaN.
#
# @param ns [Array<Numeric>] the values to summarise (not modified)
# @param include_histogram [Boolean] when false, :histogram and :modes are
#   returned as empty hashes
# @return [Hash] the statistics described above
def stats(ns, include_histogram = true)
  ns = ns.sort
  len = ns.size
  histogram = Hash.new(0)

  mean = 0.0
  m2 = 0.0
  sum = 0.0
  ns.each_with_index do |value, index|
    delta = value - mean
    mean += delta / (index + 1)
    # Uses the *updated* mean on purpose: this is the incremental
    # sum-of-squared-deviations update.
    m2 += delta * (value - mean)
    sum += value
    histogram[value] += 1
  end

  var = m2 / len
  max_count = histogram.values.max

  median =
    if len.zero?
      nil
    elsif len.odd?
      ns[len / 2]
    else
      # Average of the two middle values; divide the whole sum, as a float.
      (ns[len / 2 - 1] + ns[len / 2]) / 2.0
    end

  percentile_table = lambda do |points|
    points.each_with_object({}) { |pct, table| table[pct] = _percentile_sorted(pct, ns) }
  end

  {
    n: len,
    mean: mean,
    variance: var,
    stddev: Math.sqrt(var),
    sum: sum,
    min: ns.first,
    max: ns.last,
    histogram: (include_histogram ? histogram : {}),
    modes: (include_histogram ? histogram.select { |_value, count| count == max_count } : {}),
    median: median,
    percentiles: percentile_table.call([10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 99.9]),
    quartiles: percentile_table.call([25, 50, 75])
  }
end
# prng = Random.new
# puts stats((1..10000).map{|_| prng.rand(6)+1 + prng.rand(6)+1})
#
# puts stats((1..10000).map{|_| prng.rand(1.0)}, false)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment