s = Sample.new(5,8) # Uses a default 95% confidence level, drawing 5,000 samples
s.call # Returns a histogram of 5 entries
s.mean # Returns the calculated mean between 5 and 8
s.sample_mean # Returns the mean of the sample data
s.sigma # Returns the standard deviation, calculated from the confidence interval
s1 = Sample.new(5, 8, confidence: 0.8) # Changes the confidence level to 80%
Sample.call(5,8) # Cuts to the chase, just returns the histogram
Last active
June 27, 2017 15:29
-
-
Save davidrichards/5fde3780947ed5ef101170b7692870eb to your computer and use it in GitHub Desktop.
Translate two confidence intervals into a mean, a standard deviation, and a histogram of sampled data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'histogram/array' | |
require 'croupier' | |
class Sample | |
def self.call(*args) | |
new(*args).call | |
end | |
attr_reader :a, :b, :opts | |
def initialize(a, b, opts={}) | |
@a, @b = [a, b].sort | |
@opts = opts.with_indifferent_access | |
end | |
def confidence | |
@confidence ||= opts.fetch(:confidence, 0.95) | |
end | |
def mean | |
@mean ||= (a + b) / 2.0 | |
end | |
# These are a rational Chebyshev approximation, a ratio of polynomials. | |
# Taken from the R implementation of qnorm. | |
# This is quite imprecise for numbers below 0.075 and above 0.925. | |
def qnorm(p) | |
q = p - 0.5 # Not right... | |
r = 0.180625 - q * q | |
val = | |
q * (((((((r * 2509.0809287301226727 + | |
33430.575583588128105) * r + 67265.770927008700853) * r + | |
45921.953931549871457) * r + 13731.693765509461125) * r + | |
1971.5909503065514427) * r + 133.14166789178437745) * r + | |
3.387132872796366608) / | |
(((((((r * 5226.495278852854561 + | |
28729.085735721942674) * r + 39307.89580009271061) * r + | |
21213.794301586595867) * r + 5394.1960214247511077) * r + | |
687.1870074920579083) * r + 42.313330701600911252) * r + 1.0) | |
val | |
end | |
def q | |
@q ||= qnorm(confidence) | |
end | |
def sigma | |
@sigma ||= (b - mean) / q | |
end | |
def distribution | |
@distribution ||= Croupier::Distributions.normal(mean: mean, std: sigma) | |
end | |
def n | |
@n ||= opts.fetch(:n, 5_000) | |
end | |
def samples | |
@samples ||= n.times.map { distribution.generate_number } | |
end | |
def slots | |
@slots ||= opts.fetch(:slots, 5) | |
end | |
def histogram | |
return @histogram if @histogram | |
x, y = samples.histogram(slots) | |
@histogram = Hash[x.zip(y)] | |
end | |
# Online approach to calculate mean, useful for reinforcement learning too. | |
def sample_mean | |
@sample_mean ||= samples.reduce do |mu, x| | |
(1 - (1.0 / n)) * mu + ((1.0 / n) * x) | |
end | |
end | |
def draw | |
distribution.generate_number | |
end | |
def z_score(x, mu, sigma) | |
(x - mu) / sigma | |
end | |
def draw_percentile | |
percentile_for(z_score(draw, sample_mean, sigma)) | |
end | |
def percentile_for(z) | |
return 0 if z < -6.5 | |
return 1 if z > 6.5 | |
fact_k = 1 | |
sum = 0 | |
term = 1 | |
k = 0 | |
loop_stop = Math.exp(-23) | |
while term.abs > loop_stop do | |
term = 0.3989422804 * ((-1)**k) * (z**k) / (2*k+1) / (2**k) * (z**(k+1)) /fact_k | |
sum += term | |
k += 1 | |
fact_k *= k | |
end | |
sum += 0.5 | |
1-sum | |
end | |
def call | |
histogram | |
end | |
def inspect | |
"#{self.class.name}: #{a}, #{b}, #{histogram}" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment