Skip to content

Instantly share code, notes, and snippets.

@envp
Last active January 29, 2016 12:14
Show Gist options
  • Select an option

  • Save envp/820f077d229bad4f467f to your computer and use it in GitHub Desktop.

Select an option

Save envp/820f077d229bad4f467f to your computer and use it in GitHub Desktop.
lib/distribution/binomial/ruby.rb
module Distribution
module Binomial
module Ruby_
class << self
# Returns a `Proc`object that yields a random integer `k` <= `n`
# which is binomially distributed with mean np and variance np(1-p)
# This method uses Luc Devroye's "Rejection Method" from page 533
# of his text: "Non-Uniform Random Variate Generation."
#
# @param n [Fixnum] the total number of trials
# @param prob [Float] probabilty of success in a single independant trial
# @param seed [Fixnum, nil] Value to initialize the generator with.
# The seed is always taken modulo 100_000_007. If omitted the value is
# remainder of `Random.new_seed` modulo 100_000_007
#
# @return [Proc] a proc that generates a binomially distributed integer
# `k` <= `n` when called
def rng(n, prob, seed = nil)
seed = Random.new_seed if seed.nil?
seed = seed.modulo 100_000_007
prng = Random.new(seed)
-> {devroye_rejection_generator(n, prob, prng)}
end
# Returns the probability mass function for a binomial variate
# having `k` successes out of `n` trials, with each success having
# probability `prob`.
#
# @param k [Fixnum, Bignum] number of successful trials
# @param n [Fixnum, Bignum] total number of trials
# @param prob [Float] probabilty of success in a single independant trial
#
# @return [Float] the probability mass for Binom(k, n, prob)
def pdf(k, n, prob)
fail 'k > n' if k > n
Math.binomial_coefficient(n, k) * (prob**k) * (1 - prob)**(n - k)
end
# TODO: Use exact_regularized_beta for
# small values and regularized_beta for bigger ones.
def cdf(k, n, prob)
# (0..x.floor).inject(0) {|ac,i| ac+pdf(i,n,pr)}
Math.regularized_beta(1 - prob, n - k, k + 1)
end
# Returns the exact CDF value by summing up all preceding values
#
# @param k [Fixnum, Bignum] number of successful trials
# @param n [Fixnum, Bignum] total number of trials
# @param prob [Float] probabilty of success in a single independant trial
def exact_cdf(k, n, prob)
out = (0..k).inject(0) { |ac, i| ac + pdf(i, n, prob) }
out = 1 if out > 1.0
out
end
# Returns the inverse-CDF or the quantile given the probability `prob`,
# the total number of trials `n` and the number of successes `k`
# Note: This is a candidate for future updates
#
# @paran qn [Float] the cumulative function value to be inverted
# @param n [Fixnum, Bignum] total number of trials
# @param prob [Float] probabilty of success in a single independant trial
#
# @return [Fixnum, Bignum] the integer quantile `k` given cumulative value
#
# @raise RangeError if qn is from outside of the closed interval [0, 1]
def quantile(qn, n, pr)
fail RangeError, 'cdf value(qn) must be from [0, 1]. '\
"Cannot find quantile for qn=#{qn}" if qn > 1 || qn < 0
ac = 0
(0..n).each do |i|
ac += pdf(i, n, pr)
return i if qn <= ac
end
end
# :nodoc:
# Upcoming implementation is a rejection algorithm, which utilises
# splitting the domain into various segments based on Lemma 4.8 from
# chapter 10 of Luc Devroye's amazing book:
# L. Devroye: [Non-Uniform Random Variate Generation], Springer-Verlag
def devroye_rejection_generator(n, p, seed)
fail ArgumentError, "Use Random.new_seed to generate a seed larger than 100,330,201" unless seed > 100_330_201
# Setup all the required variables, this ends up being lazily evaluated
setup = Hash.new do |h, key|
case key
when :d1
h[key] = [
1,
Math.sqrt(
n * p * (1 - p) *
Math.log(128 * n * p / (81 * Math::PI * (1 - p)))
)
].max
when :d2
h[key] = [
1,
Math.sqrt(
n * p * (1 - p) *
Math.log(128 * n * (1 - p) / (Math::PI * p))
)
].max
when :c
h[key] = 2 * h[:d1] / h[:np]
when :sigma1
h[key] = (1 + h[:d1] /
(4 * h[:np])) * Math.sqrt(n * p * (1 - p))
when :sigma2
h[key] = (1 + h[:d1] /
(4 * (n - h[:np]))) * Math.sqrt(n * p * (1 - p))
when :np
h[key] = n * p
else
h[key] = nil
end
end
# Keep a seperate hash for our splitting boundaries
bounds = Hash.new do |h, key|
case key
when :a1
h[key] = 0.5 * Math.exp(setup[:c]) * setup[:sigma1] *
Distribution::SQ2PI
when :a2
h[key] = 0.5 * setup[:sigma2] * Distribution::SQ2PI
when :a3
h[key] = (2 * (setup[:sigma1] ** 2)) / (setup[:d1] ** 2) *
Math.exp(
(setup[:d1] / (n - setup[:np])) -
(setup[:d1] ** 2) / (2 * (setup[:sigma1] ** 2))
)
when :a4
h[key] = (2 * (setup[:sigma2] ** 2)) / (setup[:d2] ** 2) *
Math.exp(
-(setup[:d1] ** 2) / (2 * (setup[:sigma1] ** 2))
)
when :s
h[k] = h[:a1] + h[:a2] + h[:a3] + h[:a4]
else
h[key] = nil
end
end
rejected = false
k, v = 0, 0
# The rejection algorithm assumes that p <= 0.5 is satisfied
# This will be fixed later by a final inversion step, since
# Binomial(n, prob) is symmetric in `prob`
p = prob > 0.5 ? 1 - prob : prob
# Seeeds for temporary rngs
# Get this by finding remainder modulo consecutive primes
# 100_000_007, 100_030_001, 100_111_001, 100_330_201
seeds = {
unif: seed.modulo 100_000_007,
norm: seed.modulo 100_030_001,
expo: [seed.modulo 100_111_001, seed.modulo 100_330_201]
}.freeze
# Create all rng procs before hand
# Since no actual code has been run yet, the following steps are
# comparatively low cost
rngs = {
unif: Random.new(seeds[:unif]),
norm: Distribution::Normal.rng(0, 1, seeds[:norm]),
expo: [
Distribution::Exponential.rng(setup[:np], {random: Random.new(seeds[:expo][0]}),
Distribution::Exponential.rng(setup[:np], {random: Random.new(seeds[:expo][1])}
]
}
loop do
urv = rngs[:unif].rand * bounds[:s]
if urv <= bounds[:a1]
nrv = (rngs[:norm].call * setup[:sigma1]).abs
erv = rngs[:expo][0].call
rejected = true unless nrv < setup[:d1]
if not rejected
k = nrv.floor
v = -erv - 0.5 * (nrv ** 2) + setup[:c]
end
elsif urv > bounds[:a1] && urv <= bounds[:a1] + bounds[:a2]
nrv = (rngs[:norm].call * setup[:sigma2]).abs
erv = rngs[:expo][0].call
rejected = true unless nrv < setup[:d2]
if not rejected
k = (-nrv).floor
v = -erv - 0.5 * (nrv ** 2)
end
elsif urv > bounds[:a1] + bounds[:a2] && urv <= bounds[:a1] + bounds[:a2] + bounds[:a3]
erv1, erv2 = rngs[:expo].map {|r| r.call}
y = setup[:d1] + (2 * (setup[:sigma1] ** 2) / setup[:d1]) * erv1
k = y.floor
v = -erv2 -
(setup[:d1] / 2 * (setup[:sigma1] ** 2)) * y +
setup[:d1] / (n - setup[:np])
rejected = false
else
# Effectively: urv > bounds[:a1] + bounds[:a2] + bounds[:a3]
erv1, erv2 = rngs[:expo].map {|r| r.call}
y = setup[:d2] + (2 * (setup[:sigma2] ** 2) / setup[:d2]) * erv1
k = (-y).floor
v = -erv2 - (setup[:d2] / 2 * (setup[:sigma2] ** 2)) * y
rejected = false
end
rejected = rejected ||
k < -setup[:np] ||
k > n - setup[:np] ||
v > Math.log(
pdf(setup[:np].floor + k, n, p) /
pdf(setup[:np].floor, n, p)
)
break unless rejected
end
end
private :devroye_rejection_generator
alias p_value quantile
alias exact_pdf pdf
end
end
end
end
@envp
Copy link
Copy Markdown
Author

envp commented Jan 29, 2016

Here be Satan.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment