Last active
December 18, 2015 20:49
-
-
Save bravoecho/5842797 to your computer and use it in GitHub Desktop.
Normally distributed random values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'pp' | |
require 'benchmark' | |
require 'csv' | |
require 'securerandom' | |
# Generates normally distributed random numbers given a mean and a | |
# standard deviation. | |
# | |
# It uses the Marsaglia polar method for the popular Box-Muller transform. | |
# See <https://en.wikipedia.org/wiki/Marsaglia_polar_method> | |
# | |
# It is an Enumerable, and also provides a convenience `next` method to use when | |
# the size of the sample is not known in advance. | |
# | |
# Note: since it is based on the Enumerator class, and thus it uses fibers, | |
# it's not safe for use in threaded code, where it would raise the error | |
# "fiber called across threads". This will be fixed in a future version. | |
# | |
# Usage: | |
# | |
# normal_rand = NormalRand.new(mean: 15, stddev: 3) | |
# normal_rand.take(1_000) | |
# # it returns an array of normally distributed random float numbers | |
# | |
# To display the frequencies of a sample of size `sample_size`, with step 'step' | |
# (with default step = 1, try to use 0.3 for example): | |
# | |
# normal_rand.rounded_frequencies(sample_size, step) | |
# # => it returns a Hash of frequencies for classes of values defined by | |
# # the given step | |
# | |
class NormalRand | |
include Enumerable | |
RAND_GEN = SecureRandom.method(:random_number) | |
def initialize(options = {}) | |
@mean = options.fetch(:mean) | |
@stddev = options.fetch(:stddev) | |
@rand_gen = options.fetch(:rand_gen, RAND_GEN) | |
end | |
def next | |
enumerator.next | |
end | |
def each | |
if block_given? | |
enumerator.each { |value| yield value } | |
else | |
enumerator.each | |
end | |
end | |
# Create a hash of the frequency of n random values classified by the | |
# corresponding rounded step (default step is 1). | |
# | |
# Uses: | |
# | |
# - testing purposes, to check if the random values actuall satisfy a normal | |
# distribution by analising the correlation; | |
# | |
# - empirically decide which mean and stddev to use for the simluation; | |
# int this case, initialize the object with a desired mean and stddev, | |
# and then try the rounded frequencies for a relatively large sample (10,000); | |
# of course one could also make this choice based on the properties of a | |
# normal distribution: | |
# - 68% of values are within 1 standard deviation of the mean | |
# - 95% are within 2 standard deviations | |
# - 99.7% are within 3 standard deviations | |
def rounded_frequencies(n_values, step=1) | |
pairs = self.take(n_values) | |
.group_by { |number| round_to_step(number, step) } | |
.map { |rounded_value, close_values| [rounded_value, close_values.size] } | |
.sort | |
Hash[pairs] | |
end | |
private | |
# Example: round_to_step(5.6723, 0.25) # => 5.75 | |
def round_to_step(number, step) | |
steps = 1.0 / step | |
(number * steps).round / steps | |
end | |
def enumerator | |
@enumerator ||= Enumerator.new do |yielder| | |
loop do | |
if @next_gaussian_rand | |
yielder.yield @next_gaussian_rand | |
@next_gaussian_rand = nil | |
else | |
x, y, s = 0, 0, 0 | |
loop do | |
x = rand_between_minus_1_and_1 | |
y = rand_between_minus_1_and_1 | |
s = x**2 + y**2 | |
break if 0 < s && s < 1 | |
end | |
coeff = Math.sqrt(-2.0 * Math.log(s) / s) | |
@next_gaussian_rand = @mean + @stddev * y * coeff | |
yielder.yield @mean + @stddev * x * coeff | |
end | |
end | |
end | |
end | |
# rand(-1.0..1.0) would be nicer, but `SecureRandom.random_number` does not | |
# support ranges. | |
def rand_between_minus_1_and_1 | |
@rand_gen.() * 2 - 1 | |
end | |
end | |
# This is to verify that the gaussian random generator is working reasonably | |
class NormalDistribution | |
def initialize(options={}) | |
@mean = options.fetch(:mean) | |
@stddev = options.fetch(:stddev) | |
@sample_size = options.fetch(:sample_size, 1) | |
end | |
def f(x) | |
coeff = 1 / (@stddev * Math.sqrt(2 * Math::PI)) | |
exp = - (x - @mean)**2 / (2 * @stddev**2) | |
coeff * (Math::E**exp) | |
end | |
def for_range(range, step=1) | |
{}.tap do |result| | |
range.step(step).each { |x| result[x] = f(x) * @sample_size * step } | |
end | |
end | |
end | |
# Made up values | |
mean = 23.098 | |
stddev = 4.420 | |
step = 1 | |
sample_size = 30_000 | |
random_gaussian_frequencies = nil | |
puts Benchmark.measure { | |
random_gaussian = NormalRand.new(mean: mean, stddev: stddev) | |
random_gaussian_frequencies = random_gaussian.rounded_frequencies(sample_size, step) | |
} | |
pp random_gaussian_frequencies | |
pp random_gaussian_frequencies.values.inject(:+) | |
CSV.open("#{ENV['HOME']}/desktop/rand_gaussian.csv", "w") { |csv| | |
random_gaussian_frequencies.to_a.each { |pair| | |
csv << pair | |
} | |
} | |
################################################################################ | |
normal_distribution = nil | |
normal_distribution = NormalDistribution.new(mean: mean, stddev: stddev, sample_size: sample_size) | |
.for_range(0..50, step) | |
CSV.open("#{ENV['HOME']}/desktop/normal_distribution.csv", "w") { |csv| | |
normal_distribution.to_a.each { |pair| | |
csv << pair | |
} | |
} | |
pp normal_distribution | |
pp normal_distribution.values.inject(:+) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment