Skip to content

Instantly share code, notes, and snippets.

@davidkellis
Created October 14, 2014 14:07
Show Gist options
  • Save davidkellis/b6d2d5c66f26117237c5 to your computer and use it in GitHub Desktop.
Save davidkellis/b6d2d5c66f26117237c5 to your computer and use it in GitHub Desktop.
VMS Analysis
require 'time'
require 'pp'
require 'bigdecimal'
# This implementation is based on http://en.wikipedia.org/wiki/Quantiles#Estimating_the_quantiles_of_a_population
# For additional information, see:
# http://www.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf
# http://en.wikipedia.org/wiki/Percentile
# http://www.mathworks.com/help/stats/quantiles-and-percentiles.html
#
# hFn is the function:
# (n: decimal) -> (p: decimal) -> decimal
# such that hFn returns a 1-based real-valued index (which may or may not be a whole-number) into the array of sorted values in xs
# qSubPFn is the function:
# (getIthX: (int -> decimal)) -> (h: decimal) -> decimal
# such that getIthX returns the zero-based ith element from the array of sorted values in xs
# percentages is a sequence of percentages expressed as real numbers in the range [0.0, 100.0]
def quantiles(hFn, qSubPFn, interpolate, isSorted, percentages, xs)
sortedXs = isSorted ? xs : xs.sort
n = BigDecimal.new sortedXs.length # n is the sample size
q = BigDecimal.new 100
p = ->(k) { k / q }
hs = percentages.map {|percentage| hFn.call(n, p.call(percentage)) - 1 } # NOTE: these indices are 0-based indices into sortedXs
getIthX = ->(i) { sortedXs[i] }
if interpolate # interpolate
hs.map do |h|
i = h.floor # i is a 0-based index into sortedXs (smaller index to interpolate between)
j = h.ceil # j is a 0-based index into sortedXs (larger index to interpolate between)
f = h - i # f is the fractional part of real-valued index h
intI = i.to_i
intJ = j.to_i
(1 - f) * getIthX.call(intI) + f * getIthX.call(intJ) # [1] - (1-f) * x_k + f * x_k+1 === x_k + f*(x_k+1 - x_k)
# [1]:
# see: http://web.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf
# also: (1-f) * x_k + f * x_k+1 === x_k - f*x_k + f*x_k+1 === x_k + f*(x_k+1 - x_k) which is what I'm after
end
else # floor the index instead of interpolating
hs.map do |h|
i = h.floor.to_i # i is a 0-based index into sortedXs
getIthX.call(i)
end
end
end
# implementation based on description of R-1 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
def quantilesR1(interpolate, isSorted, percentages, xs)
quantiles(
->(n, p) { p == 0 ? 1 : n * p + BigDecimal.new("0.5") },
->(getIthX, h) { getIthX.call((h - BigDecimal.new("0.5")).ceil.to_i) },
interpolate,
isSorted,
percentages,
xs
)
end
# The R manual claims that "Hyndman and Fan (1996) ... recommended type 8"
# see: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/quantile.html
# implementation based on description of R-8 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
OneThird = Rational(1, 3)
TwoThirds = Rational(2, 3)
def quantilesR8(interpolate, isSorted, percentages, xs)
quantiles(
->(n, p) {
if p < TwoThirds / (n + OneThird)
1
elsif p >= (n - OneThird) / (n + OneThird)
n
else
(n + OneThird) * p + OneThird
end
},
->(getIthX, h) {
floorHDec = h.floor
floorH = floorHDec.to_i
getIthX.call(floorH) + (h - floorHDec) * (getIthX.call(floorH + 1) - getIthX.call(floorH))
},
interpolate,
isSorted,
percentages,
xs
)
end
# we use the type 8 quantile method because the R manual claims that "Hyndman and Fan (1996) ... recommended type 8"
def percentiles(percentages, xs)
quantilesR8(true, false, percentages, xs)
end
def percentilesSorted(percentages, xs)
quantilesR8(true, true, percentages, xs)
end
def sample_with_replacement(array, n)
length = array.length
n.times.map { array[rand(length)] }
end
def build_sampling_distribution(n_samples, n_observations_per_sample, build_sample_fn, compute_sample_statistic_fn)
n_samples.times.map { compute_sample_statistic_fn.call(build_sample_fn.call(n_observations_per_sample)) }
end
# returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i]
def build_sampling_distributions(n_samples, n_observations_per_sample, build_sample_fn, compute_sample_statistic_fns)
sampling_distributions = Array.new(compute_sample_statistic_fns.count) { Array.new }
n_samples.times do
sample = build_sample_fn.call(n_observations_per_sample)
compute_sample_statistic_fns.each_with_index {|compute_sample_statistic_fn, i| sampling_distributions[i] << compute_sample_statistic_fn.call(sample) }
end
sampling_distributions
end
# returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i]
def build_sampling_distributions_from_one_multi_statistic_fn(n_samples, n_observations_per_sample, build_sample_fn, multi_statistic_fn)
diagnostic_sample = [1,2,3]
number_of_sampling_distributions = multi_statistic_fn.call(diagnostic_sample).count
sampling_distributions = Array.new(number_of_sampling_distributions) { Array.new }
n_samples.times do
sample = build_sample_fn.call(n_observations_per_sample)
sample_statistics = multi_statistic_fn.call(sample)
sample_statistics.each_with_index {|sample_statistic, i| sampling_distributions[i] << sample_statistic }
end
sampling_distributions
end
def calculate_cumulative_return(sequence_of_returns)
sequence_of_returns.reduce(:*)
end
def build_sample(n_observations, build_observation_fn)
n_observations.times.map { build_observation_fn.call }
end
monthly_returns = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
# monthly_returns = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8, -25.6]
# monthly_returns = [0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
# monthly_returns = [-16.8, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
monthly_returns = monthly_returns.map {|r| (r + 100) / 100.0 }
build_1_year_return_observation_fn = ->() { calculate_cumulative_return(sample_with_replacement(monthly_returns, 12)) }
# build_5_year_return_observation_fn = ->() { calculate_cumulative_return(sample_with_replacement(monthly_returns, 60)) }
# returns an array of returns
build_1_year_return_sample_fn = ->(n_observations) { build_sample(n_observations, build_1_year_return_observation_fn) }
# build_5_year_return_sample_fn = ->(n_observations) { build_sample(n_observations, build_5_year_return_observation_fn) }
compute_sample_mean_fn = ->(sample) { n = sample.count; sample.reduce(:+) / n.to_f }
compute_1st_percentile_fn = ->(sample) { percentiles([1], sample).first }
compute_5th_percentile_fn = ->(sample) { percentiles([5], sample).first }
compute_10th_percentile_fn = ->(sample) { percentiles([10], sample).first }
compute_20th_percentile_fn = ->(sample) { percentiles([20], sample).first }
compute_30th_percentile_fn = ->(sample) { percentiles([30], sample).first }
compute_40th_percentile_fn = ->(sample) { percentiles([40], sample).first }
compute_50th_percentile_fn = ->(sample) { percentiles([50], sample).first }
compute_60th_percentile_fn = ->(sample) { percentiles([60], sample).first }
compute_70th_percentile_fn = ->(sample) { percentiles([70], sample).first }
compute_80th_percentile_fn = ->(sample) { percentiles([80], sample).first }
compute_90th_percentile_fn = ->(sample) { percentiles([90], sample).first }
compute_95th_percentile_fn = ->(sample) { percentiles([95], sample).first }
compute_99th_percentile_fn = ->(sample) { percentiles([99], sample).first }
sample_statistic_fns = [
compute_sample_mean_fn,
compute_1st_percentile_fn,
compute_5th_percentile_fn,
compute_10th_percentile_fn,
compute_20th_percentile_fn,
compute_30th_percentile_fn,
compute_40th_percentile_fn,
compute_50th_percentile_fn,
compute_60th_percentile_fn,
compute_70th_percentile_fn,
compute_80th_percentile_fn,
compute_90th_percentile_fn,
compute_95th_percentile_fn,
compute_99th_percentile_fn
]
multiple_statistic_fn = ->(sample) { [compute_sample_mean_fn.call(sample)] + percentiles([1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], sample) }
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_sample_mean_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_1st_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_5th_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_10th_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_30th_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_50th_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_75th_percentile_fn)
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_95th_percentile_fn)
# percentiles = percentiles([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], sampling_distribution)
# print out the sampling distribution of the mean 1-year return
# pp [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
# pp percentiles.map(&:to_f)
t1 = Time.now
number_of_samples = 10000
number_of_observations_per_sample = 10000
# sampling_distributions = build_sampling_distributions(number_of_samples, number_of_observations_per_sample, build_1_year_return_sample_fn, sample_statistic_fns)
sampling_distributions = build_sampling_distributions_from_one_multi_statistic_fn(number_of_samples, number_of_observations_per_sample, build_1_year_return_sample_fn, multiple_statistic_fn)
t2 = Time.now
puts "Building sampling distributions from #{number_of_samples} samples, each containing #{number_of_observations_per_sample} observations, took #{t2 - t1} seconds."
percentiles = sampling_distributions.map {|sampling_distribution| percentiles([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], sampling_distribution).map(&:to_f).map {|f| f.round(3) } }
pp percentiles
sampling_distribution_means = sampling_distributions.map {|sampling_distribution| compute_sample_mean_fn.call(sampling_distribution).to_f.round(3) }
pp sampling_distribution_means
# Given: monthly_returns = [0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
# Building sampling distributions from 10000 samples, each containing 10000 observations, took 552.37217 seconds.
# [[1.466, 1.483, 1.485, 1.487, 1.489, 1.491, 1.492, 1.494, 1.496, 1.499, 1.514],
# [0.494, 0.508, 0.511, 0.521, 0.522, 0.522, 0.523, 0.524, 0.528, 0.529, 0.552],
# [0.665, 0.683, 0.684, 0.684, 0.684, 0.684, 0.685, 0.688, 0.692, 0.694, 0.713],
# [0.777, 0.796, 0.798, 0.799, 0.799, 0.8, 0.8, 0.802, 0.809, 0.809, 0.821],
# [0.935, 0.959, 0.96, 0.96, 0.961, 0.962, 0.962, 0.962, 0.963, 0.973, 0.987],
# [1.077, 1.092, 1.094, 1.096, 1.105, 1.105, 1.106, 1.106, 1.107, 1.107, 1.124],
# [1.207, 1.224, 1.226, 1.237, 1.238, 1.239, 1.24, 1.24, 1.241, 1.242, 1.258],
# [1.35, 1.368, 1.37, 1.37, 1.37, 1.371, 1.371, 1.371, 1.373, 1.387, 1.407],
# [1.493, 1.514, 1.53, 1.532, 1.532, 1.533, 1.533, 1.535, 1.535, 1.536, 1.556],
# [1.673, 1.696, 1.696, 1.698, 1.715, 1.716, 1.717, 1.718, 1.719, 1.721, 1.746],
# [1.899, 1.931, 1.95, 1.951, 1.952, 1.952, 1.953, 1.954, 1.954, 1.957, 2.006],
# [2.281, 2.314, 2.317, 2.317, 2.32, 2.342, 2.344, 2.346, 2.347, 2.349, 2.387],
# [2.596, 2.665, 2.669, 2.672, 2.7, 2.704, 2.704, 2.706, 2.707, 2.71, 2.784],
# [3.341, 3.439, 3.443, 3.446, 3.45, 3.479, 3.493, 3.498, 3.538, 3.544, 3.651]]
# [1.491,
# 0.521,
# 0.687,
# 0.801,
# 0.963,
# 1.102,
# 1.237,
# 1.373,
# 1.53,
# 1.711,
# 1.952,
# 2.332,
# 2.692,
# 3.48]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment