Created
October 14, 2014 14:07
-
-
Save davidkellis/b6d2d5c66f26117237c5 to your computer and use it in GitHub Desktop.
VMS Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'time' | |
require 'pp' | |
require 'bigdecimal' | |
# This implementation is based on http://en.wikipedia.org/wiki/Quantiles#Estimating_the_quantiles_of_a_population | |
# For additional information, see: | |
# http://www.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf | |
# http://en.wikipedia.org/wiki/Percentile | |
# http://www.mathworks.com/help/stats/quantiles-and-percentiles.html | |
# | |
# hFn is the function: | |
# (n: decimal) -> (p: decimal) -> decimal | |
# such that hFn returns a 1-based real-valued index (which may or may not be a whole-number) into the array of sorted values in xs | |
# qSubPFn is the function: | |
# (getIthX: (int -> decimal)) -> (h: decimal) -> decimal | |
# such that getIthX returns the zero-based ith element from the array of sorted values in xs | |
# percentages is a sequence of percentages expressed as real numbers in the range [0.0, 100.0] | |
def quantiles(hFn, qSubPFn, interpolate, isSorted, percentages, xs) | |
sortedXs = isSorted ? xs : xs.sort | |
n = BigDecimal.new sortedXs.length # n is the sample size | |
q = BigDecimal.new 100 | |
p = ->(k) { k / q } | |
hs = percentages.map {|percentage| hFn.call(n, p.call(percentage)) - 1 } # NOTE: these indices are 0-based indices into sortedXs | |
getIthX = ->(i) { sortedXs[i] } | |
if interpolate # interpolate | |
hs.map do |h| | |
i = h.floor # i is a 0-based index into sortedXs (smaller index to interpolate between) | |
j = h.ceil # j is a 0-based index into sortedXs (larger index to interpolate between) | |
f = h - i # f is the fractional part of real-valued index h | |
intI = i.to_i | |
intJ = j.to_i | |
(1 - f) * getIthX.call(intI) + f * getIthX.call(intJ) # [1] - (1-f) * x_k + f * x_k+1 === x_k + f*(x_k+1 - x_k) | |
# [1]: | |
# see: http://web.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf | |
# also: (1-f) * x_k + f * x_k+1 === x_k - f*x_k + f*x_k+1 === x_k + f*(x_k+1 - x_k) which is what I'm after | |
end | |
else # floor the index instead of interpolating | |
hs.map do |h| | |
i = h.floor.to_i # i is a 0-based index into sortedXs | |
getIthX.call(i) | |
end | |
end | |
end | |
# implementation based on description of R-1 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population | |
def quantilesR1(interpolate, isSorted, percentages, xs) | |
quantiles( | |
->(n, p) { p == 0 ? 1 : n * p + BigDecimal.new("0.5") }, | |
->(getIthX, h) { getIthX.call((h - BigDecimal.new("0.5")).ceil.to_i) }, | |
interpolate, | |
isSorted, | |
percentages, | |
xs | |
) | |
end | |
# The R manual claims that "Hyndman and Fan (1996) ... recommended type 8" | |
# see: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/quantile.html | |
# implementation based on description of R-8 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population | |
OneThird = Rational(1, 3) | |
TwoThirds = Rational(2, 3) | |
def quantilesR8(interpolate, isSorted, percentages, xs) | |
quantiles( | |
->(n, p) { | |
if p < TwoThirds / (n + OneThird) | |
1 | |
elsif p >= (n - OneThird) / (n + OneThird) | |
n | |
else | |
(n + OneThird) * p + OneThird | |
end | |
}, | |
->(getIthX, h) { | |
floorHDec = h.floor | |
floorH = floorHDec.to_i | |
getIthX.call(floorH) + (h - floorHDec) * (getIthX.call(floorH + 1) - getIthX.call(floorH)) | |
}, | |
interpolate, | |
isSorted, | |
percentages, | |
xs | |
) | |
end | |
# we use the type 8 quantile method because the R manual claims that "Hyndman and Fan (1996) ... recommended type 8" | |
def percentiles(percentages, xs) | |
quantilesR8(true, false, percentages, xs) | |
end | |
def percentilesSorted(percentages, xs) | |
quantilesR8(true, true, percentages, xs) | |
end | |
def sample_with_replacement(array, n) | |
length = array.length | |
n.times.map { array[rand(length)] } | |
end | |
def build_sampling_distribution(n_samples, n_observations_per_sample, build_sample_fn, compute_sample_statistic_fn) | |
n_samples.times.map { compute_sample_statistic_fn.call(build_sample_fn.call(n_observations_per_sample)) } | |
end | |
# returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i] | |
def build_sampling_distributions(n_samples, n_observations_per_sample, build_sample_fn, compute_sample_statistic_fns) | |
sampling_distributions = Array.new(compute_sample_statistic_fns.count) { Array.new } | |
n_samples.times do | |
sample = build_sample_fn.call(n_observations_per_sample) | |
compute_sample_statistic_fns.each_with_index {|compute_sample_statistic_fn, i| sampling_distributions[i] << compute_sample_statistic_fn.call(sample) } | |
end | |
sampling_distributions | |
end | |
# returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i] | |
def build_sampling_distributions_from_one_multi_statistic_fn(n_samples, n_observations_per_sample, build_sample_fn, multi_statistic_fn) | |
diagnostic_sample = [1,2,3] | |
number_of_sampling_distributions = multi_statistic_fn.call(diagnostic_sample).count | |
sampling_distributions = Array.new(number_of_sampling_distributions) { Array.new } | |
n_samples.times do | |
sample = build_sample_fn.call(n_observations_per_sample) | |
sample_statistics = multi_statistic_fn.call(sample) | |
sample_statistics.each_with_index {|sample_statistic, i| sampling_distributions[i] << sample_statistic } | |
end | |
sampling_distributions | |
end | |
def calculate_cumulative_return(sequence_of_returns) | |
sequence_of_returns.reduce(:*) | |
end | |
def build_sample(n_observations, build_observation_fn) | |
n_observations.times.map { build_observation_fn.call } | |
end | |
monthly_returns = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8] | |
# monthly_returns = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8, -25.6] | |
# monthly_returns = [0.2, 18.9, 15.5, -11.7, 9.2, -11.8] | |
# monthly_returns = [-16.8, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8] | |
monthly_returns = monthly_returns.map {|r| (r + 100) / 100.0 } | |
build_1_year_return_observation_fn = ->() { calculate_cumulative_return(sample_with_replacement(monthly_returns, 12)) } | |
# build_5_year_return_observation_fn = ->() { calculate_cumulative_return(sample_with_replacement(monthly_returns, 60)) } | |
# returns an array of returns | |
build_1_year_return_sample_fn = ->(n_observations) { build_sample(n_observations, build_1_year_return_observation_fn) } | |
# build_5_year_return_sample_fn = ->(n_observations) { build_sample(n_observations, build_5_year_return_observation_fn) } | |
compute_sample_mean_fn = ->(sample) { n = sample.count; sample.reduce(:+) / n.to_f } | |
compute_1st_percentile_fn = ->(sample) { percentiles([1], sample).first } | |
compute_5th_percentile_fn = ->(sample) { percentiles([5], sample).first } | |
compute_10th_percentile_fn = ->(sample) { percentiles([10], sample).first } | |
compute_20th_percentile_fn = ->(sample) { percentiles([20], sample).first } | |
compute_30th_percentile_fn = ->(sample) { percentiles([30], sample).first } | |
compute_40th_percentile_fn = ->(sample) { percentiles([40], sample).first } | |
compute_50th_percentile_fn = ->(sample) { percentiles([50], sample).first } | |
compute_60th_percentile_fn = ->(sample) { percentiles([60], sample).first } | |
compute_70th_percentile_fn = ->(sample) { percentiles([70], sample).first } | |
compute_80th_percentile_fn = ->(sample) { percentiles([80], sample).first } | |
compute_90th_percentile_fn = ->(sample) { percentiles([90], sample).first } | |
compute_95th_percentile_fn = ->(sample) { percentiles([95], sample).first } | |
compute_99th_percentile_fn = ->(sample) { percentiles([99], sample).first } | |
sample_statistic_fns = [ | |
compute_sample_mean_fn, | |
compute_1st_percentile_fn, | |
compute_5th_percentile_fn, | |
compute_10th_percentile_fn, | |
compute_20th_percentile_fn, | |
compute_30th_percentile_fn, | |
compute_40th_percentile_fn, | |
compute_50th_percentile_fn, | |
compute_60th_percentile_fn, | |
compute_70th_percentile_fn, | |
compute_80th_percentile_fn, | |
compute_90th_percentile_fn, | |
compute_95th_percentile_fn, | |
compute_99th_percentile_fn | |
] | |
multiple_statistic_fn = ->(sample) { [compute_sample_mean_fn.call(sample)] + percentiles([1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], sample) } | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_sample_mean_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_1st_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_5th_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_10th_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_30th_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_50th_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_75th_percentile_fn) | |
# sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_95th_percentile_fn) | |
# percentiles = percentiles([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], sampling_distribution) | |
# print out the sampling distribution of the mean 1-year return | |
# pp [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] | |
# pp percentiles.map(&:to_f) | |
t1 = Time.now | |
number_of_samples = 10000 | |
number_of_observations_per_sample = 10000 | |
# sampling_distributions = build_sampling_distributions(number_of_samples, number_of_observations_per_sample, build_1_year_return_sample_fn, sample_statistic_fns) | |
sampling_distributions = build_sampling_distributions_from_one_multi_statistic_fn(number_of_samples, number_of_observations_per_sample, build_1_year_return_sample_fn, multiple_statistic_fn) | |
t2 = Time.now | |
puts "Building sampling distributions from #{number_of_samples} samples, each containing #{number_of_observations_per_sample} observations, took #{t2 - t1} seconds." | |
percentiles = sampling_distributions.map {|sampling_distribution| percentiles([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], sampling_distribution).map(&:to_f).map {|f| f.round(3) } } | |
pp percentiles | |
sampling_distribution_means = sampling_distributions.map {|sampling_distribution| compute_sample_mean_fn.call(sampling_distribution).to_f.round(3) } | |
pp sampling_distribution_means | |
# Given: monthly_returns = [0.2, 18.9, 15.5, -11.7, 9.2, -11.8] | |
# Building sampling distributions from 10000 samples, each containing 10000 observations, took 552.37217 seconds. | |
# [[1.466, 1.483, 1.485, 1.487, 1.489, 1.491, 1.492, 1.494, 1.496, 1.499, 1.514], | |
# [0.494, 0.508, 0.511, 0.521, 0.522, 0.522, 0.523, 0.524, 0.528, 0.529, 0.552], | |
# [0.665, 0.683, 0.684, 0.684, 0.684, 0.684, 0.685, 0.688, 0.692, 0.694, 0.713], | |
# [0.777, 0.796, 0.798, 0.799, 0.799, 0.8, 0.8, 0.802, 0.809, 0.809, 0.821], | |
# [0.935, 0.959, 0.96, 0.96, 0.961, 0.962, 0.962, 0.962, 0.963, 0.973, 0.987], | |
# [1.077, 1.092, 1.094, 1.096, 1.105, 1.105, 1.106, 1.106, 1.107, 1.107, 1.124], | |
# [1.207, 1.224, 1.226, 1.237, 1.238, 1.239, 1.24, 1.24, 1.241, 1.242, 1.258], | |
# [1.35, 1.368, 1.37, 1.37, 1.37, 1.371, 1.371, 1.371, 1.373, 1.387, 1.407], | |
# [1.493, 1.514, 1.53, 1.532, 1.532, 1.533, 1.533, 1.535, 1.535, 1.536, 1.556], | |
# [1.673, 1.696, 1.696, 1.698, 1.715, 1.716, 1.717, 1.718, 1.719, 1.721, 1.746], | |
# [1.899, 1.931, 1.95, 1.951, 1.952, 1.952, 1.953, 1.954, 1.954, 1.957, 2.006], | |
# [2.281, 2.314, 2.317, 2.317, 2.32, 2.342, 2.344, 2.346, 2.347, 2.349, 2.387], | |
# [2.596, 2.665, 2.669, 2.672, 2.7, 2.704, 2.704, 2.706, 2.707, 2.71, 2.784], | |
# [3.341, 3.439, 3.443, 3.446, 3.45, 3.479, 3.493, 3.498, 3.538, 3.544, 3.651]] | |
# [1.491, | |
# 0.521, | |
# 0.687, | |
# 0.801, | |
# 0.963, | |
# 1.102, | |
# 1.237, | |
# 1.373, | |
# 1.53, | |
# 1.711, | |
# 1.952, | |
# 2.332, | |
# 2.692, | |
# 3.48] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment