Created
September 13, 2016 20:43
-
-
Save hillarysanders/9d34d5f58a927be5d68fb05734660e45 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
################################################################################ | |
# author: hillz | |
# subject: CLT simulation: bootstrap vs CLT on right tailed distributions with | |
# many samples - bootstrap converges to CLT answer. No reason to use | |
# bootstrap. | |
################################################################################ | |
################################################################################ | |
################################################################################ | |
# utils functions: | |
get.bootstrap.se = function(x, n.simulations=1000){ | |
sample.means = sapply(1:n.simulations, | |
FUN = function(i) mean(sample(x, size = length(x), replace = T))) | |
se = sd(sample.means) | |
return(se) | |
} | |
plot.boot.vs.clt.estimates = function(many.boot.SEs, clt.se){ | |
hist(many.boot.SEs, col='darkblue', main='Standard Error Estimates', breaks=30) | |
abline(v=clt.se, lwd=8, col='darkred') | |
abline(v=mean(many.boot.SEs), lwd=4, col='lightblue') | |
legend('topright', box.lwd=0, bty=T, pch=c(15, NA, NA), pt.cex=c(1.5, NA, NA), | |
lwd=c(NA, 8, 4), col=c('darkblue', 'darkred', 'lightblue'), | |
legend=c('Bootstrap SE Estimate', | |
'CLT based SE Estimate', | |
'Average Bootstrap SE Estimate')) | |
} | |
################################################################################ | |
# test paramaters: | |
n.obs = 100000 | |
raise.power = 10 # for skewness | |
# create the fake skewed data: | |
x = rexp(n = n.obs, rate = 1)^raise.power | |
# estimate SE of sample mean by bootstrap: | |
bootstrap.se = get.bootstrap.se(x) | |
# estimate SE of sample mean by Central limit theorom: | |
clt.se = sd(x)/sqrt(length(x)) | |
cat(paste0('CLT based SE: ', clt.se)) | |
cat(paste0('\nBootstrap based SE: ', bootstrap.se)) | |
# get many bootstrap SE estimates to make sure it is always very close to CLT estimate: | |
many.boot.SEs = sapply(1:100, FUN = function(i) get.bootstrap.se(x, n.simulations=250)) | |
# plot the results: | |
# the data is very right tailed: | |
par(mfrow=c(2,1)) | |
hist(x, breaks=1000, main='Very Right Tailed Distribution') | |
plot.boot.vs.clt.estimates(many.boot.SEs, clt.se) | |
# --> bootstrap just converges to CLT answer. There's no reason to use bootstraps when | |
# estimating the SD of your sample mean, if your data are iid, even if the data is massively | |
# skewed. | |
# This doesn't prove that CLT or bootstrap estimates are perfect at this skewness X sample | |
# size, just that there's no reason to bootstrap it over using CLT. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment