Created
July 23, 2022 01:39
-
-
Save BioSciEconomist/bc5204e57294ba972ded97e454c811c1 to your computer and use it in GitHub Desktop.
Basic CI simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#*----------------------------------------------------------------- | |
# | PROGRAM NAME: fun with CIs.r | |
# | DATE: 7/16/22 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: basic CI simulation | |
# *---------------------------------------------------------------- | |
rm(list=ls()) # get rid of any existing data | |
cat("\f") # clear console | |
dev.off() # clear plots | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
library(MASS) | |
# | |
# simulate population | |
# | |
mu <- 100 # mean 4000 | |
sd <- 10 # standard deviation 3 x mean | |
# simulate costs based on parameter assumptions above | |
set.seed(1234567) | |
y <- rnorm(10000,100,10) | |
# check descriptive results | |
summary(y) | |
sd(y) | |
hist(y, breaks = 50) | |
# data frame | |
ID <- seq(1,10000) | |
df <- data.frame(ID,y) | |
# | |
# simualte sampling | |
# | |
Sn = 30 | |
set.seed(9876) # make this repeatable 1234567 | |
xbar <- c() | |
stderr <- c() | |
CI_Lower <- c() | |
CI_Upper <- c() | |
repNum <- c() | |
for (i in 1:500) { | |
sample <- df[sample(nrow(df), Sn, replace = F), ] # sample from treatment group | |
est <- mean(sample$y) | |
se <- sd(sample$y)/sqrt(Sn) # SE | |
#LB <- est - 1.96*se | |
#UB <- est + 1.96*se | |
LB <- est - se # just look at error bars | |
UB <- est + se # just look at error bars | |
xbar <- c(xbar,est) | |
stderr <- c(stderr,se) | |
CI_Lower <- c(CI_Lower, LB) | |
CI_Upper <- c(CI_Upper, UB) | |
repNum <- c(repNum,i) # get p-value from this bs sample | |
print(cbind("Iteration:", i)) # comment this out to stop printing to log | |
} | |
rpt <- data.frame(repNum,xbar,stderr,CI_Lower,CI_Upper) | |
rm(LB,UB,se,est,i,sample) # cleanup | |
k <- 100 # population parameter we are estimating | |
# report the simulation results | |
rpt$flag <- ifelse((rpt$CI_Lower <= k & rpt$CI_Upper >= k),1,0) # interval contains 'k' | |
rpt$sig <- ifelse((rpt$CI_Lower <= 0 & rpt$CI_Upper >= 0),0,1) # 1 if statistically different from 0 | |
rpt$diffLB <- abs(k - rpt$CI_Lower) # difference between k and CI lower bound | |
rpt$diffUB <- abs(k - rpt$CI_Upper) # difference between k and CI upper bound | |
rpt$diffEst <- abs(k - rpt$xbar) # difference between point estimate and k | |
rpt$est_closer <- ifelse(rpt$diffEst < rpt$diffLB & rpt$diffEst < rpt$diffUB,1,0) # flag when point estimate is closer to k | |
summary(rpt) # check | |
sd(rpt$xbar) | |
hist(rpt$xbar, breaks = 50) | |
# visualize | |
plot(rpt$xbar, pch=20, ylim=range(pretty(c(rpt$CI_Lower, rpt$CI_Upper))), | |
xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n') | |
segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lend=1) | |
segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lwd=1, lend=1) | |
abline(h = 0) | |
# order by estimate | |
tmp <- rpt[order(rpt$xbar),] | |
plot(tmp$xbar, pch=20, ylim=range(pretty(c(tmp$CI_Lower, tmp$CI_Upper))), | |
xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n') | |
segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lend=1) | |
segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lwd=1, lend=1) | |
abline(h = 0)\ | |
abline(h = 100, col="green") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment