BioSciEconomist · July 23, 2022 01:39
diff --git a/fun with CIs.r b/fun with CIs.r
 #*-----------------------------------------------------------------
 # | PROGRAM NAME: fun with CIs.r
 # | DATE: 7/16/22 
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:             
 # *----------------------------------------------------------------
 # | PURPOSE: basic CI simulation
 # *----------------------------------------------------------------



 rm(list=ls()) # get rid of any existing data 
 cat("\f") # clear console
 dev.off() # clear plots
 options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


 library(MASS)



 #
 # simulate population
 #

 mu <- 100  # mean 4000
 sd <- 10 # standard deviation 3 x mean



 # simulate costs based on parameter assumptions above
 set.seed(1234567)
 y <- rnorm(10000,100,10)

 # check descriptive results
 summary(y)
 sd(y)
 hist(y, breaks = 50)



 # data frame
 ID <- seq(1,10000)

 df <- data.frame(ID,y)



 #
 # simualte sampling
 # 


 Sn = 30
 set.seed(9876) # make this repeatable 1234567

 xbar <- c()
 stderr <- c()
 CI_Lower <- c()
 CI_Upper <- c()
 repNum <- c()

 for (i in 1:500) {
  sample <- df[sample(nrow(df), Sn, replace = F), ] # sample from treatment group
  
  est <- mean(sample$y)
  se <- sd(sample$y)/sqrt(Sn) # SE
  #LB <- est - 1.96*se
  #UB <- est + 1.96*se
  LB <- est - se # just look at error bars
  UB <- est + se # just look at error bars
  xbar <- c(xbar,est)
  stderr <- c(stderr,se)
  CI_Lower <- c(CI_Lower, LB) 
  CI_Upper <- c(CI_Upper, UB) 
  repNum <- c(repNum,i) # get p-value from this bs sample
  print(cbind("Iteration:", i)) # comment this out to stop printing to log
 }


 rpt <- data.frame(repNum,xbar,stderr,CI_Lower,CI_Upper)

 rm(LB,UB,se,est,i,sample) # cleanup


 k <- 100 # population parameter we are estimating


 # report the simulation results
 rpt$flag <- ifelse((rpt$CI_Lower <= k & rpt$CI_Upper >= k),1,0) # interval contains 'k'
 rpt$sig <- ifelse((rpt$CI_Lower <= 0 &  rpt$CI_Upper >= 0),0,1) # 1 if statistically different from 0
 rpt$diffLB <- abs(k - rpt$CI_Lower) # difference between k and CI lower bound
 rpt$diffUB <- abs(k - rpt$CI_Upper) # difference between k and CI upper bound
 rpt$diffEst <- abs(k - rpt$xbar) # difference between point estimate and k
 rpt$est_closer <- ifelse(rpt$diffEst < rpt$diffLB  & rpt$diffEst < rpt$diffUB,1,0) # flag when point estimate is closer to k
 summary(rpt) # check

 sd(rpt$xbar)

 hist(rpt$xbar, breaks = 50)


 # visualize

 plot(rpt$xbar, pch=20, ylim=range(pretty(c(rpt$CI_Lower, rpt$CI_Upper))),
     xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n')
 segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lend=1)
 segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lwd=1, lend=1)
 abline(h = 0)

 # order by estimate
 tmp <- rpt[order(rpt$xbar),]
 plot(tmp$xbar, pch=20, ylim=range(pretty(c(tmp$CI_Lower, tmp$CI_Upper))),
     xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n')
 segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lend=1)
 segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lwd=1, lend=1)
 abline(h = 0)\
 abline(h = 100, col="green")
	#*-----------------------------------------------------------------
	# \| PROGRAM NAME: fun with CIs.r
	# \| DATE: 7/16/22
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: basic CI simulation
	# *----------------------------------------------------------------



	rm(list=ls()) # get rid of any existing data
	cat("\f") # clear console
	dev.off() # clear plots
	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


	library(MASS)



	#
	# simulate population
	#

	mu <- 100 # mean 4000
	sd <- 10 # standard deviation 3 x mean



	# simulate costs based on parameter assumptions above
	set.seed(1234567)
	y <- rnorm(10000,100,10)

	# check descriptive results
	summary(y)
	sd(y)
	hist(y, breaks = 50)



	# data frame
	ID <- seq(1,10000)

	df <- data.frame(ID,y)



	#
	# simualte sampling
	#


	Sn = 30
	set.seed(9876) # make this repeatable 1234567

	xbar <- c()
	stderr <- c()
	CI_Lower <- c()
	CI_Upper <- c()
	repNum <- c()

	for (i in 1:500) {
	sample <- df[sample(nrow(df), Sn, replace = F), ] # sample from treatment group

	est <- mean(sample$y)
	se <- sd(sample$y)/sqrt(Sn) # SE
	#LB <- est - 1.96*se
	#UB <- est + 1.96*se
	LB <- est - se # just look at error bars
	UB <- est + se # just look at error bars
	xbar <- c(xbar,est)
	stderr <- c(stderr,se)
	CI_Lower <- c(CI_Lower, LB)
	CI_Upper <- c(CI_Upper, UB)
	repNum <- c(repNum,i) # get p-value from this bs sample
	print(cbind("Iteration:", i)) # comment this out to stop printing to log
	}


	rpt <- data.frame(repNum,xbar,stderr,CI_Lower,CI_Upper)

	rm(LB,UB,se,est,i,sample) # cleanup


	k <- 100 # population parameter we are estimating


	# report the simulation results
	rpt$flag <- ifelse((rpt$CI_Lower <= k & rpt$CI_Upper >= k),1,0) # interval contains 'k'
	rpt$sig <- ifelse((rpt$CI_Lower <= 0 & rpt$CI_Upper >= 0),0,1) # 1 if statistically different from 0
	rpt$diffLB <- abs(k - rpt$CI_Lower) # difference between k and CI lower bound
	rpt$diffUB <- abs(k - rpt$CI_Upper) # difference between k and CI upper bound
	rpt$diffEst <- abs(k - rpt$xbar) # difference between point estimate and k
	rpt$est_closer <- ifelse(rpt$diffEst < rpt$diffLB & rpt$diffEst < rpt$diffUB,1,0) # flag when point estimate is closer to k
	summary(rpt) # check

	sd(rpt$xbar)

	hist(rpt$xbar, breaks = 50)


	# visualize

	plot(rpt$xbar, pch=20, ylim=range(pretty(c(rpt$CI_Lower, rpt$CI_Upper))),
	xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n')
	segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lend=1)
	segments(seq_len(nrow(rpt)), rpt$CI_Lower, y1= rpt$CI_Upper, lwd=1, lend=1)
	abline(h = 0)

	# order by estimate
	tmp <- rpt[order(rpt$xbar),]
	plot(tmp$xbar, pch=20, ylim=range(pretty(c(tmp$CI_Lower, tmp$CI_Upper))),
	xlab='', ylab='', las=1, cex.axis=0.8, cex=.5, xaxt='n')
	segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lend=1)
	segments(seq_len(nrow(tmp)), tmp$CI_Lower, y1= tmp$CI_Upper, lwd=1, lend=1)
	abline(h = 0)\
	abline(h = 100, col="green")