BioSciEconomist · October 15, 2019 01:37
diff --git a/randomization inference demo.R b/randomization inference demo.R
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: randomization inference demo.R
 # | DATE: 10/14/19 
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:         
 # *----------------------------------------------------------------
 # | PURPOSE: demo of randomization inference based on 'Field Experimetns: Design, Analysis, and Interpretation
 # | by Alan S. Gerber and Donald P. Green (Ch3)
 # *----------------------------------------------------------------

 # create some data
 Y <- c(150,140,145,137,141,145,149,153,157,161) 
 Z <- c(1,1,0,0,0,0,1,0,1,1)

 hist(Y) # distribution of outcome

 t.test(Y~Z,var.equal=FALSE,alternative= "two.sided", data = yield_data) # t-test 
 summary(lm(Y ~ Z)) # regression

 numiter <- 100000 # number of RI iterations. Use more for greater precision, fewer for greater speed.
 set.seed(1234567) # set random number seed (so that results can be replicated)


 # formulate our estimator (in this case differences in means)
 denom <- var(Z) 
 tau <- cov(Y,Z)/denom  # define regression based treatment effect outside of lm function

 tauRI <- rep(NA,numiter) # initialize vector for results

 for (i in 1:numiter) {
  
  Zri <- sample(Z) # sample from the treatment group assignment vector (this is where randomization happens)
  tauRI[i] <- cov(Y,Zri)/denom # calculate estimate
  
  if (i %% round(numiter/10) == 0) cat("Iteration",i,"of",numiter,"\n")
 }


 hist(tauRI) # RI distribution 

 # calculate p-value
 pvalue <- round(sum(tauRI >= tau)/numiter,3) + round(sum(tauRI <= -tau)/numiter,3)  # two sided
 print(pvalue)

 rm(tau,tauRI, Zri,denom,Y,X)

 #----------------------------
 # basic bootstrap confidence interval
 #-----------------------------

 tmp <- data.frame(Y,Z) # create data frame from data above

 # regression (for comparison)
 summary(lm(Y ~ Z, data = tmp))

 # boot strap for regression (this may not be accurate with such small sample)
 denom <- var(tmp$Z) # for estimator below
 set.seed(1234567) 
 bstrap <- c() # initialize vector for bootstrapped estimates
 for (i in 1:10000) {
  newsample <- tmp[sample(nrow(tmp), 10, replace = T), ] # sample from data with replacement (this is the bs sample)
  bstrap <- c(bstrap, cov(newsample$Y,newsample$Z)/denom) # calculate estimate from bs sample
 }

 hist(bstrap) # view bs distribution
 summary(bstrap) # summary stats

 sd(bstrap, na.rm = TRUE)  # standard error
 quantile(bstrap, c(.025,.975),na.rm = TRUE)  # 95% bootstrapped CI

 rm(bstrap,newsample) # cleanup
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: randomization inference demo.R
	# \| DATE: 10/14/19
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: demo of randomization inference based on 'Field Experimetns: Design, Analysis, and Interpretation
	# \| by Alan S. Gerber and Donald P. Green (Ch3)
	# *----------------------------------------------------------------

	# create some data
	Y <- c(150,140,145,137,141,145,149,153,157,161)
	Z <- c(1,1,0,0,0,0,1,0,1,1)

	hist(Y) # distribution of outcome

	t.test(Y~Z,var.equal=FALSE,alternative= "two.sided", data = yield_data) # t-test
	summary(lm(Y ~ Z)) # regression

	numiter <- 100000 # number of RI iterations. Use more for greater precision, fewer for greater speed.
	set.seed(1234567) # set random number seed (so that results can be replicated)


	# formulate our estimator (in this case differences in means)
	denom <- var(Z)
	tau <- cov(Y,Z)/denom # define regression based treatment effect outside of lm function

	tauRI <- rep(NA,numiter) # initialize vector for results

	for (i in 1:numiter) {

	Zri <- sample(Z) # sample from the treatment group assignment vector (this is where randomization happens)
	tauRI[i] <- cov(Y,Zri)/denom # calculate estimate

	if (i %% round(numiter/10) == 0) cat("Iteration",i,"of",numiter,"\n")
	}


	hist(tauRI) # RI distribution

	# calculate p-value
	pvalue <- round(sum(tauRI >= tau)/numiter,3) + round(sum(tauRI <= -tau)/numiter,3) # two sided
	print(pvalue)

	rm(tau,tauRI, Zri,denom,Y,X)

	#----------------------------
	# basic bootstrap confidence interval
	#-----------------------------

	tmp <- data.frame(Y,Z) # create data frame from data above

	# regression (for comparison)
	summary(lm(Y ~ Z, data = tmp))

	# boot strap for regression (this may not be accurate with such small sample)
	denom <- var(tmp$Z) # for estimator below
	set.seed(1234567)
	bstrap <- c() # initialize vector for bootstrapped estimates
	for (i in 1:10000) {
	newsample <- tmp[sample(nrow(tmp), 10, replace = T), ] # sample from data with replacement (this is the bs sample)
	bstrap <- c(bstrap, cov(newsample$Y,newsample$Z)/denom) # calculate estimate from bs sample
	}

	hist(bstrap) # view bs distribution
	summary(bstrap) # summary stats

	sd(bstrap, na.rm = TRUE) # standard error
	quantile(bstrap, c(.025,.975),na.rm = TRUE) # 95% bootstrapped CI

	rm(bstrap,newsample) # cleanup