Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active October 15, 2019 01:37
Show Gist options
  • Save BioSciEconomist/ae3047f76fec34772a8ff02f09cd0747 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/ae3047f76fec34772a8ff02f09cd0747 to your computer and use it in GitHub Desktop.
Example of basic randomization inference in R
# *-----------------------------------------------------------------
# | PROGRAM NAME: randomization inference demo.R
# | DATE: 10/14/19
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: demo of randomization inference based on 'Field Experimetns: Design, Analysis, and Interpretation
# | by Alan S. Gerber and Donald P. Green (Ch3)
# *----------------------------------------------------------------
# create some data
Y <- c(150,140,145,137,141,145,149,153,157,161)
Z <- c(1,1,0,0,0,0,1,0,1,1)
hist(Y) # distribution of outcome
t.test(Y~Z,var.equal=FALSE,alternative= "two.sided", data = yield_data) # t-test
summary(lm(Y ~ Z)) # regression
numiter <- 100000 # number of RI iterations. Use more for greater precision, fewer for greater speed.
set.seed(1234567) # set random number seed (so that results can be replicated)
# formulate our estimator (in this case differences in means)
denom <- var(Z)
tau <- cov(Y,Z)/denom # define regression based treatment effect outside of lm function
tauRI <- rep(NA,numiter) # initialize vector for results
for (i in 1:numiter) {
Zri <- sample(Z) # sample from the treatment group assignment vector (this is where randomization happens)
tauRI[i] <- cov(Y,Zri)/denom # calculate estimate
if (i %% round(numiter/10) == 0) cat("Iteration",i,"of",numiter,"\n")
}
hist(tauRI) # RI distribution
# calculate p-value
pvalue <- round(sum(tauRI >= tau)/numiter,3) + round(sum(tauRI <= -tau)/numiter,3) # two sided
print(pvalue)
rm(tau,tauRI, Zri,denom,Y,X)
#----------------------------
# basic bootstrap confidence interval
#-----------------------------
tmp <- data.frame(Y,Z) # create data frame from data above
# regression (for comparison)
summary(lm(Y ~ Z, data = tmp))
# boot strap for regression (this may not be accurate with such small sample)
denom <- var(tmp$Z) # for estimator below
set.seed(1234567)
bstrap <- c() # initialize vector for bootstrapped estimates
for (i in 1:10000) {
newsample <- tmp[sample(nrow(tmp), 10, replace = T), ] # sample from data with replacement (this is the bs sample)
bstrap <- c(bstrap, cov(newsample$Y,newsample$Z)/denom) # calculate estimate from bs sample
}
hist(bstrap) # view bs distribution
summary(bstrap) # summary stats
sd(bstrap, na.rm = TRUE) # standard error
quantile(bstrap, c(.025,.975),na.rm = TRUE) # 95% bootstrapped CI
rm(bstrap,newsample) # cleanup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment