Last active
November 8, 2019 13:44
-
-
Save BioSciEconomist/eeb0b5ebde392b01ef66976758ddafd2 to your computer and use it in GitHub Desktop.
Basic bootstrapping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: basic boot.R | |
# | DATE: 11/9/19 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: demo R boot package and home grown bootsrapping for differences in means and difference-in-differences | |
# *---------------------------------------------------------------- | |
#--------------------------------- | |
# baseline OLS model | |
#--------------------------------- | |
# set linear model baseline | |
summary(lm(ERVisits ~ treat, data = df_chrt)) | |
# 95% CI | |
est <- -.266 | |
std.err <- .363 | |
LL = est - 1.96 * std.err | |
UB = est + 1.96*std.err | |
ci <- cbind(Estmiate = est, "SE" = std.err,LL,UB) | |
print(ci) | |
rm(est,std.err,LL,UB) | |
#--------------------------------- | |
# roll your own bs | |
#--------------------------------- | |
# define treatment and control groups for separate bootstraps | |
treat <- df_chrt_match[df_chrt$treat == 1,] | |
ctrl <- df_chrt_match[df_chrt$treat == 0,] | |
t0 <- mean(treat$ERVisits) - mean(ctrl$ERVisits) # point estimate | |
set.seed(1234567) | |
bstrap <- c() # initialize vector for bootstrapped estimates | |
for (i in 1:10000) { | |
sample1 <- treat[sample(nrow(treat), nrow(treat), replace = T), ] # sample from treatment group | |
sample2 <- ctrl[sample(nrow(ctrl), nrow(ctrl), replace = T), ] # sample from control group | |
# sample from data with replacement (this is the bs sample) | |
bstrap <- c(bstrap, (mean(sample1$ERVisits) - mean(sample2$ERVisits))) # calculate estimate from bs sample | |
} | |
hist(bstrap) # view bs distribution | |
summary(bstrap) # summary stats | |
sd(bstrap, na.rm = TRUE) # standard error | |
quantile(bstrap, c(.025,.975),na.rm = TRUE) # 95% bootstrapped CI | |
results <- data.frame(bstrap) | |
b.under.H0 <- results$bstrap - mean(results$bstrap) # p-value based on bs dist | |
mean(abs(b.under.H0) > abs(t0)) | |
rm(bstrap,treat, ctrl, sample1, sample2) # cleanup | |
################################################## | |
# compare to basic boot from boot package | |
################################################## | |
# function to bootstrap the dummy variable regression estimate of differences in means | |
# this is different from above where we were pulling separate samples from the treatment and control groups | |
diff <- function(formula, data, indices) { | |
d <- data[indices,] # allows boot to select sample | |
fit <- lm(formula, data=d) | |
return(coef(fit)[2]) # get the coefficient for treatment (this gives mean differences) | |
} | |
# bootstrapping with 10000 replications | |
results <- boot(data=df_chrt_match, statistic=diff, | |
R=10000, formula=ERVisits ~ treat) | |
# view results | |
plot(results) | |
# get 95% bs confidence interval | |
boot.ci(results, type="all") # matches percentile method from above & same standard error we got above | |
b.under.H0 <- results$t - mean(results$t) | |
mean(abs(b.under.H0) > abs(results$t0)) # same p-value as above | |
#----------------------------------------- | |
# example for difference-in-differences via R bloggers | |
#--------------------------------------- | |
# Reference: https://www.r-bloggers.com/bootstrapping-standard-errors-for-difference-in-differences-estimation-with-r/ | |
run_DiD <- function(my_data, indices){ | |
d <- my_data[indices,] | |
return( | |
mean(d$rprice[d$year==1981 & d$nearinc==1]) - | |
mean(d$rprice[d$year==1981 & d$nearinc==0]) - | |
(mean(d$rprice[d$year==1978 & d$nearinc==1]) - | |
mean(d$rprice[d$year==1978 & d$nearinc==0])) | |
) | |
} | |
boot_est <- boot(data, run_DiD, R=1000, parallel="multicore", ncpus = 2) | |
#### using our example above: (note data was restructured | |
run_DiD <- function(my_data, indices){ | |
d <- my_data[indices,] | |
return( | |
(mean(d$ERVisits[d$treat == 1]) - mean(d$ERVisits[d$treat == 1])) - | |
(mean(d$ERVisits[d$treat == 0]) - mean(d$ERVisits[d$treat == 0]) ) | |
) | |
} | |
# bootstrapping with 10000 replications | |
set.seed(123456) | |
results <- boot(data=df_chrt, statistic=run_DiD, | |
R=10000) | |
print(results) | |
# view results | |
plot(results) | |
# get 95% bs confidence interval | |
boot.ci(results, type="all") | |
# adhoc bs p-value | |
b.under.H0 <- results$t - mean(results$t) | |
mean(abs(b.under.H0) > abs(results$t0)) | |
#----------------------------------------- | |
# roll your own | |
#--------------------------------------- | |
treat <- df_chrt[df_chrt$treat == 1,] | |
ctrl <- df_chrt[df_chrt$treat == 0,] | |
t0 <- (mean(treat$Post_ERVisits) - mean(treat$Pre_ERVisits)) - | |
(mean(ctrl$Post_ERVisits) - mean(ctrl$Pre_ERVisits) ) | |
set.seed(1234567) | |
bstrap <- c() # initialize vector for bootstrapped estimates | |
for (i in 1:10000) { | |
sample1 <- treat[sample(nrow(treat), nrow(treat), replace = T), ] # sample from treatment group | |
sample2 <- ctrl[sample(nrow(ctrl), nrow(ctrl), replace = T), ] # sample from control group | |
# sample from data with replacement (this is the bs sample) | |
bstrap <- c(bstrap,(mean(sample1$Post_ERVisits) - mean(sample1$Pre_ERVisits)) - | |
(mean(sample2$Post_ERVisits) - mean(sample2$Pre_ERVisits) ) ) # calculate estimate from bs sample | |
} | |
hist(bstrap) # view bs distribution | |
summary(bstrap) # summary stats | |
sd(bstrap, na.rm = TRUE) # standard error | |
quantile(bstrap, c(.025,.975),na.rm = TRUE) # 95% bootstrapped CI | |
bsresults <- data.frame(bstrap) | |
b.under.H0 <- bsresults$bstrap - mean(bsresults$bstrap) # p-value based on bs dist | |
mean(abs(b.under.H0) > abs(t0)) | |
rm(bstrap,treat, ctrl, sample1, sample2) # cleanup | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment