Created
October 29, 2019 19:21
-
-
Save viniciusmss/aa1e795b970052405592221276a2e5c6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### Code heavily drawn from one of MIT's 17.802 Quantitative Research Methods II | |
| ### homework solutions. Courtesy of Jens Hainmueller. | |
| # ------ First steps! --------- | |
| # Importing the data | |
| library(readr) | |
| foo <- read_csv("https://bit.ly/2SKjUn2") | |
| # Number of observations | |
| nrow(foo) | |
| # Number of registered voters | |
| sum(foo$reg_voters) | |
| # Bootstrapped balance test | |
| storage1 <- NA | |
| set.seed(12345) | |
| for(i in 1:10000) { | |
| storage1[i] <- | |
| mean(foo$reg_voters[sample(c(2,4,6,8,10,12,14,16), 8, replace = T)]) - | |
| mean(foo$reg_voters[sample(c(1,3,5,7,9,11,13,15), 8, replace = T)]) | |
| } | |
| quantile(storage1, c(0.025, 0.975)) | |
| # ATE | |
| mean(d$vote_pop[d$treat == "pub.pol"]) - mean(d$vote_pop[d$treat == "client"]) | |
| # ------ YOU DID IT! Great job!!! --------- | |
| # Linear regression | |
| lm_basic <- lm(d$vote_pop~d$treat) | |
| ATE_basic <- lm_basic$coefficients[2] | |
| # Confidence interval | |
| confint(lm_basic) | |
| # ------ Randomization Inference (no blocking) --------- | |
| fisherfun_noblock <- function(nulleffect = 0, Ys = foo$vote_pop, | |
| diffmeans = -0.1575, | |
| sims = 20000) { | |
| storage.vector <- NA | |
| for(i in 1:sims) { | |
| Y0s <- c(Ys[1], Ys[2]-nulleffect, | |
| Ys[3], Ys[4]-nulleffect, | |
| Ys[5], Ys[6]-nulleffect, | |
| Ys[7], Ys[8]-nulleffect, | |
| Ys[9], Ys[10]-nulleffect, | |
| Ys[11], Ys[12]-nulleffect, | |
| Ys[13], Ys[14]-nulleffect, | |
| Ys[15], Ys[16]-nulleffect | |
| ) | |
| Y1s <- | |
| c(Ys[1]+nulleffect, Ys[2], | |
| Ys[3]+nulleffect, Ys[4], | |
| Ys[5]+nulleffect, Ys[6], | |
| Ys[7]+nulleffect, Ys[8], | |
| Ys[9]+nulleffect, Ys[10], | |
| Ys[11]+nulleffect, Ys[12], | |
| Ys[13]+nulleffect, Ys[14], | |
| Ys[15]+nulleffect, Ys[16] | |
| ) | |
| # NOW, TIME TO SIMULATE THE ASSIGNMENTS--DO SO BELOW... | |
| which_treated <- sample(1:16, 8) | |
| which_controls <- (1:16)[-which_treated] | |
| storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls]) | |
| } | |
| return(storage.vector) | |
| } | |
| set.seed(112) | |
| effect_noblock <- fisherfun_noblock() | |
| pval <- mean(effect_noblock < ATE_basic) | |
| # Plot the histogram | |
| hist(effect_noblock, breaks=25, main="Histogram of this_effect", xlab="this_effect") | |
| abline(v=ATE_basic,lty=1, col="red") | |
| mtext(sprintf("The p-value is: %.3f", pval), 3) | |
| # ---- This is anothe way to do it, which works for the sharp null only ---- | |
| # Get treatment vector as a dummy | |
| t=numeric(length(d$treat)) | |
| t[d$treat=="pub.pol"]=1 | |
| numiters=10000 | |
| this_effect=numeric(numiters) | |
| #Randomized distribution w/o blocking | |
| for (j in 1:numiters){ | |
| this_t=sample(t,16,replace=F) | |
| this_effect[j]=mean(d$vote_pop[this_t==1])-mean(d$vote_pop[this_t==0]) | |
| } | |
| mean(this_effect < ATE_basic) | |
| # ------ Randomization Inference (with pairwise blocking) --------- | |
| # I'm going to show you several ways of doing it so you can compare | |
| # and conclude that they are all doing the same thing. | |
| # ------- Simulation -- version 1 | |
| numiters=1000 | |
| this_effect_blocked=numeric(numiters) | |
| this_t=numeric(16) | |
| for (j in 1:numiters){ | |
| first_in_pair=rbinom(8,1,.5) | |
| second_in_pair=1-first_in_pair | |
| this_t[seq(1,15,2)]=first_in_pair | |
| this_t[seq(2,16,2)]=second_in_pair | |
| thisout=lm(d$vote_pop~this_t+d$block) | |
| this_effect_blocked[j]=thisout$coefficients[2] | |
| } | |
| pval <- mean(this_effect_blocked < ATE_basic) | |
| # Plotting | |
| hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE") | |
| abline(v=ATE_basic,lty=1, col="red") | |
| pval <- mean(this_effect_blocked < ATE_basic) | |
| mtext(sprintf("The p-value is: %.3f", pval), 3) | |
| # ------- Simulation -- version 2 | |
| # This is the version Prof. Diamon shared with you https://tinyurl.com/y2gzgxja | |
| fisherfunction <- function(nulleffect = 0, Ys = foo$vote_pop, | |
| diffmeans = -0.1575, | |
| sims = 20000) { | |
| storage.vector <- NA | |
| for(i in 1:sims) { | |
| Y0s <- c(Ys[1], Ys[2]-nulleffect, | |
| Ys[3], Ys[4]-nulleffect, | |
| Ys[5], Ys[6]-nulleffect, | |
| Ys[7], Ys[8]-nulleffect, | |
| Ys[9], Ys[10]-nulleffect, | |
| Ys[11], Ys[12]-nulleffect, | |
| Ys[13], Ys[14]-nulleffect, | |
| Ys[15], Ys[16]-nulleffect | |
| ) | |
| Y1s <- | |
| c(Ys[1]+nulleffect, Ys[2], | |
| Ys[3]+nulleffect, Ys[4], | |
| Ys[5]+nulleffect, Ys[6], | |
| Ys[7]+nulleffect, Ys[8], | |
| Ys[9]+nulleffect, Ys[10], | |
| Ys[11]+nulleffect, Ys[12], | |
| Ys[13]+nulleffect, Ys[14], | |
| Ys[15]+nulleffect, Ys[16] | |
| ) | |
| which_treated <- c(sample(c(1,2), 1), | |
| sample(c(3,4), 1), | |
| sample(c(5,6), 1), | |
| sample(c(7,8), 1), | |
| sample(c(9,10), 1), | |
| sample(c(11,12), 1), | |
| sample(c(13,14), 1), | |
| sample(c(15,16), 1)) | |
| which_controls <- c(1:16)[-which_treated] | |
| storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls]) | |
| } | |
| empirical.pvalue <- sum(storage.vector <= diffmeans)/sims | |
| return(storage.vector) | |
| } | |
| this_effect_blocked <- fisherfunction(nulleffect = 0) # use this to obtain the p-value | |
| pval <- mean(this_effect_blocked < ATE_basic) | |
| # Plotting | |
| hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE") | |
| abline(v=ATE_basic,lty=1, col="red") | |
| pval <- mean(this_effect_blocked < ATE_basic) | |
| mtext(sprintf("The p-value is: %.3f", pval), 3) | |
| # ------- Analytical -- version 3 | |
| # Create 256 rows, each with possible assignments for first village | |
| # in each pair | |
| # install.packages("e1071") | |
| library(e1071) | |
| first_in_pair=bincombinations(8) | |
| second_in_pair=1-first_in_pair | |
| # Storage vectors | |
| this_effect_blocked=numeric(256) | |
| this_t=numeric(16) | |
| for (j in 1:nrow(first_in_pair)){ | |
| this_t[seq(1,15,2)]=first_in_pair[j,] | |
| this_t[seq(2,16,2)]=second_in_pair[j,] | |
| thisout=lm(d$vote_pop~this_t+d$block) | |
| this_effect_blocked[j]=thisout$coefficients[2] | |
| } | |
| # Plotting: | |
| hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE") | |
| abline(v=ATE_basic,lty=1, col="red") | |
| pval <- mean(this_effect_blocked < ATE_basic) | |
| mtext(sprintf("The p-value is: %.3f", pval), 3) | |
| # ------ Fisher Interval! Bonus Round! --------- | |
| stor2 <- NA | |
| n = 1 | |
| # Change fisher function from return(storage.vector) to | |
| # return(storage.vector) | |
| for(i in -35:5/100) { | |
| stor2[n] <- fisherfunction(nulleffect = i) | |
| n = n+1} | |
| plot(x = -35:5/100, y = stor2, type = "l", xlab = "additive sharp null", | |
| ylab = "p-value", col = "blue", lwd = 5, | |
| main = "Relationship Between Sharp Null and P-Value \n(Fisher Exact Test)", ylim = c(0, 1)) | |
| abline(h = 0.05, lty = 3, col = "gray", lwd = 2) | |
| abline(h = 0.95, lty = 3, col = "gray", lwd = 2) | |
| abline(h = 0.025, lty = 3, col = "purple", lwd = 2) | |
| abline(h = 0.975, lty = 3, col = "purple", lwd = 2) | |
| abline(h = 0.50, lty = 3, col = "pink", lwd = 2) | |
| abline(v = , col = "pink", lwd = 2, lty = 2) | |
| abline(v = , col = "purple", lwd = 2, lty = 2) | |
| abline(v = , col = "gray", lwd = 2,lty = 2) | |
| abline(v = , col = "purple", lwd = 2, lty = 2) | |
| abline(v = ,col = "gray", lwd = 2,lty = 2) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment