Skip to content

Instantly share code, notes, and snippets.

@viniciusmss
Created October 29, 2019 19:21
Show Gist options
  • Select an option

  • Save viniciusmss/aa1e795b970052405592221276a2e5c6 to your computer and use it in GitHub Desktop.

Select an option

Save viniciusmss/aa1e795b970052405592221276a2e5c6 to your computer and use it in GitHub Desktop.
### Code heavily drawn from one of MIT's 17.802 Quantitative Research Methods II
### homework solutions. Courtesy of Jens Hainmueller.
# ------ First steps! ---------
# Importing the data
library(readr)
foo <- read_csv("https://bit.ly/2SKjUn2")
# Number of observations
nrow(foo)
# Number of registered voters
sum(foo$reg_voters)
# Bootstrapped balance test
storage1 <- NA
set.seed(12345)
for(i in 1:10000) {
storage1[i] <-
mean(foo$reg_voters[sample(c(2,4,6,8,10,12,14,16), 8, replace = T)]) -
mean(foo$reg_voters[sample(c(1,3,5,7,9,11,13,15), 8, replace = T)])
}
quantile(storage1, c(0.025, 0.975))
# ATE
mean(d$vote_pop[d$treat == "pub.pol"]) - mean(d$vote_pop[d$treat == "client"])
# ------ YOU DID IT! Great job!!! ---------
# Linear regression
lm_basic <- lm(d$vote_pop~d$treat)
ATE_basic <- lm_basic$coefficients[2]
# Confidence interval
confint(lm_basic)
# ------ Randomization Inference (no blocking) ---------
fisherfun_noblock <- function(nulleffect = 0, Ys = foo$vote_pop,
diffmeans = -0.1575,
sims = 20000) {
storage.vector <- NA
for(i in 1:sims) {
Y0s <- c(Ys[1], Ys[2]-nulleffect,
Ys[3], Ys[4]-nulleffect,
Ys[5], Ys[6]-nulleffect,
Ys[7], Ys[8]-nulleffect,
Ys[9], Ys[10]-nulleffect,
Ys[11], Ys[12]-nulleffect,
Ys[13], Ys[14]-nulleffect,
Ys[15], Ys[16]-nulleffect
)
Y1s <-
c(Ys[1]+nulleffect, Ys[2],
Ys[3]+nulleffect, Ys[4],
Ys[5]+nulleffect, Ys[6],
Ys[7]+nulleffect, Ys[8],
Ys[9]+nulleffect, Ys[10],
Ys[11]+nulleffect, Ys[12],
Ys[13]+nulleffect, Ys[14],
Ys[15]+nulleffect, Ys[16]
)
# NOW, TIME TO SIMULATE THE ASSIGNMENTS--DO SO BELOW...
which_treated <- sample(1:16, 8)
which_controls <- (1:16)[-which_treated]
storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
}
return(storage.vector)
}
set.seed(112)
effect_noblock <- fisherfun_noblock()
pval <- mean(effect_noblock < ATE_basic)
# Plot the histogram
hist(effect_noblock, breaks=25, main="Histogram of this_effect", xlab="this_effect")
abline(v=ATE_basic,lty=1, col="red")
mtext(sprintf("The p-value is: %.3f", pval), 3)
# ---- This is anothe way to do it, which works for the sharp null only ----
# Get treatment vector as a dummy
t=numeric(length(d$treat))
t[d$treat=="pub.pol"]=1
numiters=10000
this_effect=numeric(numiters)
#Randomized distribution w/o blocking
for (j in 1:numiters){
this_t=sample(t,16,replace=F)
this_effect[j]=mean(d$vote_pop[this_t==1])-mean(d$vote_pop[this_t==0])
}
mean(this_effect < ATE_basic)
# ------ Randomization Inference (with pairwise blocking) ---------
# I'm going to show you several ways of doing it so you can compare
# and conclude that they are all doing the same thing.
# ------- Simulation -- version 1
numiters=1000
this_effect_blocked=numeric(numiters)
this_t=numeric(16)
for (j in 1:numiters){
first_in_pair=rbinom(8,1,.5)
second_in_pair=1-first_in_pair
this_t[seq(1,15,2)]=first_in_pair
this_t[seq(2,16,2)]=second_in_pair
thisout=lm(d$vote_pop~this_t+d$block)
this_effect_blocked[j]=thisout$coefficients[2]
}
pval <- mean(this_effect_blocked < ATE_basic)
# Plotting
hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
abline(v=ATE_basic,lty=1, col="red")
pval <- mean(this_effect_blocked < ATE_basic)
mtext(sprintf("The p-value is: %.3f", pval), 3)
# ------- Simulation -- version 2
# This is the version Prof. Diamon shared with you https://tinyurl.com/y2gzgxja
fisherfunction <- function(nulleffect = 0, Ys = foo$vote_pop,
diffmeans = -0.1575,
sims = 20000) {
storage.vector <- NA
for(i in 1:sims) {
Y0s <- c(Ys[1], Ys[2]-nulleffect,
Ys[3], Ys[4]-nulleffect,
Ys[5], Ys[6]-nulleffect,
Ys[7], Ys[8]-nulleffect,
Ys[9], Ys[10]-nulleffect,
Ys[11], Ys[12]-nulleffect,
Ys[13], Ys[14]-nulleffect,
Ys[15], Ys[16]-nulleffect
)
Y1s <-
c(Ys[1]+nulleffect, Ys[2],
Ys[3]+nulleffect, Ys[4],
Ys[5]+nulleffect, Ys[6],
Ys[7]+nulleffect, Ys[8],
Ys[9]+nulleffect, Ys[10],
Ys[11]+nulleffect, Ys[12],
Ys[13]+nulleffect, Ys[14],
Ys[15]+nulleffect, Ys[16]
)
which_treated <- c(sample(c(1,2), 1),
sample(c(3,4), 1),
sample(c(5,6), 1),
sample(c(7,8), 1),
sample(c(9,10), 1),
sample(c(11,12), 1),
sample(c(13,14), 1),
sample(c(15,16), 1))
which_controls <- c(1:16)[-which_treated]
storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
}
empirical.pvalue <- sum(storage.vector <= diffmeans)/sims
return(storage.vector)
}
this_effect_blocked <- fisherfunction(nulleffect = 0) # use this to obtain the p-value
pval <- mean(this_effect_blocked < ATE_basic)
# Plotting
hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
abline(v=ATE_basic,lty=1, col="red")
pval <- mean(this_effect_blocked < ATE_basic)
mtext(sprintf("The p-value is: %.3f", pval), 3)
# ------- Analytical -- version 3
# Create 256 rows, each with possible assignments for first village
# in each pair
# install.packages("e1071")
library(e1071)
first_in_pair=bincombinations(8)
second_in_pair=1-first_in_pair
# Storage vectors
this_effect_blocked=numeric(256)
this_t=numeric(16)
for (j in 1:nrow(first_in_pair)){
this_t[seq(1,15,2)]=first_in_pair[j,]
this_t[seq(2,16,2)]=second_in_pair[j,]
thisout=lm(d$vote_pop~this_t+d$block)
this_effect_blocked[j]=thisout$coefficients[2]
}
# Plotting:
hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
abline(v=ATE_basic,lty=1, col="red")
pval <- mean(this_effect_blocked < ATE_basic)
mtext(sprintf("The p-value is: %.3f", pval), 3)
# ------ Fisher Interval! Bonus Round! ---------
stor2 <- NA
n = 1
# Change fisher function from return(storage.vector) to
# return(storage.vector)
for(i in -35:5/100) {
stor2[n] <- fisherfunction(nulleffect = i)
n = n+1}
plot(x = -35:5/100, y = stor2, type = "l", xlab = "additive sharp null",
ylab = "p-value", col = "blue", lwd = 5,
main = "Relationship Between Sharp Null and P-Value \n(Fisher Exact Test)", ylim = c(0, 1))
abline(h = 0.05, lty = 3, col = "gray", lwd = 2)
abline(h = 0.95, lty = 3, col = "gray", lwd = 2)
abline(h = 0.025, lty = 3, col = "purple", lwd = 2)
abline(h = 0.975, lty = 3, col = "purple", lwd = 2)
abline(h = 0.50, lty = 3, col = "pink", lwd = 2)
abline(v = , col = "pink", lwd = 2, lty = 2)
abline(v = , col = "purple", lwd = 2, lty = 2)
abline(v = , col = "gray", lwd = 2,lty = 2)
abline(v = , col = "purple", lwd = 2, lty = 2)
abline(v = ,col = "gray", lwd = 2,lty = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment