viniciusmss · October 29, 2019 19:21
diff --git a/Scavenger Hunt Answer Key.R b/Scavenger Hunt Answer Key.R
 ### Code heavily drawn from one of MIT's 17.802 Quantitative Research Methods II 
 ### homework solutions. Courtesy of Jens Hainmueller. 


 # ------ First steps! --------- 

 # Importing the data
 library(readr)
 foo <- read_csv("https://bit.ly/2SKjUn2")

 # Number of observations
 nrow(foo)

 # Number of registered voters
 sum(foo$reg_voters)

 # Bootstrapped balance test
 storage1 <- NA

 set.seed(12345)
 for(i in 1:10000) {
  storage1[i] <-
    mean(foo$reg_voters[sample(c(2,4,6,8,10,12,14,16), 8, replace = T)]) -
    mean(foo$reg_voters[sample(c(1,3,5,7,9,11,13,15), 8, replace = T)])
 }

 quantile(storage1, c(0.025, 0.975))


 # ATE
 mean(d$vote_pop[d$treat == "pub.pol"]) - mean(d$vote_pop[d$treat == "client"])

 # ------ YOU DID IT! Great job!!! ---------

 # Linear regression
 lm_basic <- lm(d$vote_pop~d$treat)
 ATE_basic <- lm_basic$coefficients[2]

 # Confidence interval
 confint(lm_basic)


 # ------ Randomization Inference (no blocking) ---------

 fisherfun_noblock <- function(nulleffect = 0, Ys = foo$vote_pop,
                              diffmeans = -0.1575, 
                              sims = 20000) {
  
  storage.vector <- NA
  for(i in 1:sims) {
    
    Y0s <-  c(Ys[1], Ys[2]-nulleffect, 
              Ys[3], Ys[4]-nulleffect,
              Ys[5], Ys[6]-nulleffect, 
              Ys[7], Ys[8]-nulleffect,
              Ys[9], Ys[10]-nulleffect,
              Ys[11], Ys[12]-nulleffect,
              Ys[13], Ys[14]-nulleffect,
              Ys[15], Ys[16]-nulleffect
    )
    
    Y1s <-  
      c(Ys[1]+nulleffect, Ys[2], 
        Ys[3]+nulleffect, Ys[4],
        Ys[5]+nulleffect, Ys[6], 
        Ys[7]+nulleffect, Ys[8],
        Ys[9]+nulleffect, Ys[10], 
        Ys[11]+nulleffect, Ys[12], 
        Ys[13]+nulleffect, Ys[14], 
        Ys[15]+nulleffect, Ys[16]
      )
    
    # NOW, TIME TO SIMULATE THE ASSIGNMENTS--DO SO BELOW...
    which_treated <-   sample(1:16, 8)
    which_controls <-  (1:16)[-which_treated]

    storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
  }
  return(storage.vector)
 }
  
 set.seed(112)

 effect_noblock <- fisherfun_noblock()
 pval <- mean(effect_noblock < ATE_basic)

 # Plot the histogram
 hist(effect_noblock, breaks=25, main="Histogram of this_effect", xlab="this_effect")
 abline(v=ATE_basic,lty=1, col="red")
 mtext(sprintf("The p-value is: %.3f", pval), 3)


 # ---- This is anothe way to do it, which works for the sharp null only ----
 # Get treatment vector as a dummy
 t=numeric(length(d$treat))
 t[d$treat=="pub.pol"]=1

 numiters=10000
 this_effect=numeric(numiters)

 #Randomized distribution w/o blocking
 for (j in 1:numiters){
  this_t=sample(t,16,replace=F)
  this_effect[j]=mean(d$vote_pop[this_t==1])-mean(d$vote_pop[this_t==0])
 }

 mean(this_effect < ATE_basic)

 # ------ Randomization Inference (with pairwise blocking) ---------

 # I'm going to show you several ways of doing it so you can compare
 # and conclude that they are all doing the same thing.
 # ------- Simulation -- version 1

 numiters=1000
 this_effect_blocked=numeric(numiters)
 this_t=numeric(16)

 for (j in 1:numiters){
  first_in_pair=rbinom(8,1,.5)
  second_in_pair=1-first_in_pair
  
  this_t[seq(1,15,2)]=first_in_pair
  this_t[seq(2,16,2)]=second_in_pair
  
  thisout=lm(d$vote_pop~this_t+d$block)
  this_effect_blocked[j]=thisout$coefficients[2]
 }

 pval <- mean(this_effect_blocked < ATE_basic)

 # Plotting
 hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
 abline(v=ATE_basic,lty=1, col="red")
 pval <- mean(this_effect_blocked < ATE_basic)
 mtext(sprintf("The p-value is: %.3f", pval), 3)


 # ------- Simulation -- version 2
 # This is the version Prof. Diamon shared with you https://tinyurl.com/y2gzgxja

 fisherfunction <- function(nulleffect = 0, Ys = foo$vote_pop,
                           diffmeans = -0.1575, 
                           sims = 20000) {
  
  storage.vector <- NA
  for(i in 1:sims) {
    
    Y0s <-  c(Ys[1], Ys[2]-nulleffect, 
              Ys[3], Ys[4]-nulleffect,
              Ys[5], Ys[6]-nulleffect, 
              Ys[7], Ys[8]-nulleffect,
              Ys[9], Ys[10]-nulleffect,
              Ys[11], Ys[12]-nulleffect,
              Ys[13], Ys[14]-nulleffect,
              Ys[15], Ys[16]-nulleffect
    )
    
    Y1s <-  
      c(Ys[1]+nulleffect, Ys[2], 
        Ys[3]+nulleffect, Ys[4],
        Ys[5]+nulleffect, Ys[6], 
        Ys[7]+nulleffect, Ys[8],
        Ys[9]+nulleffect, Ys[10], 
        Ys[11]+nulleffect, Ys[12], 
        Ys[13]+nulleffect, Ys[14], 
        Ys[15]+nulleffect, Ys[16]
      )
    
    which_treated <- c(sample(c(1,2), 1),
                       sample(c(3,4), 1),
                       sample(c(5,6), 1),
                       sample(c(7,8), 1),
                       sample(c(9,10), 1),
                       sample(c(11,12), 1),
                       sample(c(13,14), 1),
                       sample(c(15,16), 1))
    
    which_controls <- c(1:16)[-which_treated]
    
    storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
  }
  
  empirical.pvalue <- sum(storage.vector <= diffmeans)/sims
  return(storage.vector)
 }

 this_effect_blocked <- fisherfunction(nulleffect = 0)  # use this to obtain the p-value
 pval <- mean(this_effect_blocked < ATE_basic)

 # Plotting
 hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
 abline(v=ATE_basic,lty=1, col="red")
 pval <- mean(this_effect_blocked < ATE_basic)
 mtext(sprintf("The p-value is: %.3f", pval), 3)


 # ------- Analytical -- version 3
 # Create 256 rows, each with possible assignments for first village
 # in each pair

 # install.packages("e1071")
 library(e1071)
 first_in_pair=bincombinations(8)
 second_in_pair=1-first_in_pair

 # Storage vectors
 this_effect_blocked=numeric(256)
 this_t=numeric(16)

 for (j in 1:nrow(first_in_pair)){
  this_t[seq(1,15,2)]=first_in_pair[j,]
  this_t[seq(2,16,2)]=second_in_pair[j,]
  thisout=lm(d$vote_pop~this_t+d$block)
  this_effect_blocked[j]=thisout$coefficients[2] 
 }

 # Plotting:
 hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
 abline(v=ATE_basic,lty=1, col="red")
 pval <- mean(this_effect_blocked < ATE_basic)
 mtext(sprintf("The p-value is: %.3f", pval), 3)


 # ------ Fisher Interval! Bonus Round! ---------

 stor2 <- NA
 n = 1 

 # Change fisher function from return(storage.vector) to 
 # return(storage.vector)
 for(i in -35:5/100) {
  
  stor2[n] <- fisherfunction(nulleffect = i)
  n = n+1}

 plot(x = -35:5/100, y = stor2, type = "l", xlab = "additive sharp null", 
     ylab = "p-value", col = "blue", lwd = 5, 
     main = "Relationship Between Sharp Null and P-Value \n(Fisher Exact Test)", ylim = c(0, 1))

 abline(h = 0.05, lty = 3, col = "gray", lwd = 2)
 abline(h = 0.95, lty = 3, col = "gray", lwd = 2)

 abline(h = 0.025, lty = 3, col = "purple", lwd = 2)
 abline(h = 0.975, lty = 3, col = "purple", lwd = 2)

 abline(h = 0.50, lty = 3, col = "pink", lwd = 2)

 abline(v = , col = "pink", lwd = 2, lty = 2)
 abline(v = , col = "purple", lwd = 2, lty = 2)
 abline(v = , col = "gray", lwd = 2,lty = 2)
 abline(v = , col = "purple", lwd = 2, lty = 2)
 abline(v = ,col = "gray", lwd = 2,lty = 2)
	### Code heavily drawn from one of MIT's 17.802 Quantitative Research Methods II
	### homework solutions. Courtesy of Jens Hainmueller.


	# ------ First steps! ---------

	# Importing the data
	library(readr)
	foo <- read_csv("https://bit.ly/2SKjUn2")

	# Number of observations
	nrow(foo)

	# Number of registered voters
	sum(foo$reg_voters)

	# Bootstrapped balance test
	storage1 <- NA

	set.seed(12345)
	for(i in 1:10000) {
	storage1[i] <-
	mean(foo$reg_voters[sample(c(2,4,6,8,10,12,14,16), 8, replace = T)]) -
	mean(foo$reg_voters[sample(c(1,3,5,7,9,11,13,15), 8, replace = T)])
	}

	quantile(storage1, c(0.025, 0.975))


	# ATE
	mean(d$vote_pop[d$treat == "pub.pol"]) - mean(d$vote_pop[d$treat == "client"])

	# ------ YOU DID IT! Great job!!! ---------

	# Linear regression
	lm_basic <- lm(d$vote_pop~d$treat)
	ATE_basic <- lm_basic$coefficients[2]

	# Confidence interval
	confint(lm_basic)


	# ------ Randomization Inference (no blocking) ---------

	fisherfun_noblock <- function(nulleffect = 0, Ys = foo$vote_pop,
	diffmeans = -0.1575,
	sims = 20000) {

	storage.vector <- NA
	for(i in 1:sims) {

	Y0s <- c(Ys[1], Ys[2]-nulleffect,
	Ys[3], Ys[4]-nulleffect,
	Ys[5], Ys[6]-nulleffect,
	Ys[7], Ys[8]-nulleffect,
	Ys[9], Ys[10]-nulleffect,
	Ys[11], Ys[12]-nulleffect,
	Ys[13], Ys[14]-nulleffect,
	Ys[15], Ys[16]-nulleffect
	)

	Y1s <-
	c(Ys[1]+nulleffect, Ys[2],
	Ys[3]+nulleffect, Ys[4],
	Ys[5]+nulleffect, Ys[6],
	Ys[7]+nulleffect, Ys[8],
	Ys[9]+nulleffect, Ys[10],
	Ys[11]+nulleffect, Ys[12],
	Ys[13]+nulleffect, Ys[14],
	Ys[15]+nulleffect, Ys[16]
	)

	# NOW, TIME TO SIMULATE THE ASSIGNMENTS--DO SO BELOW...
	which_treated <- sample(1:16, 8)
	which_controls <- (1:16)[-which_treated]

	storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
	}
	return(storage.vector)
	}

	set.seed(112)

	effect_noblock <- fisherfun_noblock()
	pval <- mean(effect_noblock < ATE_basic)

	# Plot the histogram
	hist(effect_noblock, breaks=25, main="Histogram of this_effect", xlab="this_effect")
	abline(v=ATE_basic,lty=1, col="red")
	mtext(sprintf("The p-value is: %.3f", pval), 3)


	# ---- This is anothe way to do it, which works for the sharp null only ----
	# Get treatment vector as a dummy
	t=numeric(length(d$treat))
	t[d$treat=="pub.pol"]=1

	numiters=10000
	this_effect=numeric(numiters)

	#Randomized distribution w/o blocking
	for (j in 1:numiters){
	this_t=sample(t,16,replace=F)
	this_effect[j]=mean(d$vote_pop[this_t==1])-mean(d$vote_pop[this_t==0])
	}

	mean(this_effect < ATE_basic)

	# ------ Randomization Inference (with pairwise blocking) ---------

	# I'm going to show you several ways of doing it so you can compare
	# and conclude that they are all doing the same thing.
	# ------- Simulation -- version 1

	numiters=1000
	this_effect_blocked=numeric(numiters)
	this_t=numeric(16)

	for (j in 1:numiters){
	first_in_pair=rbinom(8,1,.5)
	second_in_pair=1-first_in_pair

	this_t[seq(1,15,2)]=first_in_pair
	this_t[seq(2,16,2)]=second_in_pair

	thisout=lm(d$vote_pop~this_t+d$block)
	this_effect_blocked[j]=thisout$coefficients[2]
	}

	pval <- mean(this_effect_blocked < ATE_basic)

	# Plotting
	hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
	abline(v=ATE_basic,lty=1, col="red")
	pval <- mean(this_effect_blocked < ATE_basic)
	mtext(sprintf("The p-value is: %.3f", pval), 3)


	# ------- Simulation -- version 2
	# This is the version Prof. Diamon shared with you https://tinyurl.com/y2gzgxja

	fisherfunction <- function(nulleffect = 0, Ys = foo$vote_pop,
	diffmeans = -0.1575,
	sims = 20000) {

	storage.vector <- NA
	for(i in 1:sims) {

	Y0s <- c(Ys[1], Ys[2]-nulleffect,
	Ys[3], Ys[4]-nulleffect,
	Ys[5], Ys[6]-nulleffect,
	Ys[7], Ys[8]-nulleffect,
	Ys[9], Ys[10]-nulleffect,
	Ys[11], Ys[12]-nulleffect,
	Ys[13], Ys[14]-nulleffect,
	Ys[15], Ys[16]-nulleffect
	)

	Y1s <-
	c(Ys[1]+nulleffect, Ys[2],
	Ys[3]+nulleffect, Ys[4],
	Ys[5]+nulleffect, Ys[6],
	Ys[7]+nulleffect, Ys[8],
	Ys[9]+nulleffect, Ys[10],
	Ys[11]+nulleffect, Ys[12],
	Ys[13]+nulleffect, Ys[14],
	Ys[15]+nulleffect, Ys[16]
	)

	which_treated <- c(sample(c(1,2), 1),
	sample(c(3,4), 1),
	sample(c(5,6), 1),
	sample(c(7,8), 1),
	sample(c(9,10), 1),
	sample(c(11,12), 1),
	sample(c(13,14), 1),
	sample(c(15,16), 1))

	which_controls <- c(1:16)[-which_treated]

	storage.vector[i] <- mean(Y1s[which_treated]) - mean(Y0s[which_controls])
	}

	empirical.pvalue <- sum(storage.vector <= diffmeans)/sims
	return(storage.vector)
	}

	this_effect_blocked <- fisherfunction(nulleffect = 0) # use this to obtain the p-value
	pval <- mean(this_effect_blocked < ATE_basic)

	# Plotting
	hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
	abline(v=ATE_basic,lty=1, col="red")
	pval <- mean(this_effect_blocked < ATE_basic)
	mtext(sprintf("The p-value is: %.3f", pval), 3)


	# ------- Analytical -- version 3
	# Create 256 rows, each with possible assignments for first village
	# in each pair

	# install.packages("e1071")
	library(e1071)
	first_in_pair=bincombinations(8)
	second_in_pair=1-first_in_pair

	# Storage vectors
	this_effect_blocked=numeric(256)
	this_t=numeric(16)

	for (j in 1:nrow(first_in_pair)){
	this_t[seq(1,15,2)]=first_in_pair[j,]
	this_t[seq(2,16,2)]=second_in_pair[j,]
	thisout=lm(d$vote_pop~this_t+d$block)
	this_effect_blocked[j]=thisout$coefficients[2]
	}

	# Plotting:
	hist(this_effect_blocked, breaks=25, main="Distribution of Randomized Effects", xlab="ATE")
	abline(v=ATE_basic,lty=1, col="red")
	pval <- mean(this_effect_blocked < ATE_basic)
	mtext(sprintf("The p-value is: %.3f", pval), 3)


	# ------ Fisher Interval! Bonus Round! ---------

	stor2 <- NA
	n = 1

	# Change fisher function from return(storage.vector) to
	# return(storage.vector)
	for(i in -35:5/100) {

	stor2[n] <- fisherfunction(nulleffect = i)
	n = n+1}

	plot(x = -35:5/100, y = stor2, type = "l", xlab = "additive sharp null",
	ylab = "p-value", col = "blue", lwd = 5,
	main = "Relationship Between Sharp Null and P-Value \n(Fisher Exact Test)", ylim = c(0, 1))

	abline(h = 0.05, lty = 3, col = "gray", lwd = 2)
	abline(h = 0.95, lty = 3, col = "gray", lwd = 2)

	abline(h = 0.025, lty = 3, col = "purple", lwd = 2)
	abline(h = 0.975, lty = 3, col = "purple", lwd = 2)

	abline(h = 0.50, lty = 3, col = "pink", lwd = 2)

	abline(v = , col = "pink", lwd = 2, lty = 2)
	abline(v = , col = "purple", lwd = 2, lty = 2)
	abline(v = , col = "gray", lwd = 2,lty = 2)
	abline(v = , col = "purple", lwd = 2, lty = 2)
	abline(v = ,col = "gray", lwd = 2,lty = 2)
No results found