Last active
December 16, 2020 17:30
-
-
Save Verina-Armanyous/1c21d8c13cfd833008acb89134ee1ce5 to your computer and use it in GitHub Desktop.
final.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#NOTE: Please load the correct file in the folder. | |
#The tables are the same as in the original code, but genetic matching replaced PS matching. | |
#load("/Users/johannes/Downloads/cs112-replication-master/datamatch.RData") | |
#load("datamatch.Rdata") | |
library(MatchIt) | |
library(Zelig) | |
library(Matching) | |
library(rbounds) | |
library(rgenoud) | |
library(sensitivitymv) | |
outcomes <- datamatch[10:18] | |
outcomes.lbls <- names(outcomes) | |
n.outcomes <- dim(outcomes)[2] | |
#_________________________________ Table 1 _________________________________# | |
tab1 <- matrix(NA, nrow = n.outcomes, ncol = 6) | |
rownames(tab1) <- outcomes.lbls | |
colnames(tab1) <- c("N", "prop.all", "prop.ev", "prop.tv", "diff", "pvalue") | |
for (i in 1:n.outcomes) { | |
tab1[i, 1] <- length(na.omit(outcomes[, i])) | |
tab1[i, 2] <- prop.table(table(outcomes[, i]))[2] * 100 | |
tab1[i, 3:4] <- rev(prop.table(table(outcomes[, i], datamatch$EV), 2)[2, ]) * 100 | |
tab1[i, 5] <- tab1[i, 3] - tab1[i, 4] | |
tab1[i, 6] <- prop.test(table(outcomes[, i], datamatch$EV)[2, ], n = apply(table(outcomes[, i], datamatch$EV), 2, sum))$p.value | |
} | |
tab1 <- tab1[rev(order(tab1[, "diff"])), ] | |
### Table 1 ### | |
print(tab1, digits = 4) | |
#___________________________________________________________________________# | |
# Drop observations with missing values in covariates | |
datamatch[, 10:18][is.na(datamatch[, 10:18]) == "TRUE"] <- 99999 | |
datamatch <- na.omit(datamatch) | |
#__________________________ Table 2, pre-matching __________________________# | |
EV <- datamatch[2] | |
covariates <- datamatch[c("age.group", "educ", "white.collar", "not.full.time", "male", "tech", "pol.info")] | |
covariate.lbls <- names(covariates) | |
n.covariates <- dim(covariates)[2] | |
tab2.pre <- matrix(NA, nrow = n.covariates, ncol = 4) | |
rownames(tab2.pre) <- covariate.lbls | |
colnames(tab2.pre) <- c("ev", "tv", "diff", "pvalue") | |
tab2.pre[, 1:2] <- cbind(apply(covariates[EV == 1,], 2, mean), apply(covariates[EV == 0,], 2, mean)) | |
tab2.pre[, 3] <- tab2.pre[, 1] - tab2.pre[, 2] | |
for (i in c(1, 2, 6, 7)){ | |
tab2.pre[i, 4] <- ks.boot(covariates[, i][EV == 1], covariates[, i][EV == 0], nboots = 500)$ks.boot.pvalue | |
} | |
for (i in c(3, 4, 5)){ | |
tab2.pre[i, 4] <- prop.test(table(covariates[, i], EV$EV), n = apply(table(covariates[,i],EV$EV),2, sum))$p.value | |
} | |
#__________________________ Table 3, pre-matching __________________________# | |
datamatch[datamatch == 99999] <- NA | |
outcomes.pre <- datamatch[10:18] | |
tab3.pre <- matrix(NA,nrow = n.outcomes,ncol = 5) | |
rownames(tab3.pre) <- outcomes.lbls | |
colnames(tab3.pre) <- c("N", "prop.ev", "prop.tv", "diff", "pvalue") | |
for (i in 1:n.outcomes) { | |
tab3.pre[i, 1] <- length(na.omit(outcomes.pre[, i])) | |
tab3.pre[i, 2:3] <- rev(prop.table(table(outcomes.pre[,i],datamatch$EV),2)[2,])*100 | |
tab3.pre[i, 4] <- tab3.pre[i, 2] - tab3.pre[i, 3] | |
tab3.pre[i, 5] <- prop.test(table(outcomes.pre[, i], datamatch$EV)[2, ], n = apply(table(outcomes.pre[, i], datamatch$EV), 2, sum))$p.value | |
} | |
datamatch[, 10:18][is.na(datamatch[, 10:18]) == "TRUE"] <- 99999 | |
#__________________________ Genetic Matching (NEW) ________________________# | |
set.seed(123) | |
#We bind together the covariates | |
X = cbind(datamatch$polling.place, datamatch$age.group, datamatch$educ, | |
datamatch$male, datamatch$tech, datamatch$pol.info, datamatch$white.collar, | |
datamatch$not.full.time,(datamatch$age.group)^2, (datamatch$age.group)^3, | |
(datamatch$educ)^2, (datamatch$tech)^2) | |
#Covariates on which we intend to acquire balance on | |
balance.matrix <- cbind(X, I(datamatch$age.group*datamatch$educ), I(datamatch$age.group*datamatch$tech), | |
I(datamatch$educ*datamatch$pol.info),I(datamatch$age.group*datamatch$pol.info), | |
I(datamatch$tech*datamatch$pol.info) | |
) | |
#The genMatch function, further explained in the paper. | |
genout <- GenMatch(Tr=datamatch$EV, X=X, balance.matrix=BalanceMat, estimand="ATT", | |
pop.size=1000, max.generations=500, wait.generations=10) | |
#Outcome variable of the data | |
out.var=datamatch$eval.voting | |
#Matching using the genMatch function above | |
mout <- Match(Y=out.var, Tr=datamatch$EV, X=X, estimand="ATT", Weight.matrix=genout) | |
#Check balance | |
balance <- MatchBalance(EV ~ age.group + educ + white.collar + not.full.time + | |
male + tech + pol.info, data = datamatch, match.out=mout, nboots=500) | |
#matched sample | |
newdmatched <- rbind(datamatch[mout$index.tr,], datamatch[mout$index.co,]) | |
newdmatched[newdmatched == 99999] <- NA | |
save(newdmatched, file = "datamatched.Rdata") | |
#__________________________ Table 2, post-matching _________________________# | |
EV.post <- newdmatched[2] | |
covariates.post <- newdmatched[, covariate.lbls] | |
tab2.post <- matrix(NA, nrow = n.covariates, ncol = 4) | |
rownames(tab2.post) <- covariate.lbls | |
colnames(tab2.post) <- c("ev", "tv", "diff", "pvalue") | |
tab2.post[, 1:2] <- cbind(apply(covariates.post[EV.post == 1, ], 2, mean), apply(covariates.post[EV.post == 0,], 2, mean)) | |
tab2.post[, 3] <- tab2.post[, 1] - tab2.post[, 2] | |
for (i in c(1, 2, 6 , 7)){ | |
tab2.post[i, 4]<-ks.boot(covariates.post[,i][EV.post==1],covariates.post[,i][EV.post==0], nboots = 500)$ks.boot.pvalue | |
} | |
for (i in c(3, 4, 5)){ | |
tab2.post[i, 4] <- prop.test(table(covariates.post[, i], EV.post$EV), n = apply(table(covariates.post[, i], EV.post$EV),2 , sum))$p.value | |
} | |
tab2 <- cbind(tab2.pre, tab2.post) | |
tab2[3:5, c(1:3, 5:7)] <- tab2[3:5, c(1:3, 5:7)] * 100 | |
### Table 2 ### | |
print(tab2, digits = 4) | |
#__________________________ Table 3, post-matching _________________________# | |
outcomes.post <- newdmatched[10:18] | |
tab3.post <- matrix(NA, nrow = n.outcomes, ncol = 5) | |
rownames(tab3.post) <- outcomes.lbls | |
colnames(tab3.post) <- c("N", "prop.ev", "prop.tv", "diff", "pvalue") | |
for (i in 1:n.outcomes) { | |
tab3.post[i, 1] <- length(na.omit(outcomes.post[, i])) | |
tab3.post[i, 2:3] <- rev(prop.table(table(outcomes.post[, i], newdmatched$EV), 2)[2, ]) * 100 | |
tab3.post[i, 4] <- tab3.post[i, 2] - tab3.post[i, 3] | |
tab3.post[i, 5] <- prop.test(table(outcomes.post[, i], newdmatched$EV)[2, ], n = apply(table(outcomes.post[, i], newdmatched$EV), 2, sum))$p.value | |
} | |
tab3 <- cbind(tab3.pre, tab3.post) | |
tab3 <- tab3[rev(order(tab3[, 9])), ] | |
### Table 3 ### | |
print(tab3, digits = 4) | |
#______________ Table 4, Post-matching, model-based adjustment _____________# | |
logit.out <- NULL | |
coeffs.and.sd <- NULL | |
tab4.top <- NULL | |
tab4.low <- NULL | |
sims.setx <- NULL | |
sims.out <- NULL | |
for (i in 1:n.outcomes) { | |
logit.out[[i]] <- eval(parse(text = paste("logit.out[[", i, "]]<-zelig(" , | |
rownames(tab3)[i] , | |
"~ EV+age.group + I(age.group^2) + age.group:educ + educ + tech + white.collar + not.full.time + male + pol.info", | |
", data = subset(datamatched, is.na(datamatched[, '", | |
rownames(tab3)[i], | |
"']) != TRUE), model = 'logit')", sep = ""))) | |
coeffs.and.sd[[i]]<-summary(logit.out[[i]])$coefficients[, 1:2] | |
tab4.low<-cbind(tab4.low,coeffs.and.sd[[i]]) | |
x.low <- setx(logit.out[[i]], EV = 0) | |
x.high <- setx(logit.out[[i]], EV = 1) | |
sims.setx[[i]] <- sim(logit.out[[i]], x = x.low, x1 = x.high) | |
sims.out[[i]] <- summary(sims.setx[[i]])$qi.stats$fd[1:2] | |
tab4.top<-c(tab4.top, sims.out[[i]]) | |
} | |
tab4.low <- tab4.low[c(rownames(tab4.low)[2:11], rownames(tab4.low)[1]), ] | |
tab4 <- rbind(tab4.top, tab4.low) | |
### Table 4 ### | |
print(tab4, digits = 2) | |
# print sampe sizes reported in last row of Table 4: | |
for (i in 1:n.outcomes) { | |
print(dim(subset(newdmatched, is.na(newdmatched[, rownames(tab3)[i], ]) != TRUE))[1]) | |
} | |
#_______________________ Table 5, Sensitivity analysis _____________________# | |
matched.pairs <- NULL | |
matched.pairs1 <- NULL | |
matched.pairs2 <- NULL | |
bin <- NULL | |
tab5 <- NULL | |
for (i in 1:n.outcomes) { | |
matched.pairs[[i]] <- cbind(datamatch[row.names(m.out$match.matrix), rownames(tab3)[i]], datamatch[m.out$match.matrix, rownames(tab3)[i]]) | |
matched.pairs[[i]] <- data.frame(na.omit(matched.pairs[[i]])) | |
matched.pairs1[[i]] <- subset(matched.pairs[[i]], matched.pairs[[i]][, 1] != matched.pairs[[i]][, 2] & matched.pairs[[i]][, 2] == 1) | |
matched.pairs2[[i]] <- subset(matched.pairs[[i]], matched.pairs[[i]][, 1] != matched.pairs[[i]][, 2] & matched.pairs[[i]][, 2] == 0) | |
bin[[i]] <- binarysens(x = dim(matched.pairs1[[i]])[1], y = dim(matched.pairs2[[i]])[1], Gamma = 3.0, GammaInc = 0.1) | |
} | |
# In the case of confidence in ballot secrecy, reverse the order of the counts in binarysens (because effect is negative) | |
bin[[9]]<-binarysens(y = dim(matched.pairs1[[i]])[1],x = dim(matched.pairs2[[i]])[1],Gamma = 3.0,GammaInc = 0.1) | |
for (i in 1:n.outcomes) { | |
tab5<-cbind(tab5, as.matrix(bin[[i]]$bounds[, 2:3])) | |
} | |
### Table 5 ### | |
print(tab5, digits = 2) | |
##### Second extension: Sensitivity Analysis for genetic matching ####### | |
#1) calculate Rosenbaum’s bounds for the p-values using psens function (for the genetic matching results) | |
psens(mout, Gamma=5, GammaInc=.1)$bounds | |
#interpretation: the p-value starts becoming insignificant when Γ = 2.8 | |
#2) compute sensitivity analysis using senmv instead of psens because rbounds doesn't account for ties | |
#(i.e., when M ration is not one). | |
# Generate the table of matched set outcomes | |
# Define a function that restructure the result from genetic matching so that it works with senmv | |
# Function adapted from class code | |
make_ymat<-function(matches, y){ | |
# Get indices of treated and control units | |
treated <- unique(matches[, 1]) # Remove repeated indices | |
controls <- matches[, 2] # Keep repeated indices | |
# Get outcomes of control units | |
y_ctrls <- y[controls] | |
# Create a table to check how many matches for each control | |
trt_table <- table(treated) | |
# Get number of sets and size of largest set | |
n_sets <- length(trt_table) | |
max_ctrls <- max(trt_table) | |
# Smallest table necessary is number of sets vs. size of largest set | |
y_mat <- matrix(NA, n_sets, max_ctrls + 1) | |
m <- 0 # Auxiliary indexer that will run linearly through the matches sets | |
# For each set | |
for (i in 1:n_sets){ | |
y_mat[i, 1] <- y[treated[i]] # Get treated outcome | |
# Get controls outcomes | |
y_mat[i, 2:(1+trt_table[i])] <- y_ctrls[(m+1):(m+trt_table[i])] | |
# Advance the indexer | |
m <- m + trt_table[i] | |
} | |
y_mat | |
} | |
###### | |
ymat <- make_ymat(genout$matches, eval.voting) | |
newdata <- na.omit(ymat) #remove NA values | |
senmv.out <- senmv(y=newdata, gamma=1, method='t') | |
senmv.out | |
######## trying different gammas | |
for (gamma in seq(1, 3, 0.1)) | |
cat(sprintf("Gamma: %.1f\tp-val: %.5f\n", | |
gamma, senmv(y=newdata, gamma=gamma, method='t')$pval)) | |
#pvalue starts becoming insignificant when gamma is 2.7 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment