Created
March 24, 2018 15:50
-
-
Save pattoM/10542456c3fe0b38322d7c802cae8c11 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# QUESTION 1 ----
# Experimental benchmark: compare the treated file against the control file.
# Both files are read without a header, so read.table() names the columns
# V1, V2, ... automatically. V1 is used below as the group indicator and V10
# as the outcome (1978 earnings, per the original comment).
control <- read.table("nswre74_control.txt", header = FALSE)
treated <- read.table("nswre74_treated.txt", header = FALSE)

# Simple difference in mean 1978 earnings (treated minus control).
diff_means <- mean(treated$V10) - mean(control$V10)
diff_means

# Stack both groups into one data frame and print it.
alldata <- rbind(control, treated)
alldata

# Univariate linear regression of the outcome on the group indicator.
# Passing `data =` (instead of alldata$V10 ~ alldata$V1) keeps the
# coefficient labelled "V1" and lets predict() work on new data.
fit1 <- lm(V10 ~ V1, data = alldata)
summary(fit1)
# QUESTION 2 ----
# Replace the original controls with the observations in cps_controls.txt.
# Relies on `treated` having been read in the QUESTION 1 section.
cps_control <- read.table("cps_controls.txt", header = FALSE)
cps_control

# Difference in mean outcome (V10): treated minus the CPS controls.
cps_diff_means <- mean(treated$V10) - mean(cps_control$V10)
cps_diff_means

# Stack the original treated units on top of the new CPS controls.
newdata <- rbind(treated, cps_control)
newdata

# Linear model to obtain the estimate, standard error and p-value.
# glm() with the gaussian family (its default, made explicit here) fits the
# same least-squares model as lm(); `data =` replaces the newdata$... terms.
fit2 <- glm(V10 ~ V1, data = newdata, family = gaussian)
summary(fit2)
# QUESTION 3 ----
# Propensity-score matching on the combined treated + CPS-control sample.
# Relies on `newdata` built in the QUESTION 2 section.
set.seed(1234)
library(Matching)

# Treatment indicator, outcome, and the covariate matrix (columns V2..V9).
Tr <- newdata$V1
Y <- newdata$V10
X <- cbind(
  newdata$V2, newdata$V3, newdata$V4, newdata$V5,
  newdata$V6, newdata$V7, newdata$V8, newdata$V9
)
summary(Tr)
summary(Y)
summary(X)

# Propensity-score model: logistic regression of treatment on the covariates.
pscoremod <- glm(Tr ~ X, family = binomial)
summary(pscoremod)

# One-to-one matching with replacement on the estimated propensity score.
# fitted() is used instead of pscoremod$fitted, which only worked through
# `$` partial matching of the `fitted.values` component.
matchedresult <- Match(
  Y = Y, Tr = Tr, X = fitted(pscoremod),
  estimand = "ATT", M = 1, ties = TRUE, replace = TRUE
)
summary(matchedresult)

# Regression of outcome on treatment within the matched data.
# NOTE(review): this lm() applies no weights, so with ties = TRUE it may not
# reproduce the estimate reported by summary(matchedresult) - confirm intent.
te_estimate1 <- lm(Y ~ Tr, data = matchedresult$mdata)
summary(te_estimate1)

# Check covariate balance after matching (nboots = 0: no bootstrap samples).
MatchBalance(Tr ~ X, match.out = matchedresult, nboots = 0)

# QQ plot of V2 for matched controls vs. matched treated units, with the
# 45-degree reference line. The original comment describes V2 as age, chosen
# because its wider range of values visualizes well.
qqplot(
  newdata$V2[matchedresult$index.control],
  newdata$V2[matchedresult$index.treated]
)
abline(coef = c(0, 1), col = 3)
# QUESTION 4 ----
# Multivariate matching: match on the covariates themselves plus the
# propensity score estimated in QUESTION 3.
# Relies on `newdata` (QUESTION 2) and `pscoremod` (QUESTION 3).
set.seed(1234)
Tr_mvar <- newdata$V1
Y_mvar <- newdata$V10

# Covariate matrix: V2, V3, V8, V9 as-is; V4..V7 are each split into a
# "== 0" and a "== 1" indicator column; the fitted propensity score is the
# last column. fitted() replaces pscoremod$fitted, which relied on `$`
# partial matching of `fitted.values`.
# NOTE(review): for a 0/1 variable the two indicator columns are exact
# complements, so each such variable enters the distance twice - confirm
# this extra weight is intended.
X_mvar <- cbind(
  newdata$V2, newdata$V3,
  newdata$V4 == 0, newdata$V4 == 1,
  newdata$V5 == 0, newdata$V5 == 1,
  newdata$V6 == 0, newdata$V6 == 1,
  newdata$V7 == 0, newdata$V7 == 1,
  newdata$V8, newdata$V9,
  fitted(pscoremod)
)
summary(Tr_mvar)
summary(Y_mvar)
summary(X_mvar)

# One-to-one matching with replacement on the full covariate matrix.
matchedresult2 <- Match(
  Y = Y_mvar, Tr = Tr_mvar, X = X_mvar,
  estimand = "ATT", M = 1, ties = TRUE, replace = TRUE
)
summary(matchedresult2)

# Check covariate balance after matching, with 100 bootstrap samples.
MatchBalance(Tr_mvar ~ X_mvar, match.out = matchedresult2, nboots = 100)

# QQ plot of V2 (age, per the original comment) for matched controls vs.
# matched treated units, with the 45-degree reference line.
qqplot(
  newdata$V2[matchedresult2$index.control],
  newdata$V2[matchedresult2$index.treated]
)
abline(coef = c(0, 1), col = 3)
# QUESTION 5 ----
# Repeat questions 3 and 4 with genetic matching: GenMatch() searches for
# covariate weights, which are then handed to Match() via Weight.matrix.
# Relies on Tr, Y, X (QUESTION 3) and Tr_mvar, Y_mvar, X_mvar (QUESTION 4),
# and on library(Matching) having been attached in QUESTION 3.
library(rgenoud)

# --- Question 3 setup (Tr, Y, X) ---
# NOTE(review): pop.size = 10 is a very small search population; presumably
# chosen for speed - consider a larger value for final results.
weights1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 10)
matchedgen <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = weights1)
summary(matchedgen)
MatchBalance(Tr ~ X, match.out = matchedgen, nboots = 100)

# QQ plot of V2 (age, per the original comment) for matched controls vs.
# matched treated units, with the 45-degree reference line.
qqplot(
  newdata$V2[matchedgen$index.control],
  newdata$V2[matchedgen$index.treated]
)
abline(coef = c(0, 1), col = 4)

# --- Question 4 setup (Tr_mvar, Y_mvar, X_mvar) ---
weights2 <- GenMatch(
  Tr = Tr_mvar, X = X_mvar, BalanceMatrix = X_mvar, pop.size = 10
)
matchedgen2 <- Match(
  Y = Y_mvar, Tr = Tr_mvar, X = X_mvar, Weight.matrix = weights2
)
summary(matchedgen2)
MatchBalance(Tr_mvar ~ X_mvar, match.out = matchedgen2, nboots = 100)

# Same QQ plot of V2 for the second genetic-matching result.
qqplot(
  newdata$V2[matchedgen2$index.control],
  newdata$V2[matchedgen2$index.treated]
)
abline(coef = c(0, 1), col = 5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment