diamonaj · November 11, 2018 18:11
diff --git a/genetic matching lalonde exercise.R b/genetic matching lalonde exercise.R
 # Genetic matching exercise: find covariate weights with GenMatch(), match
 # treated units to controls, check the resulting balance, and plot the "age"
 # distributions before and after matching.
 # Console output is mirrored to transcript.txt until the closing sink().
 sink("transcript.txt", split = TRUE)

 cat(getwd(), "\n")

 # The workspace is deliberately NOT wiped with rm(list = ls()): every object
 # used below is created explicitly by this script.
 library(foreign)
 # GenMatch(), Match() and MatchBalance() are provided by the Matching package.
 library(Matching)

 foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00087718-6996/dw-data.csv")

 # Treatment indicator (1 = treated, 0 = control). It has to exist as a
 # standalone object because the calls below refer to a bare `treat`.
 treat <- foo$treat

 # The covariates we match on
 X <- cbind(foo$age, foo$education, foo$black, foo$hispanic, foo$married,
            foo$nodegree, foo$re75, foo$re74)

 # The covariates we want to obtain balance on: everything in 'X' plus an
 # interaction term and squared terms
 BalanceMat <- cbind(foo$age, foo$education, foo$black, foo$hispanic,
                     foo$married, foo$nodegree, foo$re75, foo$re74,
                     I(foo$re74*foo$re75), I(foo$age^2), I(foo$education^2),
                     I(foo$re74^2), I(foo$re75^2))

 #
 # Let's call GenMatch() to find the optimal weight to give each
 # covariate in 'X' so that we achieve balance on the covariates in
 # 'BalanceMat'. This is only an example so we want GenMatch to be quick,
 # so the population size has been set to only 50 via the 'pop.size'
 # option. This is *WAY* too small for actual problems.
 # For details see http://sekhon.berkeley.edu/papers/MatchingJSS.pdf.

 # It may take a while!!! Notice that I set "nboots"...
 genout <- GenMatch(Tr = treat, X = X, BalanceMatrix = BalanceMat,
                    estimand = "ATT", pop.size = 50, max.generations = 10,
                    wait.generations = 5, nboots = 1000)

 # The outcome variable
 # NOTE(review): no outcome is defined or passed to Match() below, so this
 # script only produces the matched sets. Supply Y = <outcome> to Match() to
 # obtain an effect estimate -- confirm which column holds the outcome.

 #
 # Now that GenMatch() has found the optimal weights, let's match
 # using those weights
 #
 mout <- Match(Tr = treat, X = X, estimand = "ATT", Weight.matrix = genout)

 #
 # Let's determine if balance has actually been obtained on the variables of
 # interest
 #
 mb <- MatchBalance(treat ~ foo$age + foo$education + foo$black +
                      foo$hispanic + foo$married + foo$nodegree +
                      foo$re75 + foo$re74 + I(foo$re74*foo$re75) +
                      I(foo$age^2) + I(foo$education^2) + I(foo$re74^2) +
                      I(foo$re75^2),
                    match.out = mout, nboots = 1000)

 # What did genetic matching do for the balance on "AGE"???

 # identify and store "age" for the matched treated and control groups
 age_treated <- foo$age[mout$index.treated]
 age_controls <- foo$age[mout$index.control]

 # draw the distributions: matched treated (red) and matched controls (blue)
 plot(density(age_treated), col = "red")
 lines(density(age_controls), col = "blue", lwd = 2)

 # "age" for the original controls
 lines(density(foo$age[foo$treat == 0]), col = "orange", lwd = 2)

 # "age" for the original treated
 lines(density(foo$age[foo$treat == 1]), col = "brown", lty = "dotted", lwd = 2)

 # stop mirroring console output to transcript.txt
 sink()
	# Genetic matching exercise: find covariate weights with GenMatch(), match
	# treated units to controls, check the resulting balance, and plot the "age"
	# distributions before and after matching.
	# Console output is mirrored to transcript.txt until the closing sink().
	sink("transcript.txt", split = TRUE)

	cat(getwd(), "\n")

	# The workspace is deliberately NOT wiped with rm(list = ls()): every object
	# used below is created explicitly by this script.
	library(foreign)
	# GenMatch(), Match() and MatchBalance() are provided by the Matching package.
	library(Matching)

	foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00087718-6996/dw-data.csv")

	# Treatment indicator (1 = treated, 0 = control). It has to exist as a
	# standalone object because the calls below refer to a bare `treat`.
	treat <- foo$treat

	# The covariates we match on
	X <- cbind(foo$age, foo$education, foo$black, foo$hispanic, foo$married,
	           foo$nodegree, foo$re75, foo$re74)

	# The covariates we want to obtain balance on: everything in 'X' plus an
	# interaction term and squared terms
	BalanceMat <- cbind(foo$age, foo$education, foo$black, foo$hispanic,
	                    foo$married, foo$nodegree, foo$re75, foo$re74,
	                    I(foo$re74*foo$re75), I(foo$age^2), I(foo$education^2),
	                    I(foo$re74^2), I(foo$re75^2))

	#
	# Let's call GenMatch() to find the optimal weight to give each
	# covariate in 'X' so that we achieve balance on the covariates in
	# 'BalanceMat'. This is only an example so we want GenMatch to be quick,
	# so the population size has been set to only 50 via the 'pop.size'
	# option. This is WAY too small for actual problems.
	# For details see http://sekhon.berkeley.edu/papers/MatchingJSS.pdf.

	# It may take a while!!! Notice that I set "nboots"...
	genout <- GenMatch(Tr = treat, X = X, BalanceMatrix = BalanceMat,
	                   estimand = "ATT", pop.size = 50, max.generations = 10,
	                   wait.generations = 5, nboots = 1000)

	# The outcome variable
	# NOTE(review): no outcome is defined or passed to Match() below, so this
	# script only produces the matched sets. Supply Y = <outcome> to Match() to
	# obtain an effect estimate -- confirm which column holds the outcome.

	#
	# Now that GenMatch() has found the optimal weights, let's match
	# using those weights
	#
	mout <- Match(Tr = treat, X = X, estimand = "ATT", Weight.matrix = genout)

	#
	# Let's determine if balance has actually been obtained on the variables of
	# interest
	#
	mb <- MatchBalance(treat ~ foo$age + foo$education + foo$black +
	                     foo$hispanic + foo$married + foo$nodegree +
	                     foo$re75 + foo$re74 + I(foo$re74*foo$re75) +
	                     I(foo$age^2) + I(foo$education^2) + I(foo$re74^2) +
	                     I(foo$re75^2),
	                   match.out = mout, nboots = 1000)

	# What did genetic matching do for the balance on "AGE"???

	# identify and store "age" for the matched treated and control groups
	age_treated <- foo$age[mout$index.treated]
	age_controls <- foo$age[mout$index.control]

	# draw the distributions: matched treated (red) and matched controls (blue)
	plot(density(age_treated), col = "red")
	lines(density(age_controls), col = "blue", lwd = 2)

	# "age" for the original controls
	lines(density(foo$age[foo$treat == 0]), col = "orange", lwd = 2)

	# "age" for the original treated
	lines(density(foo$age[foo$treat == 1]), col = "brown", lty = "dotted", lwd = 2)

	# stop mirroring console output to transcript.txt
	sink()