viniciusmss · September 21, 2018 15:38
diff --git a/Interaction Terms.R b/Interaction Terms.R
 # Run this tutorial from a fresh R session instead of clearing the workspace
 # in code: rm(list = ls()) only removes global objects (attached packages and
 # options stay as they were) and it destroys the work of anyone who source()s
 # this file. Every object used below is (re)created by the script itself.

 # Loading the data
 library(Matching)
 data(lalonde)

 # Inspect the data. View() opens a spreadsheet-style viewer, so it is only
 # called when somebody is actually sitting at the console; str() prints the
 # column types in every mode.
 if (interactive()) {
   View(lalonde)
 }
 str(lalonde)

 # Pick two individuals to follow through the tutorial: the youngest and the
 # oldest person in the data. which.min()/which.max() return the index of the
 # first minimum/maximum, so ties resolve to the earliest row.
 youngest <- which.min(lalonde$age)
 oldest <- which.max(lalonde$age)

 # Report each person's age, years of schooling and 1978 earnings (re78).
 sprintf("The youngest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
         lalonde$age[youngest], lalonde$educ[youngest], lalonde$re78[youngest])
 sprintf("The oldest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
         lalonde$age[oldest], lalonde$educ[oldest], lalonde$re78[oldest])

 # Additive specification: 1978 earnings as a linear function of age and
 # years of education, with no interaction between the two.
 lm.additive <- lm(formula = re78 ~ age + educ, data = lalonde)
 summary(lm.additive)

 # Fitted earnings for the two reference individuals. The point here is to see
 # how the model behaves, not how accurate it is.
 sprintf(
   "The youngest person is predicted to have earned %0.f dollars in 1978.",
   predict(lm.additive, newdata = lalonde[youngest, ])
 )
 sprintf(
   "The oldest person is predicted to have earned %0.f dollars in 1978.",
   predict(lm.additive, newdata = lalonde[oldest, ])
 )

 # Counterfactual: give both individuals one extra year of schooling and see
 # how their predicted income moves.
 lalonde_mod <- lalonde
 lalonde_mod$educ[c(youngest, oldest)] <-
   lalonde_mod$educ[c(youngest, oldest)] + 1

 # The two rows before and after the change
 lalonde[c(youngest, oldest), ]
 lalonde_mod[c(youngest, oldest), ]

 # Predictions under the counterfactual
 predict(lm.additive, newdata = lalonde_mod[youngest, ])
 predict(lm.additive, newdata = lalonde_mod[oldest, ])

 # Change in prediction caused by the extra year of schooling; compare both
 # differences with the coefficients printed by coef().
 predict(lm.additive, newdata = lalonde_mod[youngest, ]) -
   predict(lm.additive, newdata = lalonde[youngest, ])
 predict(lm.additive, newdata = lalonde_mod[oldest, ]) -
   predict(lm.additive, newdata = lalonde[oldest, ])
 coef(lm.additive)

 # Same regression, now with an interaction: in a formula, age * educ expands
 # to age + educ + age:educ.
 lm.non_additive <- lm(formula = re78 ~ age * educ, data = lalonde)
 summary(lm.non_additive)

 # Predictions for the two individuals with their original schooling
 predict(lm.non_additive, newdata = lalonde[youngest, ])
 predict(lm.non_additive, newdata = lalonde[oldest, ])

 # Predictions after the extra year of schooling
 predict(lm.non_additive, newdata = lalonde_mod[youngest, ])
 predict(lm.non_additive, newdata = lalonde_mod[oldest, ])

 # Change in prediction caused by the extra year, per individual
 predict(lm.non_additive, newdata = lalonde_mod[youngest, ]) -
   predict(lm.non_additive, newdata = lalonde[youngest, ])
 predict(lm.non_additive, newdata = lalonde_mod[oldest, ]) -
   predict(lm.non_additive, newdata = lalonde[oldest, ])

 # Why is this the case?
 # With c0 the intercept, c1 the age coefficient, c2 the educ coefficient and
 # c3 the age:educ coefficient:
 ## re78     = c0 + c1*age + c2*educ + c3*age*educ
 ## re78_mod = c0 + c1*age + c2*(educ+1) + c3*age*(educ+1)
 ## re78_mod = c0 + c1*age + c2*educ + c2 + c3*age*educ + c3*age
 ## re78_mod = re78 + c2 + c3*age

 # Effect of one extra year of schooling, c2 + c3*age, for each individual.
 # Coefficients are looked up by name so the result stays correct even if the
 # order of terms in the formula changes.
 coef(lm.non_additive)["educ"] +
   lalonde$age[youngest] * coef(lm.non_additive)["age:educ"]
 coef(lm.non_additive)["educ"] +
   lalonde$age[oldest] * coef(lm.non_additive)["age:educ"]
	# Run this tutorial from a fresh R session instead of clearing the workspace
	# in code: rm(list = ls()) only removes global objects (attached packages and
	# options stay as they were) and it destroys the work of anyone who source()s
	# this file. Every object used below is (re)created by the script itself.

	# Loading the data
	library(Matching)
	data(lalonde)

	# Inspect the data. View() opens a spreadsheet-style viewer, so it is only
	# called when somebody is actually sitting at the console; str() prints the
	# column types in every mode.
	if (interactive()) {
	  View(lalonde)
	}
	str(lalonde)

	# Pick two individuals to follow through the tutorial: the youngest and the
	# oldest person in the data. which.min()/which.max() return the index of the
	# first minimum/maximum, so ties resolve to the earliest row.
	youngest <- which.min(lalonde$age)
	oldest <- which.max(lalonde$age)

	# Report each person's age, years of schooling and 1978 earnings (re78).
	sprintf("The youngest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
	        lalonde$age[youngest], lalonde$educ[youngest], lalonde$re78[youngest])
	sprintf("The oldest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
	        lalonde$age[oldest], lalonde$educ[oldest], lalonde$re78[oldest])

	# Additive specification: 1978 earnings as a linear function of age and
	# years of education, with no interaction between the two.
	lm.additive <- lm(formula = re78 ~ age + educ, data = lalonde)
	summary(lm.additive)

	# Fitted earnings for the two reference individuals. The point here is to see
	# how the model behaves, not how accurate it is.
	sprintf(
	  "The youngest person is predicted to have earned %0.f dollars in 1978.",
	  predict(lm.additive, newdata = lalonde[youngest, ])
	)
	sprintf(
	  "The oldest person is predicted to have earned %0.f dollars in 1978.",
	  predict(lm.additive, newdata = lalonde[oldest, ])
	)

	# Counterfactual: give both individuals one extra year of schooling and see
	# how their predicted income moves.
	lalonde_mod <- lalonde
	lalonde_mod$educ[c(youngest, oldest)] <-
	  lalonde_mod$educ[c(youngest, oldest)] + 1

	# The two rows before and after the change
	lalonde[c(youngest, oldest), ]
	lalonde_mod[c(youngest, oldest), ]

	# Predictions under the counterfactual
	predict(lm.additive, newdata = lalonde_mod[youngest, ])
	predict(lm.additive, newdata = lalonde_mod[oldest, ])

	# Change in prediction caused by the extra year of schooling; compare both
	# differences with the coefficients printed by coef().
	predict(lm.additive, newdata = lalonde_mod[youngest, ]) -
	  predict(lm.additive, newdata = lalonde[youngest, ])
	predict(lm.additive, newdata = lalonde_mod[oldest, ]) -
	  predict(lm.additive, newdata = lalonde[oldest, ])
	coef(lm.additive)

	# Same regression, now with an interaction: in a formula, age * educ expands
	# to age + educ + age:educ.
	lm.non_additive <- lm(formula = re78 ~ age * educ, data = lalonde)
	summary(lm.non_additive)

	# Predictions for the two individuals with their original schooling
	predict(lm.non_additive, newdata = lalonde[youngest, ])
	predict(lm.non_additive, newdata = lalonde[oldest, ])

	# Predictions after the extra year of schooling
	predict(lm.non_additive, newdata = lalonde_mod[youngest, ])
	predict(lm.non_additive, newdata = lalonde_mod[oldest, ])

	# Change in prediction caused by the extra year, per individual
	predict(lm.non_additive, newdata = lalonde_mod[youngest, ]) -
	  predict(lm.non_additive, newdata = lalonde[youngest, ])
	predict(lm.non_additive, newdata = lalonde_mod[oldest, ]) -
	  predict(lm.non_additive, newdata = lalonde[oldest, ])

	# Why is this the case?
	# With c0 the intercept, c1 the age coefficient, c2 the educ coefficient and
	# c3 the age:educ coefficient:
	## re78     = c0 + c1*age + c2*educ + c3*age*educ
	## re78_mod = c0 + c1*age + c2*(educ+1) + c3*age*(educ+1)
	## re78_mod = c0 + c1*age + c2*educ + c2 + c3*age*educ + c3*age
	## re78_mod = re78 + c2 + c3*age

	# Effect of one extra year of schooling, c2 + c3*age, for each individual.
	# Coefficients are looked up by name so the result stays correct even if the
	# order of terms in the formula changes.
	coef(lm.non_additive)["educ"] +
	  lalonde$age[youngest] * coef(lm.non_additive)["age:educ"]
	coef(lm.non_additive)["educ"] +
	  lalonde$age[oldest] * coef(lm.non_additive)["age:educ"]