Created
September 21, 2018 15:38
-
-
Save viniciusmss/f8edcb6490a651e270b9e0ea31ef8088 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup ----
# The workspace is deliberately NOT wiped with rm(list = ls()): that call
# deletes whatever objects the person running the script already has, while
# leaving attached packages and options in place. Restart R if a genuinely
# clean session is needed.

# Load the `lalonde` data set shipped with the Matching package.
library(Matching)
data(lalonde)

# View() opens a data viewer window, so only call it when someone is there to
# look at it; non-interactive runs (e.g. Rscript) skip straight to str().
if (interactive()) {
  View(lalonde)
}
str(lalonde)
# Pick the two individuals followed through the rest of this tutorial: the
# youngest and the oldest person in the data.
# which.min()/which.max() return the index of the FIRST minimum/maximum (so
# ties resolve to the first matching row) and ignore missing ages, whereas
# which(age == min(age))[1] would yield NA as soon as one age is missing.
youngest <- which.min(lalonde$age)
oldest <- which.max(lalonde$age)

# Describe both individuals: age, years of schooling and 1978 earnings.
sprintf(
  "The youngest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
  lalonde$age[youngest], lalonde$educ[youngest], lalonde$re78[youngest]
)
sprintf(
  "The oldest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
  lalonde$age[oldest], lalonde$educ[oldest], lalonde$re78[oldest]
)
# Additive model: 1978 earnings explained by age and years of schooling,
# with no interaction between the two predictors.
lm.additive <- lm(re78 ~ age + educ, data = lalonde)
summary(lm.additive)

# Model predictions for the two individuals picked above. Accuracy is not the
# point here; what matters is how the predictions respond to changes in the
# inputs later on.
sprintf(
  "The youngest person is predicted to have earned %0.f dollars in 1978.",
  predict(lm.additive, newdata = lalonde[youngest, ])
)
sprintf(
  "The oldest person is predicted to have earned %0.f dollars in 1978.",
  predict(lm.additive, newdata = lalonde[oldest, ])
)
# Counterfactual: how would the predicted income change if both individuals
# had had one additional year of schooling?
# Work on a copy so the original data stay available for comparison, and bump
# `educ` by one for the two selected rows only.
lalonde_mod <- lalonde
lalonde_mod$educ[c(youngest, oldest)] <-
  lalonde$educ[c(youngest, oldest)] + 1

# Side-by-side check: original rows first, then the modified rows.
lalonde[c(youngest, oldest), ]
lalonde_mod[c(youngest, oldest), ]
# Additive-model predictions after the extra year of schooling.
predict(lm.additive, newdata = lalonde_mod[youngest, ])
predict(lm.additive, newdata = lalonde_mod[oldest, ])

# Change in predicted earnings attributable to the extra year
# (modified prediction minus original prediction), per individual.
predict(lm.additive, newdata = lalonde_mod[youngest, ]) -
  predict(lm.additive, newdata = lalonde[youngest, ])
predict(lm.additive, newdata = lalonde_mod[oldest, ]) -
  predict(lm.additive, newdata = lalonde[oldest, ])

# With no interaction term, both differences above should match the `educ`
# coefficient printed here.
coef(lm.additive)
# Non-additive model: `age * educ` expands to age + educ + age:educ, i.e. the
# two main effects plus their interaction.
lm.non_additive <- lm(re78 ~ age * educ, data = lalonde)
summary(lm.non_additive)

# Repeat the prediction exercise with the interaction model.
# 1) Years of schooling left unchanged.
predict(lm.non_additive, newdata = lalonde[youngest, ])
predict(lm.non_additive, newdata = lalonde[oldest, ])

# 2) One extra year of schooling.
predict(lm.non_additive, newdata = lalonde_mod[youngest, ])
predict(lm.non_additive, newdata = lalonde_mod[oldest, ])

# 3) Difference between the two (modified minus original), per individual.
predict(lm.non_additive, newdata = lalonde_mod[youngest, ]) -
  predict(lm.non_additive, newdata = lalonde[youngest, ])
predict(lm.non_additive, newdata = lalonde_mod[oldest, ]) -
  predict(lm.non_additive, newdata = lalonde[oldest, ])
# Why do the two individuals now get different changes in predicted earnings?
# Write the fitted interaction model as
##   re78     = b0 + b_age*age + b_educ*educ + b_int*age*educ
# and add one year of schooling:
##   re78_mod = b0 + b_age*age + b_educ*(educ + 1) + b_int*age*(educ + 1)
##            = b0 + b_age*age + b_educ*educ + b_educ + b_int*age*educ + b_int*age
##            = re78 + b_educ + b_int*age
# So the effect of one extra year of schooling is b_educ + b_int*age: it
# depends on the person's age.
#
# Reproduce the differences from the coefficients. Coefficients are looked up
# by name ("educ" = b_educ, "age:educ" = b_int) rather than by position, so
# the check keeps working if the order of terms in the formula changes.
coef(lm.non_additive)["educ"] +
  lalonde$age[youngest] * coef(lm.non_additive)["age:educ"]
coef(lm.non_additive)["educ"] +
  lalonde$age[oldest] * coef(lm.non_additive)["age:educ"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment