Skip to content

Instantly share code, notes, and snippets.

@viniciusmss
Created September 21, 2018 15:38
Show Gist options
  • Save viniciusmss/f8edcb6490a651e270b9e0ea31ef8088 to your computer and use it in GitHub Desktop.
Save viniciusmss/f8edcb6490a651e270b9e0ea31ef8088 to your computer and use it in GitHub Desktop.
# Setup ----
# The workspace is intentionally left alone: `rm(list = ls())` would delete
# every object the user already has in the global environment, and it does
# not reset attached packages or options anyway. Restart R for a clean
# session instead.

# Loading the data
library(Matching)
data(lalonde)

# Inspecting the data. View() opens a data viewer window, so it is only
# requested when the session is interactive; str() prints a compact
# summary of the columns either way.
if (interactive()) {
  View(lalonde)
}
str(lalonde)
# Let's look at two individuals. For the purposes of this tutorial,
# they will be the youngest and oldest person in the data.
# which.min()/which.max() return the position of the first minimum/maximum,
# so ties are broken the same way as `which(x == min(x))[1]`, and missing
# ages are skipped instead of turning the index into NA.
youngest <- which.min(lalonde$age)
oldest <- which.max(lalonde$age)

# Describe both people: age, years of schooling and 1978 earnings (re78).
# Values are read as `df$col[i]` rather than by building a one-row data
# frame for every single value.
sprintf(
  "The youngest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
  lalonde$age[youngest], lalonde$educ[youngest], lalonde$re78[youngest]
)
sprintf(
  "The oldest person is %d years old, had %d years of schooling, and earned %0.f dollars in 1978.",
  lalonde$age[oldest], lalonde$educ[oldest], lalonde$re78[oldest]
)
# Additive model: 1978 earnings (re78) regressed on age and education.
lm.additive <- lm(re78 ~ age + educ, data = lalonde)
summary(lm.additive)

# Fitted earnings for the two selected people. Accuracy is not the point
# here; we only want to see what the model predicts for each of them.
sprintf(
  "The youngest person is predicted to have earned %0.f dollars in 1978.",
  predict(lm.additive, newdata = lalonde[youngest, ])
)
sprintf(
  "The oldest person is predicted to have earned %0.f dollars in 1978.",
  predict(lm.additive, newdata = lalonde[oldest, ])
)
# Counterfactual: what would the model predict if both people had
# had one additional year of schooling?
lalonde_mod <- lalonde
lalonde_mod$educ[c(youngest, oldest)] <-
  lalonde_mod$educ[c(youngest, oldest)] + 1

# Original rows next to the modified rows
lalonde[c(youngest, oldest), ]
lalonde_mod[c(youngest, oldest), ]

# Predictions after the extra year of schooling
predict(lm.additive, newdata = lalonde_mod[youngest, ])
predict(lm.additive, newdata = lalonde_mod[oldest, ])

# Change in predicted earnings (modified minus original) for each person
predict(lm.additive, newdata = lalonde_mod[youngest, ]) -
  predict(lm.additive, newdata = lalonde[youngest, ])
predict(lm.additive, newdata = lalonde_mod[oldest, ]) -
  predict(lm.additive, newdata = lalonde[oldest, ])

# The fitted coefficients, to compare against the two differences above
coef(lm.additive)
# Same exercise with an interaction: `age * educ` expands to
# age + educ + age:educ.
lm.non_additive <- lm(re78 ~ age * educ, data = lalonde)
summary(lm.non_additive)

# Predictions from the interaction model
# ... with the original years of schooling
predict(lm.non_additive, newdata = lalonde[youngest, ])
predict(lm.non_additive, newdata = lalonde[oldest, ])

# ... with one extra year of schooling
predict(lm.non_additive, newdata = lalonde_mod[youngest, ])
predict(lm.non_additive, newdata = lalonde_mod[oldest, ])

# Change in predicted earnings (modified minus original) for each person
predict(lm.non_additive, newdata = lalonde_mod[youngest, ]) -
  predict(lm.non_additive, newdata = lalonde[youngest, ])
predict(lm.non_additive, newdata = lalonde_mod[oldest, ]) -
  predict(lm.non_additive, newdata = lalonde[oldest, ])
# Why is this the case?
## With c0 the intercept, c1 the age coefficient, c2 the educ coefficient
## and c3 the age:educ interaction coefficient:
## re78     = c0 + c1*age + c2*educ + c3*age*educ
## re78_mod = c0 + c1*age + c2*(educ+1) + c3*age*(educ+1)
## re78_mod = c0 + c1*age + c2*educ + c2 + c3*age*educ + c3*age
## re78_mod = re78 + c2 + c3*age
# So the effect of one extra year of schooling is c2 + c3*age, which
# depends on the person's age. Coefficients are looked up by name rather
# than by position, so this check does not rely on the order of the terms.
coef(lm.non_additive)["educ"] +
  lalonde$age[youngest] * coef(lm.non_additive)["age:educ"]
coef(lm.non_additive)["educ"] +
  lalonde$age[oldest] * coef(lm.non_additive)["age:educ"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment