Created
November 9, 2023 16:12
-
-
Save diamonaj/83e78b0117a5f45e119b4c9e7b79ba69 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstration of regression to the mean with simulated before/after scores.
# Fixing the seed makes the simulated scores reproducible from run to run.
set.seed(432)

# Imagine these are scores before a program:
before <- rnorm(50, mean = 0, sd = 10)

# Imagine these are scores after the program. On average the program adds
# 5 points, but there is a lot of luck (noise with sd = 20) involved in the
# "after" assessment.
after <- before + rnorm(50, mean = 5, sd = 20)

# The scores are correlated, but not perfectly correlated (the original
# author's notes report a correlation of about 0.32 for this simulation).
cor(before, after)

# Convert both sets of scores to z-scores: each value is expressed as the
# number of standard deviations it lies from its own group's mean, which
# shows how extreme each "before" and each "after" score is.
before_zscore <- (before - mean(before)) / sd(before)
after_zscore <- (after - mean(after)) / sd(after)

# Plot the z-scored observations (before vs. after).
plot(
  x = before_zscore, y = after_zscore,
  pch = 16, col = "blue",
  xlim = c(-3, 3), ylim = c(-3, 3)
)

# Run a linear regression (fit a line) through the points in the plot.
# The slope of a regression line fitted to z-scored (scaled) observations
# always equals the simple correlation between them, and a correlation is
# never less than -1 or greater than +1 -- so this line can NEVER have a
# slope greater than 1 or less than -1.
reg1 <- lm(after_zscore ~ before_zscore)
abline(reg1, col = "red", lwd = 4)

# The slope of the line equals the correlation printed above. In the
# coefficient table it is the number in the first column ("Estimate"),
# second row ("before_zscore").
summary(reg1)

# Just to show that z-scoring leaves the correlation unchanged:
cor(before_zscore, after_zscore)

# This is why correlated before/after data always show regression to the
# mean. Both variables are centered, so the fitted line passes through the
# origin: for any given 'before' z-score b (e.g., b = 1), the expected
# 'after' z-score (the value on the regression line) is slope * b.
# Because |slope| <= 1, the magnitude of that expected value is less than
# or equal to |b| (the absolute value of b).
# It equals |b| only when the correlation is perfect, i.e. the slope of the
# regression line is exactly +1 or -1. Otherwise the line is shallower
# (less steep) than that, and the expected 'after' z-score is closer to the
# mean (zero) than b was.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment