Created
October 4, 2020 23:49
-
-
Save TonyLadson/286077221cc07022a77aa74d0a6c211c to your computer and use it in GitHub Desktop.
Regression examples and figures for the blog https://tonyladson.wordpress.com/2020/10/05/errors-in-variables-regression/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(deming) | |
# Functions --------------------------------------------------------------- | |
# given a point (x1, y1) and a line defined by a slope m and intercept c1 | |
# function to return the point x on the line where a line drawn perpendicular to x1, y1 | |
# will intercept the line | |
# These formulas come from the point of intersection formula | |
# https://byjus.com/point-of-intersection-formula/ | |
Calc_xi <- function(x1, y1, m, c1) { | |
(-1*(y1 - (-1/m)*x1) - (-1*c1))/(-m - (1/m)) | |
} | |
Calc_yi <- function(x1, y1, m, c1){ | |
((-1/m)*c1 - m*(y1 - (-1/m)*x1) ) / (-m - 1/m) | |
} | |
# Data -------------------------------------------------------------------- | |
set.seed(2003) | |
xdf <- tibble(x = 1:10, y = 1:10 + rnorm(10, sd = 1.5)) | |
xdf %>% | |
ggplot(aes(x, y)) + | |
geom_point() + | |
scale_x_continuous(limits = c(0,12), breaks = seq(0,12,2)) + | |
scale_y_continuous(limits = c(0,12), breaks = seq(0,12,2)) | |
# Model ------------------------------------------------------------------- | |
# Model Y as a function of x | |
m_yfnx <- lm(y ~ x, data = xdf) | |
summary(m_yfnx) | |
coef_yfnx <- coef(m_yfnx) | |
# All the errors in the y values | |
xdf %>% | |
ggplot(aes(x, y)) + | |
geom_point() + | |
geom_abline(intercept = coef_yfnx[1], slope = coef_yfnx[2], colour = 'blue') + | |
geom_segment(aes(x = x, y = y, xend = x, yend = predict(m_yfnx))) + | |
scale_x_continuous(limits = c(0,12), breaks = seq(0,12,2)) + | |
scale_y_continuous(limits = c(0,12), breaks = seq(0,12,2)) | |
# All the errors in the x values | |
# Model x as a function of y | |
m_xfny <- lm(x ~ y, data = xdf) | |
summary(m_xfny) | |
coef_xfny <- coef(m_xfny) | |
xdf %>% | |
ggplot(aes(x, y)) + | |
geom_point() + | |
geom_abline(intercept = -coef_xfny[1]/coef_xfny[2], slope = 1/coef_xfny[2], colour = 'green') + | |
geom_segment(aes(x = x, y = y, xend = predict(m_xfny), yend = y)) + | |
scale_x_continuous(limits = c(0,12), breaks = seq(0,12,2)) + | |
scale_y_continuous(limits = c(0,12), breaks = seq(0,12,2)) | |
# Orthogonal | |
m_dem <- deming(y ~ x, data = xdf) | |
coef_dem <- coef(m_dem) | |
coef_dem | |
xdf <- xdf %>% | |
mutate(xi = Calc_xi(x1 = x, y1 = y, m = coef_dem[2], c1 = coef_dem[1])) %>% | |
mutate(yi = Calc_yi(x1 = x, y1 = y, m = coef_dem[2], c1 = coef_dem[1])) | |
xdf %>% | |
ggplot(aes(x, y)) + | |
geom_point() + | |
geom_abline(intercept = coef_dem[1], slope = coef_dem[2], colour = 'red') + | |
geom_segment(aes(x = x, xend = xi, y = y, yend = yi) ) + | |
scale_x_continuous(limits = c(0,12), breaks = seq(0,12,2)) + | |
scale_y_continuous(limits = c(0,12), breaks = seq(0,12,2)) | |
# Three lines on the one plot | |
xdf %>% | |
ggplot(aes(x, y)) + | |
geom_point() + | |
geom_abline(intercept = coef_yfnx[1], slope = coef_yfnx[2], colour = 'blue') + | |
geom_abline(intercept = -coef_xfny[1]/coef_xfny[2], slope = 1/coef_xfny[2], colour = 'green') + | |
geom_abline(intercept = coef_dem[1], slope = coef_dem[2], colour = 'red') + | |
scale_x_continuous(limits = c(0,12), breaks = seq(0,12,2)) + | |
scale_y_continuous(limits = c(0,12), breaks = seq(0,12,2)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment