Quick function for performing linear regression with one predictor variable
# Quick function for performing linear regression with only one predictor variable.
# Takes two arguments, x and y. The variables must have the same length.
# Output will be similar to the output of lm() with one predictor.
# Gist created as part of a post on linear regression: http://www.aaronschlegel.com/notebook/simple-linear-regression-models-r/
simple.linear.coef <- function(x, y) {
  # Find length of x to get sample size. Assuming x and y have the same sample size.
  n <- length(x)
  if (n != length(y))
    stop('variables are not the same size')
  # Calculate the corrected sums of squares Sxx, Syy, and Sxy
  sxx <- sum(x^2) - sum(x)^2 / n
  syy <- sum(y^2) - sum(y)^2 / n
  sxy <- sum(x * y) - (sum(x) * sum(y)) / n
  # Coefficients beta0 and beta1
  b1 <- sxy / sxx
  b0 <- mean(y) - b1 * mean(x)
  # Sum of squared errors (SSE) and mean squared error (MSE)
  sse <- syy - sxy^2 / sxx
  mse <- sse / (n - 2)
  # Standard errors of beta0 and beta1
  b1.err <- sqrt(mse) / sqrt(sxx)
  b0.err <- sqrt(mse) / sqrt(n) * sqrt(1 + (mean(x)^2 / (sum((x - mean(x))^2) / n)))
  # t-values of beta0 and beta1
  b0.t <- (b0 - 0) / b0.err
  b1.t <- (b1 - 0) / b1.err
  # Two-sided p-values of beta0 and beta1 (using |t| so the sign of the statistic does not matter)
  b0.p <- 2 * pt(abs(b0.t), df = n - 2, lower.tail = FALSE)
  b1.p <- 2 * pt(abs(b1.t), df = n - 2, lower.tail = FALSE)
  # Coefficient of determination R-squared
  r2 <- (syy - sse) / syy
  # R-squared adjusted
  r2.adj <- r2 - (1 - r2) * ((2 - 1) / (length(y) - 2))
  rsquare <- paste('Multiple R-squared: ', round(r2, 4), ', Adjusted R-squared: ', round(r2.adj, 4))
  coeffs <- data.frame(cbind(c(b0, b1), c(b0.err, b1.err), c(b0.t, b1.t), c(b0.p, b1.p)))
  colnames(coeffs) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
  rownames(coeffs) <- c('(Intercept)', 'x1')
  # Fit the line to the data with beta0 and beta1 found above
  fitted <- x * b1 + b0
  # The F-statistic
  msr <- sum((fitted - mean(y))^2) / 1
  mse2 <- sum((y - fitted)^2) / (length(y) - 2)
  f <- msr / mse2
  # p-value of the F-statistic
  p <- pf(f, 1, length(y) - 2, lower.tail = FALSE)
  f.stat <- paste('F-statistic: ', round(f, 2), ' on 1 and ', n - 2, ' DF, p-value: ', format(p, digits = 3, scientific = TRUE))
  # Calculate and summarize the residuals
  resd <- y - fitted
  min.res <- round(min(resd), 3)
  max.res <- round(max(resd), 3)
  q1.q3 <- quantile(resd, probs = c(.25, .75)) # 1st and 3rd quartiles of the residuals
  med <- round(median(resd), 3)
  residual <- data.frame(cbind(min.res, round(q1.q3[1], 3), med, round(q1.q3[2], 3), max.res))
  colnames(residual) <- c('Min', 'Q1', 'Median', 'Q3', 'Max')
  resdi <- paste('Residual standard error: ', round(sqrt(mse2), 2), ' on ', n - 2, ' degrees of freedom')
  regres <- list('Residuals' = residual, 'Coefficients' = coeffs, resdi, rsquare, f.stat)
  return(regres)
}
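
A minimal usage sketch, assuming the function above has been sourced into an R session. It uses the built-in cars dataset (speed as x, dist as y) purely for illustration and cross-checks the result against summary(lm()); neither the dataset choice nor the comparison is part of the original gist.

# Illustrative only: any numeric x and y of equal length work the same way.
x <- cars$speed
y <- cars$dist

fit <- simple.linear.coef(x, y)
fit$Residuals     # five-number summary of the residuals
fit$Coefficients  # estimates, standard errors, t values, and p-values

# Cross-check against base R's lm() on the same data
summary(lm(dist ~ speed, data = cars))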