Last active
July 16, 2016 06:22
-
-
Save mine-cetinkaya-rundel/055c3377c394e82e00c16edb7711d2f3 to your computer and use it in GitHub Desktop.
Standard errors for lm and glm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From https://stats.stackexchange.com/questions/224045/using-lm-for-2-sample-proportion-test | |
## prop.test gives pooled 2-sample proportion result | |
## glm w/ binomial family gives unpooled 2-sample proportion result | |
## lm and glm w/ gaussian family give unknown result | |
library(dplyr) | |
library(broom) | |
set.seed(12345) | |
## set up dataframe ------------------------- | |
n_A <- 5000 | |
n_B <- 5000 | |
outcome <- rbinom( | |
n = n_A + n_B, | |
size = 1, | |
prob = 0.5 | |
) | |
treatment <- c( | |
rep("A", n_A), | |
rep("B", n_B) | |
) | |
df <- tbl_df(data.frame(outcome = outcome, treatment = treatment)) | |
## by hand, 2-sample prop tests --------------------------------------------- | |
p_A <- sum(df$outcome[df$treatment == "A"])/n_A | |
p_B <- sum(df$outcome[df$treatment == "B"])/n_B | |
p_pooled <- sum(df$outcome)/(n_A + n_B) | |
z_pooled <- (p_B - p_A) / sqrt( p_pooled * (1 - p_pooled) * (1/n_A + 1/n_B) ) | |
pvalue_pooled <- 2*(1-pnorm(abs(z_pooled))) | |
z_unpooled <- (p_B - p_A) / sqrt( (p_A * (1 - p_A))/n_A + (p_B * (1 - p_B))/n_B ) | |
pvalue_unpooled <- 2*(1-pnorm(abs(z_unpooled))) | |
## using prop.test -------------------------------------- | |
res_prop_test <- tidy(prop.test( | |
x = c(sum(df$outcome[df$treatment == "A"]), | |
sum(df$outcome[df$treatment == "B"])), | |
n = c(n_A, n_B), | |
correct = FALSE | |
)) | |
res_prop_test # same as pvalue_pooled | |
all.equal(res_prop_test$p.value, pvalue_pooled) | |
# [1] TRUE | |
# using glm with identity link ----------------------------------- | |
res_glm_binomial <- df %>% | |
do(tidy(glm(outcome ~ treatment, family = binomial(link = "identity")))) %>% | |
filter(term == "treatmentB") | |
res_glm_binomial # same as p_unpooled | |
all.equal(res_glm_binomial$p.value, pvalue_unpooled) | |
# [1] TRUE | |
## glm and lm gaussian -------------------------------- | |
res_glm <- df %>% | |
do(tidy(glm(outcome ~ treatment))) %>% | |
filter(term == "treatmentB") | |
res_glm | |
all.equal(res_glm$p.value, pvalue_unpooled) | |
all.equal(res_glm$p.value, pvalue_pooled) | |
res_lm <- df %>% | |
do(tidy(lm(outcome ~ treatment))) %>% | |
filter(term == "treatmentB") | |
res_lm | |
all.equal(res_lm$p.value, pvalue_unpooled) | |
all.equal(res_lm$p.value, pvalue_pooled) | |
all.equal(res_lm$p.value, res_glm$p.value) | |
# [1] TRUE | |
# Explore differences | |
## calculate lm standard error -------------------------------- | |
res_lm_nobroom <- lm(outcome ~ treatment, data = df) | |
y <- res_lm_nobroom$model$outcome | |
y_hat_lm <- res_lm_nobroom$fitted.values | |
x <- ifelse(res_lm_nobroom$model$treatment == "A", 0, 1) | |
x_bar <- mean(x) | |
n <- nrow(df) | |
se_lm <- sqrt(sum((y - y_hat_lm)^2) / (n - 2)) / sqrt(sum((x - x_bar)^2)) | |
## calculate glm_binomial standard error -------------------------------- | |
res_glm_binomial_nobroom <- glm(outcome ~ treatment, data = df, family = binomial(link = "identity")) | |
y_hat_glm_binomial <- res_glm_binomial_nobroom$fitted.values | |
se_glm_binomial <- sqrt(sum((y - y_hat_glm_binomial)^2) / (n)) / sqrt(sum((x - x_bar)^2)) | |
## calculate se_unpooled -------------------------------- | |
se_unpooled <- sqrt( (p_A * (1 - p_A))/n_A + (p_B * (1 - p_B))/n_B ) | |
## compare standard errors -------------------------------- | |
all.equal(se_lm, res_lm$std.error) # TRUE | |
all.equal(se_lm, res_glm$std.error) # TRUE | |
all.equal(se_unpooled, res_glm_binomial$std.error) # TRUE | |
all.equal(se_glm_binomial, res_glm_binomial$std.error) # TRUE | |
all.equal(se_lm, se_glm_binomial) # FALSE | |
all.equal(res_lm$std.error, res_glm_binomial$std.error) # FALSE | |
all.equal(res_glm$std.error, res_glm_binomial$std.error) # FALSE |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment