Last active
February 5, 2020 19:55
-
-
Save NickCH-K/8ac828bab57803456c6ec796ca7b0b17 to your computer and use it in GitHub Desktop.
Selected Reporting Graph
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(broom) | |
library(gridExtra) | |
#' Simulate one binary-treatment experiment and regress outcome on treatment
#'
#' Draws `N` treatment indicators (`runif(N) > .5`) and `N` binary outcomes
#' (`runif(N) + fx * x > .5`), then fits `lm(y ~ x)`. For `fx` between 0 and
#' .5 this makes P(y = TRUE) equal to .5 for controls and .5 + fx for treated.
#'
#' @param N Number of observations to simulate.
#' @param fx True treatment effect added to the outcome draw for treated
#'   units. Defaults to .05, the value the script's figures are built around.
#' @return A list with two scalars: `beta`, the estimated coefficient on the
#'   treatment indicator, and `p`, its p-value. Both are `NA` when the
#'   coefficient cannot be estimated (e.g. every unit landed in the same arm).
reg_return <- function(N, fx = .05) {
  # Treatment is drawn before the outcome noise, so the random-number stream
  # is consumed in a fixed order: x first, then y.
  x <- runif(N) > .5
  y <- runif(N) + fx * x > .5

  m <- lm(y ~ x)
  coefs <- coef(summary(m))

  # summary() omits aliased coefficients, so the treatment row is missing
  # whenever x is constant; report NA instead of indexing past the table.
  estimable <- nrow(coefs) >= 2

  list(
    beta = coef(m)[[2]],
    p = if (estimable) unname(coefs[2, "Pr(>|t|)"]) else NA_real_
  )
}
# Fix the RNG state so the simulated estimates, and therefore the saved
# figure, are the same on every run of the script.
set.seed(20200205)

# Run 20 simulated experiments at each sample size 10, 15, ..., 1000 and
# record the estimated treatment effect, its p-value, and whether it clears
# the 5% significance threshold.
res <- crossing(N = (2:200) * 5, iter = 1:20) %>%
  mutate(
    regs = map(N, reg_return),
    Difference = map_dbl(regs, "beta"),
    p = map_dbl(regs, "p"),
    Significant = ifelse(p <= .05, "Sig. at 95%", "Insig.")
  )
# Left panel: every simulated estimate against its sample size, coloured by
# whether it was statistically significant.
p1 <- ggplot(res, aes(x = N, y = Difference, color = Significant)) +
  geom_point(position = "jitter") +
  # Reference line at the true effect. The value is passed as an argument
  # rather than through aes() so a single line is drawn instead of one
  # overplotted line per row of `res`.
  geom_hline(yintercept = .05, linetype = "dashed", color = "black") +
  theme_light() +
  theme(
    legend.position = c(.85, .85),
    legend.background = element_rect(color = "black")
  ) +
  labs(
    x = "Sample Size",
    y = "Estimated Treatment vs. Control Difference",
    title = "Estimated Effect of Treatment by Sample Size",
    subtitle = "Outcome, treatment both binary with mean .5. True treatment effect is .05"
  )
# Large-sample comparison set: 380 simulated experiments, all with
# N = 10,000, summarised with the same columns as `res`.
# NOTE(review): 380 equals the number of rows `res` holds for N <= 100
# (19 sizes x 20 iterations) -- presumably chosen to match; confirm.
resbig <- crossing(N = 10000, iter = 1:380) %>%
  mutate(
    regs = map(N, reg_return),
    Difference = map_dbl(regs, "beta"),
    p = map_dbl(regs, "p"),
    Significant = ifelse(p <= .05, "Sig. at 95%", "Insig.")
  )
# Stack the three sample-size buckets (labelled via `size`), then keep only
# the statistically significant estimates. `Sample.Size` is an ordered-by-
# level factor so groups appear smallest to largest downstream.
sigonly <- bind_rows(
  resbig %>% mutate(size = "10,000"),
  res %>% filter(N <= 100) %>% mutate(size = "10-100"),
  res %>% filter(N >= 750, N <= 1000) %>% mutate(size = "750-1,000")
) %>%
  filter(Significant == "Sig. at 95%") %>%
  mutate(
    Sample.Size = factor(size, levels = c("10-100", "750-1,000", "10,000"))
  )
# Mean significant estimate per sample-size bucket, rounded to 3 decimals.
# The result is a plain numeric vector ordered by the levels of `Sample.Size`.
means <- sigonly %>%
  group_by(Sample.Size) %>%
  summarize(m = round(mean(Difference), digits = 3)) %>%
  pull(m)
# Right panel: density of the significant estimates within each sample-size
# bucket, with each bucket's mean printed next to its curve.
p2 <- ggplot(
  sigonly %>% filter(between(Difference, -.1, .5)),
  aes(x = Difference, color = Sample.Size)
) +
  # stat_density() stacks groups by default; position = "identity" draws each
  # bucket's own density instead of cumulative sums of the curves.
  stat_density(geom = "line", position = "identity", size = 1) +
  # Reference line at the true effect, passed as an argument so it is drawn
  # once rather than once per row of the plotted data.
  geom_vline(xintercept = .05, linetype = "dashed", color = "black") +
  theme_light() +
  theme(
    legend.position = c(.85, .85),
    legend.background = element_rect(color = "black")
  ) +
  labs(
    x = "Estimated Difference",
    y = "Density",
    title = "Distribution of Significant Estimates by Sample Size",
    subtitle = "Only includes results sig. at 95%, to demonstrate effects of selected reporting.\nDifferences outside [-.1,.5] omitted for clarity."
  ) +
  # Label coordinates are hand-placed; `means` is ordered 10-100, 750-1,000,
  # 10,000, and it is computed on all of `sigonly`, not the trimmed range.
  annotate(
    "text",
    label = paste("Mean", means),
    x = c(.275, .15, -.02),
    y = c(7, 18, 35)
  )
# Place the two panels side by side (this also draws them on the current
# graphics device), then write the combined figure to disk as a 12 x 9 in PNG.
grid <- grid.arrange(grobs = list(p1, p2), ncol = 2)
ggsave(
  filename = "results_selection.png",
  plot = grid,
  width = 12,
  height = 9,
  units = "in"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment