Skip to content

Instantly share code, notes, and snippets.

@NickCH-K
Last active February 5, 2020 19:55
Show Gist options
  • Save NickCH-K/8ac828bab57803456c6ec796ca7b0b17 to your computer and use it in GitHub Desktop.
Save NickCH-K/8ac828bab57803456c6ec796ca7b0b17 to your computer and use it in GitHub Desktop.
Selected Reporting Graph
library(tidyverse)
library(broom)
library(gridExtra)
#' Simulate one binary-treatment experiment and regress outcome on treatment.
#'
#' Draws `N` observations with a coin-flip treatment `x` and a binary outcome
#' `y` that is TRUE with probability .5 for controls and .5 + `fx` for treated
#' units, then fits the linear probability model `y ~ x`.
#'
#' @param N Number of observations to simulate.
#' @param fx True treatment effect, i.e. the shift added to the outcome draw
#'   for treated units. Defaults to .05, the value used by this script.
#' @return A list with two numeric scalars: `beta`, the estimated coefficient
#'   on `x`, and `p`, its p-value. Both are `NA` when the coefficient cannot
#'   be estimated (every unit ended up in the same arm).
reg_return <- function(N, fx = .05) {
  # Treatment is drawn first, then the outcome noise; keeping this order
  # keeps results identical for a given RNG state.
  x <- runif(N) > .5
  y <- runif(N) + fx * x > .5

  m <- lm(y ~ x)
  coef_table <- coef(summary(m))

  # summary() omits coefficients that could not be estimated, so the
  # treatment row (row 2) may be absent; report NA instead of failing.
  p <- if (nrow(coef_table) >= 2) coef_table[2, "Pr(>|t|)"] else NA_real_

  list(beta = coef(m)[[2]], p = p)
}
# Fix the RNG state so every run of the script produces the same simulated
# estimates, and therefore the same figure and reported means.
set.seed(1000)

# Main simulation grid: sample sizes 10, 15, ..., 1000 with 20 independent
# replications of the experiment at each size.
res <- crossing(N = (2:200) * 5, iter = 1:20) %>%
  mutate(regs = map(N, reg_return)) %>%
  mutate(
    Difference = map_dbl(regs, "beta"),
    p = map_dbl(regs, "p")
  ) %>%
  # Label each estimate by whether its p-value is at or below .05.
  mutate(Significant = ifelse(p <= .05, "Sig. at 95%", "Insig."))
# Left panel: every simulated estimate plotted against its sample size,
# colored by whether it was statistically significant.
p1 <- ggplot(res, aes(x = N, y = Difference, color = Significant)) +
  geom_point(position = "jitter") +
  theme_light() +
  # Reference line at the true effect. The intercept is passed as a layer
  # argument rather than inside aes(), so a single line is drawn instead of
  # one identical line per row of `res`.
  geom_hline(yintercept = .05, linetype = "dashed", color = "black") +
  theme(
    legend.position = c(.85, .85),
    legend.background = element_rect(color = "black")
  ) +
  labs(
    x = "Sample Size",
    y = "Estimated Treatment vs. Control Difference",
    title = "Estimated Effect of Treatment by Sample Size",
    subtitle = "Outcome, treatment both binary with mean .5. True treatment effect is .05"
  )
# Large-sample comparison set: 380 replications of the experiment, each with
# N = 10,000 observations, summarised the same way as `res`.
resbig <- crossing(N = 10000, iter = 1:380) %>%
  mutate(
    regs = map(N, reg_return),
    Difference = map_dbl(regs, ~ .x$beta),
    p = map_dbl(regs, ~ .x$p),
    Significant = ifelse(p <= .05, "Sig. at 95%", "Insig.")
  )
# Keep only the significant estimates from three sample-size buckets and tag
# each row with its bucket label. The list order fixes the row order of the
# stacked result; the factor levels fix the small-to-large display order.
sigonly <- list(
  "10,000" = resbig,
  "10-100" = filter(res, N <= 100),
  "750-1,000" = filter(res, between(N, 750, 1000))
) %>%
  imap(function(d, label) {
    d %>%
      filter(Significant == "Sig. at 95%") %>%
      mutate(size = label)
  }) %>%
  bind_rows() %>%
  mutate(Sample.Size = factor(size, levels = c("10-100", "750-1,000", "10,000")))
# Mean significant estimate within each sample-size bucket, rounded to three
# decimals; values come out in the factor-level order of Sample.Size.
means <- sigonly %>%
  group_by(Sample.Size) %>%
  summarize(m = round(mean(Difference), digits = 3)) %>%
  pull(m)
# Right panel: density of the significant estimates, one curve per
# sample-size bucket. Estimates outside [-.1, .5] are dropped from the plot
# data only; `means` is computed from all significant estimates.
p2 <- ggplot(
  sigonly %>% filter(between(Difference, -.1, .5)),
  aes(x = Difference, color = Sample.Size)
) +
  stat_density(geom = "line", size = 1) +
  theme_light() +
  # Reference line at the true effect. The intercept is passed as a layer
  # argument rather than inside aes(), so a single line is drawn instead of
  # one identical line per row of the plot data.
  geom_vline(xintercept = .05, linetype = "dashed", color = "black") +
  theme(
    legend.position = c(.85, .85),
    legend.background = element_rect(color = "black")
  ) +
  labs(
    x = "Estimated Difference",
    y = "Density",
    title = "Distribution of Significant Estimates by Sample Size",
    subtitle = "Only includes results sig. at 95%, to demonstrate effects of selected reporting.\nDifferences outside [-.1,.5] omitted for clarity."
  ) +
  # One "Mean ..." label per bucket, at hand-picked coordinates listed in the
  # same order as `means`.
  annotate(
    "text",
    label = paste("Mean", means),
    x = c(.275, .15, -.02),
    y = c(7, 18, 35)
  )
# Place the two panels side by side in a single row, then write the combined
# layout to a 12 x 9 inch PNG.
grid <- grid.arrange(p1, p2, nrow = 1)
ggsave(
  filename = "results_selection.png",
  plot = grid,
  width = 12,
  height = 9,
  units = "in"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment