Skip to content

Instantly share code, notes, and snippets.

@mark-andrews
Created November 9, 2020 14:32
Show Gist options
  • Save mark-andrews/84cb6806c2c5b6459c09c73deb8662ba to your computer and use it in GitHub Desktop.
Save mark-andrews/84cb6806c2c5b6459c09c73deb8662ba to your computer and use it in GitHub Desktop.
# This is a set of functions etc to illustrate the concept of a sampling
# distribution using the example of sampling gold and silver coins
# from a box.
library(tidyverse)
# set random number generator seed
set.seed(10101)
make_box <- function(N = 100, g = 0.5){
# N: Number of coins in box
# g: The hypothesized proportion of gold coins
# The number of gold coins
n_gold <- N * g
# The number of silver coins
n_silver <- N - n_gold
c(rep('Gold', n_gold),
rep('Silver', n_silver)
) %>% sample()
}
# make a box of coins
coin_box <- make_box(N = 100, g = 0.5)
sample_coins <- function(n = 25){
sample(coin_box, n, replace = T)
}
count_gold <- function(coin_sample){
sum(coin_sample == 'Gold')
}
sample_count_gold <- function(n = 25){
sample_coins(n) %>% count_gold()
}
rep_sample_count <- function(N, n = 25){
replicate(N, sample_count_gold(n))
}
plot_rep_sample_count <- function(N, n = 25){
tibble(m = rep_sample_count(N, n)) %>%
ggplot(aes(x = m)) +
geom_bar() +
xlim(0, n) +
xlab('gold coins in sample')
}
plot_rep_sample_count <- function(N, n = 25){
tibble(m = rep_sample_count(N, n)) %>%
ggplot(aes(x = m)) +
geom_bar() +
xlim(0, n) +
xlab('gold coins in sample')
}
plot_rep_sample_count2 <- function(N, n = 25, m_obs= 7, g = 0.5){
tibble(m = c(seq(0, n), rep_sample_count(N, n))) %>%
group_by(m) %>%
summarize(count = n()) %>%
ungroup() %>%
mutate(xtreme = abs((g * n) - seq(0, n) ) >= ((g * n) - m_obs)) %>%
ggplot(aes(x = m, y = count, fill = xtreme)) +
geom_bar(stat = 'identity') +
#xlim(0, n) +
xlab('gold coins in sample') +
theme(legend.position = "none") +
scale_fill_manual(values = c("gray40", "gray20"))
}
plot_sampling_dist <- function(n = 25, g = 0.5, m_obs = 7){
tibble(
m = seq(0, n),
p = dbinom(m, size=n, prob = g),
xtreme = abs((g * n) - seq(0, n) ) >= ((g * n) - m_obs)
) %>%
ggplot(aes(x = m, y = p, fill = xtreme)) +
geom_bar(stat = 'identity') +
xlab('gold coins in sample')
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment