Skip to content

Instantly share code, notes, and snippets.

@leeper
Created October 25, 2016 10:58
Show Gist options
  • Save leeper/e3cec8578bbcda94c2c2e2765f0e74a0 to your computer and use it in GitHub Desktop.
Save leeper/e3cec8578bbcda94c2c2e2765f0e74a0 to your computer and use it in GitHub Desktop.
Graphs showing SRS versus stratified/oversampling
# uses dev version of 'waffle'
# devtools::install_github("leeper/waffle@patch-1")
library("waffle")
library("extrafont")
# population
set.seed(1)
N <- 900L
p <- c("Small Group 1" = 30, "Big Group 1" = 420, "Small Group 2" = 30,"Big Group 2" = 420)
glyph <- c("male", "female")[sample(1:2, N, TRUE)]
w <- waffle(p, rows = 30, flip = TRUE, use_glyph = glyph, glyph_size = 4)
# SRS
w1 <- w + annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 30.4, col = "black", fill = NA, lwd = 1) +
# sample
set.seed(123)
n <- 50L
xs <- sample(1:30, n, TRUE)
ys <- sample(1:30, n, TRUE)
# plot sampled units
for (i in seq_len(n)) {
w1 <- w1 + annotate("rect", xmin = xs[i]-0.5, xmax = xs[i]+0.4, ymin = ys[i]-0.6, ymax = ys[i]+0.4, col = "black", alpha=0.5, lwd = 0.5)
}
# save
w1 + ggtitle("Sampling n=50\nfrom Population of N=900") +
xlab("Respondents from 'Small' Groups = 2, each of whom hugely influences results\nProbability of Being Sampled = 50/900 = 0.56 (All respondents weighted equally)")
ggsave("srs.png", width = 8, height = 8)
# Stratified Sampling
w2 <- w
w2 <- w2 + annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 1.4, col = "black", fill = NA, lwd = 1) +
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 15.4, col = "black", fill = NA, lwd = 1) +
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 15.4, ymax = 16.4, col = "black", fill = NA, lwd = 1) +
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 30.4, col = "black", fill = NA, lwd = 1)
# sample
set.seed(456)
n1 <- 10L
n2 <- 15L
n3 <- 10L
n4 <- 15L
xs <- c(sample(1:30, n1, FALSE), sample(1:30, n2, TRUE), sample(1:30, n3, FALSE), sample(1:30, n4, TRUE))
ys <- c(rep(1, n1), sample(2:15, n2, TRUE), rep(16, n3), sample(17:30, n4, TRUE))
# plot sampled units
for (i in seq_len(sum(n1,n2,n3,n4))) {
w2 <- w2 + annotate("rect", xmin = xs[i]-0.5, xmax = xs[i]+0.4, ymin = ys[i]-0.6, ymax = ys[i]+0.4, col = "black", alpha=0.5, lwd = 0.5)
}
# save
w2 + ggtitle("Sampling n=50, oversampling 'small' groups\nfrom Population of N=900") +
xlab("Respondents from 'Small' Groups = 20\nProbability of Being Sampled (Big) = 30/840 = 0.036; so weighted 1.56\nProbability of Being Sampled (Small) = 20/60 = 0.17; so weighted 0.12")
ggsave("stratified.png", width = 8, height = 8)
# weight for "big" group for n equivalent to 50
wb <- (840/900)/(30/50)
# weight for "small" group for n equivalent to 50
ws <- (60/900)/(20/50)
(wb*30) + (ws*20)
# ratio of big/small groups in sample matches ratio of big/small groups in population
840/60
(wb*30)/(ws*20)
@leeper
Copy link
Author

leeper commented Oct 25, 2016

Graphs are "srs.png":

srs

And "stratified.png":

stratified

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment