Created
October 25, 2016 10:58
-
-
Save leeper/e3cec8578bbcda94c2c2e2765f0e74a0 to your computer and use it in GitHub Desktop.
Graphs showing SRS versus stratified/oversampling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# uses dev version of 'waffle' | |
# devtools::install_github("leeper/waffle@patch-1") | |
library("waffle") | |
library("extrafont") | |
# population | |
set.seed(1) | |
N <- 900L | |
p <- c("Small Group 1" = 30, "Big Group 1" = 420, "Small Group 2" = 30,"Big Group 2" = 420) | |
glyph <- c("male", "female")[sample(1:2, N, TRUE)] | |
w <- waffle(p, rows = 30, flip = TRUE, use_glyph = glyph, glyph_size = 4) | |
# SRS | |
w1 <- w + annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 30.4, col = "black", fill = NA, lwd = 1) + | |
# sample | |
set.seed(123) | |
n <- 50L | |
xs <- sample(1:30, n, TRUE) | |
ys <- sample(1:30, n, TRUE) | |
# plot sampled units | |
for (i in seq_len(n)) { | |
w1 <- w1 + annotate("rect", xmin = xs[i]-0.5, xmax = xs[i]+0.4, ymin = ys[i]-0.6, ymax = ys[i]+0.4, col = "black", alpha=0.5, lwd = 0.5) | |
} | |
# save | |
w1 + ggtitle("Sampling n=50\nfrom Population of N=900") + | |
xlab("Respondents from 'Small' Groups = 2, each of whom hugely influences results\nProbability of Being Sampled = 50/900 = 0.56 (All respondents weighted equally)") | |
ggsave("srs.png", width = 8, height = 8) | |
# Stratified Sampling | |
w2 <- w | |
w2 <- w2 + annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 1.4, col = "black", fill = NA, lwd = 1) + | |
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 15.4, col = "black", fill = NA, lwd = 1) + | |
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 15.4, ymax = 16.4, col = "black", fill = NA, lwd = 1) + | |
annotate("rect", xmin = 0.5, xmax = 30.4, ymin = 0.4, ymax = 30.4, col = "black", fill = NA, lwd = 1) | |
# sample | |
set.seed(456) | |
n1 <- 10L | |
n2 <- 15L | |
n3 <- 10L | |
n4 <- 15L | |
xs <- c(sample(1:30, n1, FALSE), sample(1:30, n2, TRUE), sample(1:30, n3, FALSE), sample(1:30, n4, TRUE)) | |
ys <- c(rep(1, n1), sample(2:15, n2, TRUE), rep(16, n3), sample(17:30, n4, TRUE)) | |
# plot sampled units | |
for (i in seq_len(sum(n1,n2,n3,n4))) { | |
w2 <- w2 + annotate("rect", xmin = xs[i]-0.5, xmax = xs[i]+0.4, ymin = ys[i]-0.6, ymax = ys[i]+0.4, col = "black", alpha=0.5, lwd = 0.5) | |
} | |
# save | |
w2 + ggtitle("Sampling n=50, oversampling 'small' groups\nfrom Population of N=900") + | |
xlab("Respondents from 'Small' Groups = 20\nProbability of Being Sampled (Big) = 30/840 = 0.036; so weighted 1.56\nProbability of Being Sampled (Small) = 20/60 = 0.17; so weighted 0.12") | |
ggsave("stratified.png", width = 8, height = 8) | |
# weight for "big" group for n equivalent to 50 | |
wb <- (840/900)/(30/50) | |
# weight for "small" group for n equivalent to 50 | |
ws <- (60/900)/(20/50) | |
(wb*30) + (ws*20) | |
# ratio of big/small groups in sample matches ratio of big/small groups in population | |
840/60 | |
(wb*30)/(ws*20) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Graphs are
"srs.png"
:And
"stratified.png"
: