Skip to content

Instantly share code, notes, and snippets.

@flovv
Last active January 24, 2016 11:44
Show Gist options
  • Save flovv/be6e1c027409e36ec316 to your computer and use it in GitHub Desktop.
Save flovv/be6e1c027409e36ec316 to your computer and use it in GitHub Desktop.
illustrate the relationship between panel-based measurement error and panel sizes for GFK and Nielsen
# a simple script to illustrate the relationship between panel-based measurement error and panel sizes
# simple case of party estimate!
require(plyr)
require(ggplot2)
require(ggthemes)
n=1000
p=5
test <- c(40, 30, 15, 10 , 5) /100
getDraws <- function(n, prob){
sum(ifelse(runif(n, 0, 1) <= prob, 1,0)) /n
}
#getDraws(n, .05)
getVariance <- function(n, prob, redraws){
replicate(redraws, getDraws(n, prob))
}
line <- NULL
set.seed(123)
for(i in test){
if(exists("line")){
line <- rbind(line, data.frame(sample=getVariance(n, i, 100), trueValue =i))
}
else{
line <- data.frame(sample=getVariance(n, i, 100), trueValue =i)
}
}
line$trueValue <- as.factor(line$trueValue)
ggplot(line,aes(sample*100, fill=trueValue)) + geom_density(alpha=.4) +facet_wrap(~trueValue, scales="free")+ xlab("")
ggplot(line,aes(sample*100, fill=trueValue)) + geom_density(alpha=.4) + xlab("Vote Share in %") + theme_economist()
#############################################
confidence.interval = function(c.lev, margin=.5, p.ss, population) {
z.val = qnorm(.5+c.lev/200)
ss = ((population*p.ss-p.ss)/(population-p.ss))
c.interval = sqrt((z.val^2 * margin * (1-margin))/ss)
r.cint = round(c.interval*100, digits = 2)
return(r.cint)
}
##########################################
confidence.interval(95, 0.4, 1000, 70000000)
#############################################
#############################################
df <- data.frame(m=c(50, 30, 10, 5, 3, 2, 1, .5, .2, .05, 0.01), interval =confidence.interval(99, c(50, 30, 10, 5, 3, 2, 1, .5, .2, .05, 0.01)/100 , 5000, 50000000), order= seq(1,11,1) )
df <- data.frame(m=c(3, 2, 1, .5, .2, .05), interval =confidence.interval(99, c(3, 2, 1, .5, .2, .05)/100 , 5000, 50000000) )
df$m2 <- factor(df$m)
#df$m <- reorder(df$m, d$Win)
p <- ggplot(df, aes(y=m, x=m2 ))
p + theme_bw(base_size = 18)+geom_point() + geom_errorbar(aes(ymin=m-interval, ymax=m+interval), width=.1) + ylab("Market Share (%) and Confidence Intervalls") + xlab("Market Share (%) / Size of the Website") + ggtitle("Panel Size 5000, Population 50 Mio")
############### percentage error!
df$percError <- df$interval / df$m *100
p <- ggplot(df, aes(y=percError, x=m2)) + geom_point() + ylab("Relative Error, %") + xlab("Market Share (%) / Size of the Website") + ggtitle("Panel Size 5000, Population 50 Mio")
p +theme_economist(base_size = 16)
#########################################
## changing panel size!
df <- data.frame(m=c(1000, 3000, 5000, 10000, 20000, 50000), interval =confidence.interval(99, 1/100 , c(1000, 3000, 5000, 10000, 20000, 50000), 50000000), order= seq(1,6,1) )
df$percError <- df$interval / 1 *100
df$m2 <- factor(df$m)
p <- ggplot(df, aes(y=percError, x=m2)) + geom_point() + ylab("Relative Error, %") + xlab("Panel Size") + ggtitle("Varying Panel Size, Population 50 Mio, Website Size 1%")
p +theme_bw(base_size = 18)
p <- ggplot(df, aes(y=1, x=m2 ))
p +theme_bw(base_size = 18)+ geom_point() + geom_errorbar(aes(ymin=1-interval, ymax=1+interval), width=.1) + ylab("Market Share (%) and Confidence Intervalls") + xlab("Panel Size") + ggtitle("Varying Panel Size, Population 50 Mio, Website Size 1%")
############# facet!
df <- data.frame(m=c(1000, 3000, 5000, 10000, 20000, 50000), interval =confidence.interval(99, 1/100 , c(1000, 3000, 5000, 10000, 20000, 50000), 50000000), order= seq(1,6,1), size=1 )
df2 <- data.frame(m=c(1000, 3000, 5000, 10000, 20000, 50000), interval =confidence.interval(99, .5/100 , c(1000, 3000, 5000, 10000, 20000, 50000), 50000000), order= seq(1,6,1), size=.5 )
df3 <- data.frame(m=c(1000, 3000, 5000, 10000, 20000, 50000), interval =confidence.interval(99, .1/100 , c(1000, 3000, 5000, 10000, 20000, 50000), 50000000), order= seq(1,6,1), size=.1 )
hopp <-rbind(df, df2)
hopp <- rbind(hopp, df3)
hopp$m2 <- factor(hopp$m)
#hopp$size <- factor(hopp$size)
p <- ggplot(hopp, aes(y=size, x=m2 ))
p +theme_bw(base_size = 18)+ geom_point() + geom_errorbar(aes(ymin=size-interval, ymax=size+interval), width=.1) + ylab("Market Share (%) and Confidence Intervalls") + xlab("Panel Size") + ggtitle("Varying Panel Size, Population 50 Mio, Varying Website Size") + facet_grid(~size)
hopp$percError <- hopp$interval / hopp$size *100
p <- ggplot(hopp, aes(y=percError, x=m2 ))
p +theme_bw(base_size = 18)+ geom_point() + ylab("Relative Error, %") + xlab("Panel Size") + ggtitle("Varying Panel Size, Population 50 Mio, Varying Website Size") + facet_grid(~size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment