Last active
August 29, 2015 14:05
-
-
Save dmarcelinobr/cde80bf3c7c0788a7e8b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MCMCpack) | |
## Coping with polls: the more recent, the better they are. | |
##http://en.wikipedia.org/wiki/Opinion_polling_for_the_Scottish_independence_referendum,_2014 | |
polls = NULL | |
polls <- data.frame( rbind( | |
Survation = c(37, 50, 1010), | |
YouGov = c(35, 55, 1142), | |
TNSBMRB = c(32, 45, 1003), | |
IpsosMORI = c(40, 54, 1006), | |
Survation = c(40, 46, 1000), | |
Panelbase = c(41, 48, 1041) | |
)) | |
# set up for decimals | |
polls[, 1:2] <- polls[, 1:2]/100 | |
# weighting polls | |
wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3]))) | |
names(wtd.polls) <- c("Yes","No","n") | |
wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No | |
# Adjusting for the undecideds for the final results | |
wtd.polls.others <- data.frame(wtd.polls[7,1:2] + wtd.polls[7,1:2]/ sum(wtd.polls[7,1:2])*wtd.polls[7,4],n=wtd.polls[7,3],Others=0) | |
data = rbind(wtd.polls, wtd.polls.others) | |
################################################## | |
## If we have more than two categories (Yes, No, Uncertain), Beta distribution won't work properly. | |
## The following is the main function, which I use to randomly draw data from a dirichlet distribution. | |
################################################## | |
win.probs = function(j=8,export=1){ | |
p=rdirichlet(100000, | |
data$n[j]* | |
c(data$Yes[j], data$No[j], 1-data$Yes[j]-data$No[j])+1 | |
) | |
if(export==1){ | |
mean(p[,2]>p[,1]) | |
} else { | |
return(p) | |
} | |
} | |
( No.win.probs = sapply(1:nrow(data),win.probs) ) | |
## set simulated data for determining parameters and graphing | |
sim.dist = win.probs(8,export=2) | |
## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta. | |
sim.dist = cbind(sim.dist, sim.dist[,2]-sim.dist[,1]) | |
## Draw a histogram of simulated data | |
hist(sim.dist[,4], col='gray90', nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference") | |
#abline(v=0, col=c('red'), lwd=2, lty=c(1)) | |
## In this case I ran the function a few time to set the start value close to what I believe be the output. | |
## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise, | |
## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2. | |
fit.dist.1 = fitdistr(sim.dist[,1], "beta", | |
start=list(shape1=3,shape2=3)) | |
fit.dist.2 = fitdistr(sim.dist[,2], "beta", | |
start=list(shape1=3,shape2=3)) | |
## We can also fit a curve of the margins | |
fit.dist.margin = fitdistr(sim.dist[,4], "beta", | |
start=list(shape1=5,shape2=5)) | |
## Draw densities of simulated data | |
curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), ylim=c(0,80), xlim=c(.35,.65), col='NA', lty=1, lwd=3, ylab="Density", xlab="theta", main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(NO.win.probs[8],2) ) ) ## yes 1 | |
curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), xlim=c(.35,.50), col='red', lty=1, add=TRUE, lwd=3) ## yes 1 | |
curve(dbeta(x,fit.dist.2$estimate[1],fit.dist.2$estimate[2]), add=TRUE, col='blue', xlim=c(.50,.65), lty=1, lwd=3) ## No 2 | |
abline(v=c(median(sim.dist[,1]), median(sim.dist[,2])), col=c('red','blue'), lwd=2, lty=c(2,2)) | |
legend("topright",c("Yes","No"), lwd=2, col=c('red','blue'), lty=c(1,1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment