dmarcelinobr · July 4, 2015 18:02
diff --git a/greek.thing.R b/greek.thing.R
 require(magrittr)
 require(SciencesPo)
 # Latest polls:
 polls = NULL
 polls <- data.frame( rbind( 
 Kapa = c(47.2,33,1000),
 ProRata = c(37,46,1200),
 U.Macedonia = c(42.5,43,1042),
 Public.Issue=c(45.5,45,1053),
 GPO=c(44.1,43.7,2453),
 Alco=c(44.5,43.9,1400),
 Metron = c(46,47,3612) ) )

 # set up for decimals
 polls[, 1:2] <- polls[, 1:2]/100

 # weighting polls 
 mean_of_polls <-apply(polls[,1:2],2, weighted.mean, polls[,3])
 sum_of_samples <- sum(polls[,3])

 wtd.polls <- polls %>% rbind(c(mean_of_polls, sum_of_samples))


 names(wtd.polls) <- c("YES","NO", "n")

 wtd.polls$DK = 1-(psum(wtd.polls$YES, wtd.polls$NO))

 # Adjusting for the undecideds for the final results
 wtd.polls[9,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],DK=0)

 ##################################################
 ## The following is a function which I used to draw from a dirichlet distribution.
 ## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
 ##################################################
 row= 9
 prob2win = function(row,export=1){
  p=rdirichlet(100000,
               wtd.polls$n[row] *
                 c(wtd.polls$YES[row], wtd.polls$NO[row], 1-wtd.polls$YES[row]-wtd.polls$NO[row])+1
  )
  if(export==1){
    mean(p[,2]<p[,1]) ## No exceeds Yes?
  } else {
    return(p)
  }
 }


 (Yes.win.probs = sapply(1:nrow(wtd.polls),prob2win))



 ## set simulated data for determining parameters and graphing 
 p = prob2win(row= 9, export=0)

 ## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
 p = cbind(p, p[,1]-p[,2])

 ## Draw a histogram of simulated data
 hist(p[,4], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
 abline(v=0, col=c('red'), lwd=2, lty=c(1))

 ## In this case I ran the function a few time to set the start value close to what I believe be the output.
 ## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise, 
 ## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
 library(MCMCpack)
 fit1 = fitdistr(p[,1], "beta",
                start=list(shape1=455,shape2=555))

 fit2 = fitdistr(p[,2], "beta",
                start=list(shape1=545,shape2=455))

 ## Draw densities of simulated data
 par(
  bg = "black",
  col = "white",
  col.axis = "white",
  col.lab = "white",
  col.main = "white",
  col.sub = "white")
 curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,30), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "*pi(theta)*""), main="Distribution of the Yes/No Positions\nGreek Referendum 2015", sub=paste("Probability that YES wins: ", round(Yes.win.probs[9],2) ) ) ## yes 1

 curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.44,.56), col='darkgreen', lty=1, add=TRUE, lwd=8) ## yes 1

 curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='orange', xlim=c(.43,.56), lty=1, lwd=8) ## No 2

 abline(v=c(median(p[,1]), median(p[,2])), col=c('green','orange'), lwd=2, lty=c(2,2))
 legend("topright",c("Yes","No"), lwd=2, col=c('darkgreen','orange'), lty=c(1,1))
	require(magrittr)
	require(SciencesPo)
	# Latest polls:
	polls = NULL
	polls <- data.frame( rbind(
	Kapa = c(47.2,33,1000),
	ProRata = c(37,46,1200),
	U.Macedonia = c(42.5,43,1042),
	Public.Issue=c(45.5,45,1053),
	GPO=c(44.1,43.7,2453),
	Alco=c(44.5,43.9,1400),
	Metron = c(46,47,3612) ) )

	# set up for decimals
	polls[, 1:2] <- polls[, 1:2]/100

	# weighting polls
	mean_of_polls <-apply(polls[,1:2],2, weighted.mean, polls[,3])
	sum_of_samples <- sum(polls[,3])

	wtd.polls <- polls %>% rbind(c(mean_of_polls, sum_of_samples))


	names(wtd.polls) <- c("YES","NO", "n")

	wtd.polls$DK = 1-(psum(wtd.polls$YES, wtd.polls$NO))

	# Adjusting for the undecideds for the final results
	wtd.polls[9,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],DK=0)

	##################################################
	## The following is a function which I used to draw from a dirichlet distribution.
	## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
	##################################################
	row= 9
	prob2win = function(row,export=1){
	p=rdirichlet(100000,
	wtd.polls$n[row] *
	c(wtd.polls$YES[row], wtd.polls$NO[row], 1-wtd.polls$YES[row]-wtd.polls$NO[row])+1
	)
	if(export==1){
	mean(p[,2]<p[,1]) ## No exceeds Yes?
	} else {
	return(p)
	}
	}


	(Yes.win.probs = sapply(1:nrow(wtd.polls),prob2win))



	## set simulated data for determining parameters and graphing
	p = prob2win(row= 9, export=0)

	## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
	p = cbind(p, p[,1]-p[,2])

	## Draw a histogram of simulated data
	hist(p[,4], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
	abline(v=0, col=c('red'), lwd=2, lty=c(1))

	## In this case I ran the function a few time to set the start value close to what I believe be the output.
	## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
	## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
	library(MCMCpack)
	fit1 = fitdistr(p[,1], "beta",
	start=list(shape1=455,shape2=555))

	fit2 = fitdistr(p[,2], "beta",
	start=list(shape1=545,shape2=455))

	## Draw densities of simulated data
	par(
	bg = "black",
	col = "white",
	col.axis = "white",
	col.lab = "white",
	col.main = "white",
	col.sub = "white")
	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,30), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "pi(theta)""), main="Distribution of the Yes/No Positions\nGreek Referendum 2015", sub=paste("Probability that YES wins: ", round(Yes.win.probs[9],2) ) ) ## yes 1

	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.44,.56), col='darkgreen', lty=1, add=TRUE, lwd=8) ## yes 1

	curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='orange', xlim=c(.43,.56), lty=1, lwd=8) ## No 2

	abline(v=c(median(p[,1]), median(p[,2])), col=c('green','orange'), lwd=2, lty=c(2,2))
	legend("topright",c("Yes","No"), lwd=2, col=c('darkgreen','orange'), lty=c(1,1))