Created
February 10, 2013 18:21
-
-
Save chrishanretty/4750469 to your computer and use it in GitHub Desktop.
Pooling the polls for Italy, final version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Takes as input a Google Docs spreadsheet | |
### Outputs national and regional level estimates of support | |
### over time | |
### Load libraries | |
library(R2WinBUGS) | |
library(rjags) | |
library(RCurl) | |
library(reshape) | |
library(foreach) | |
library(car) | |
library(RColorBrewer) | |
library(ggplot2) | |
library(scales) | |
library(doMC) | |
registerDoMC(cores=4) | |
### Set up configs. | |
start.date <- as.Date("2012-12-10") | |
n.iter <- 2e6 | |
n.burn <- n.iter / 4 | |
n.thin <- n.iter/1000 | |
the.date <- Sys.Date() | |
### Get data | |
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?key=0AlLkeTCb2GrxdFFVeEY1bS1nU3N4MnF2NnduQVNtOUE&single=true&gid=0&output=csv") | |
dat <- read.csv(textConnection(gdoc),na.strings=c("","NA")) | |
### Clean up numeric information | |
pct.vars <- 9:23 | |
dat[,pct.vars] <- apply(dat[,pct.vars],2,function(x){ | |
as.numeric(gsub("%","",gsub(",",".",x)))/100 | |
}) | |
### Clean up date information | |
date.vars <- 2:5 | |
dat[,date.vars] <- apply(dat[,date.vars],2,function(x){ | |
julian(as.Date(as.character(x),format="%d/%m/%Y"),start.date) | |
}) | |
### Subset to dates after start date | |
dat$avg.date <- floor(rowMeans(dat[,c("DateFrom","DateTo")])) + 1 | |
dat <- subset(dat,avg.date > 0) | |
### Clean up nRespondents | |
dat$nRespondents[is.na(dat$nRespondents)] <- 200 | |
### Clean up company information | |
dat$Company <- recode(dat$Company, | |
"c('Datamonitor')='DATAMONITOR'; | |
c('Demos-Demetra','Demos&Pi')='Demos'; | |
c('Ipsos','Ipsos pa','Ipsos srl')='Ipsos srl'; | |
c('Istituto Piepoli','Istituto Piepoli Spa')='Istituto Piepoli'; | |
c('tecne\\'','tecnè','Tecnè')='Tecnè'") | |
### Convert to factor | |
### Explicit listing of companies is brittle | |
### But necessary for the Bugs scripts | |
company.levels <- c("SpinCon", | |
"DATAMONITOR", | |
"SWG spa", | |
"DEMOPOLIS - Istituto Nazionale di Ricerche", | |
"ISPO RICERCHE S.R.L.", | |
"Lorien Consulting", | |
"Tecnè", | |
"Quorum", | |
"EMG Srl", | |
"Demos", | |
"Euromedia Research", | |
"IPR Marketing", | |
"Istituto Piepoli", | |
#"Epoké. Ricerche Sociali Applicate", | |
"Ipsos srl") | |
dat$Company <- factor(dat$Company, | |
levels = company.levels, | |
ordered=TRUE) | |
dat <- dat[!is.na(dat$Company),] | |
### Save the clean version | |
save(dat,file= paste(the.date,"_setup.RData",sep="")) | |
################################################################# | |
## JAGS MODEL! | |
################################################################# | |
model1 <- function() { | |
## measurement model | |
for(i in 1:nPolls){ | |
mu[i] <- house[org[i]] + alpha[date[i]] # + senfx * isSenate[i] | |
y[i] ~ dnorm(mu[i],prec[i]) # %_%T(0,1) | |
} | |
## transition model (aka random walk prior) | |
for(i in 2:nPeriods){ | |
mu.alpha[i] <- alpha[i-1] | |
alpha[i] ~ dnorm(mu.alpha[i],tau) # %_%T(0,1) | |
} | |
## priors | |
tau <- 1/pow(sigma,2) ## deterministic transform to precision | |
sigma ~ dunif(0,.01) ## uniform prior on standard deviation | |
alpha[1] ~ dunif(0,.5) ## initialization of daily track | |
#senfx ~ dnorm(0,0.01) | |
for(i in 1:nOrgs){ ## normal priors for house effects | |
house.star[i] ~ dnorm(0,1000) | |
house[i] <- house.star[i] - mean(house.star) | |
} | |
} | |
write.model(model1,con="model1.bug") | |
### Do loop around parties | |
the.parties <- names(dat)[pct.vars] | |
the.parties <- setdiff(the.parties,"Lista.Monti.Senato") | |
res <- foreach(i = the.parties) %dopar% { | |
good <- !is.na(dat[,i]) & (dat$Area == "Italy") & (dat$Ramo=="Camera") | |
if (i == "Altri.CDX") { | |
good <- !is.na(dat[,i]) & | |
!is.na(dat[,"Fratelli.d.Italia"]) & | |
!is.na(dat[,"Destra"]) & | |
(dat$Area == "Italy") | |
} | |
y <- dat[good,i] | |
var <- (y)*(1-(y))/(dat$nRespondents[good]) | |
prec <- 1/var | |
date <- dat$avg.date[good] | |
org=as.integer(dat$Company[good]) | |
isSenate = (dat$Ramo == "Senato") | |
nOrgs = length(levels(dat$Company)) | |
nPolls=length(y[good]) | |
nPeriods=length(min(dat$avg.date):max(dat$avg.date)) | |
alpha=rep(NA,nPeriods) | |
forJags <- list(y=y,prec=prec,date=date,org=org,nOrgs=nOrgs,nPolls=nPolls, | |
isSenate = isSenate, | |
nPeriods = nPeriods,alpha=alpha) | |
initfunc <- function(party){ | |
house.star <- rnorm(n=forJags$nOrgs,sd=.075) | |
nPeriods = length(min(dat$avg.date):max(dat$avg.date)) | |
sigma <- runif(n=1,0,.01) | |
alpha <- rep(NA,nPeriods) | |
if (i == "PD") { | |
alpha[1] <- .28 | |
} else if (i == "PDL") { | |
alpha[1] <- .17 | |
} else if (i == "M5S") { | |
alpha[1] <- .15 | |
} else if (i == "Lega") { | |
alpha[1] <- .05 | |
} else if (i == "Destra") { | |
alpha[1] <- .015 | |
} else if (i == "Fratelli.d.Italia") { | |
alpha[1] <- .015 | |
} else if (i == "Altri.CDX") { | |
alpha[1] <- .015 | |
} else if (i == "UDC") { | |
alpha[1] <- .035 | |
} else if (i == "FLI") { | |
alpha[1] <- .015 | |
} else if (i == "Lista.Monti.Camera") { | |
alpha[1] <- .09 | |
} else if (i == "SEL") { | |
alpha[1] <- .04 | |
} else if (i == "Altri.CSX") { | |
alpha[1] <- .015 | |
} else if (i == "Rivoluzione.Civile") { | |
alpha[1] <- .04 | |
} else if (i == "FermareDeclino") { | |
alpha[1] <- .015 | |
} | |
list(house.star=house.star,alpha=alpha, | |
sigma=sigma) | |
} | |
my.model <- jags.model("model1.bug", | |
data=forJags,inits=initfunc(i),n.chains=1, | |
n.adapt=100) | |
out <- coda.samples(my.model,n.burnin=n.burn, | |
variable.names=c("alpha","house","sigma"),#,"senfx"), | |
n.iter=n.iter,thin=n.thin) | |
filename = paste("jags_output/jags_output_",i,"_",the.date,".RData",sep="") | |
save(out,forJags,file=filename) | |
} | |
################################################################# | |
## END OF NATIONAL ANALYSIS | |
################################################################# | |
model2 <- function() { | |
for(j in 1:nOrgs){ ## vague normal priors for house effects | |
house[j] ~ dnorm(true.house[j],true.prec.house[j]) | |
house.cut[j] <- cut(house[j]) | |
} | |
for(j in 1:nDailies) { | |
alpha[j] ~ dnorm(true.alpha[j],true.prec.alpha[j]) | |
alpha.cut[j] <- cut(alpha[j]) | |
} | |
for (i in 1:nregPolls) { | |
mu[i] <- house.cut[org[i]] + (alpha.cut[date[i]] * scale.factor[region[i]]) | |
y[i] ~ dnorm(mu[i],prec[i]) | |
} | |
for (i in 1:nRegions) { | |
scale.factor[i] ~ dnorm(mm[i],pm[i]) | |
} | |
} | |
write.model(model2,con="model2.bug") | |
the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA", | |
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA", | |
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA", | |
"SICILIA","TOSCANA","UMBRIA","VENETO") | |
### Get regional priors, precisions | |
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdDY3MDlDRzQ0WDVhT2xPRVpXYzAzclE&single=true&gid=0&output=csv") | |
mean.mat <- read.csv(textConnection(gdoc),header=T,check.names=T) | |
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdF9lb2daMURVTlNsOWNFbUxsWktXOUE&single=true&gid=0&output=csv") | |
prec.mat <- read.csv(textConnection(gdoc),header=T,check.names=T) | |
### List files | |
### Exclude UDC, FLI | |
the.files <- list.files(path="jags_output/", | |
pattern = paste("jags_output.*",the.date,".RData",sep="")) | |
## Exclude regional files | |
the.files <- the.files[grep("region",the.files,invert=TRUE)] | |
## Exclude UDC, FLI | |
the.files <- the.files[grep("(UDC|FLI)",the.files,invert=TRUE)] | |
for (my.file in the.files) { | |
the.party <- gsub("jags_output_","",my.file) | |
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party) | |
if (the.party == "Lista.Monti.Camera") { | |
the.party <- "Lista.Monti.Senato" | |
} | |
## Load the file | |
load(paste("jags_output/",my.file,sep="")) | |
holder <- summary(out) | |
## Identify parts of the file | |
dailies <- grep("alpha",rownames(holder$statistics)) | |
house.fx <- grep("house",rownames(holder$statistics)) | |
## ############################################################## | |
## Now get regional stuff | |
## ############################################################## | |
reg <- subset(dat,Area!="Italy") | |
reg$Region <- factor(toupper(reg$Area), | |
levels = the.regions, | |
ordered=TRUE) | |
reg <- reg[!is.na(reg$Region),] | |
reggood <- (!is.na(reg[,the.party])) | |
y <- reg[reggood,the.party] | |
var <- (y)*(1-(y))/(reg$nRespondents[reggood]) | |
prec <- 1/ var | |
prec [!is.finite(prec)] <- max(prec[is.finite(prec)]) | |
forJags <- list(y=y, | |
true.house = holder$statistics[house.fx,1], | |
true.prec.house = 1 / (holder$statistics[house.fx,2] ^ 2), | |
true.alpha = holder$statistics[dailies,1], | |
true.prec.alpha = 1 / (holder$statistics[dailies,2] ^ 2), | |
org=as.integer(reg$Company[reggood]), | |
nOrgs = length(company.levels), | |
nDailies = length(dailies), | |
region = as.integer(reg$Region[reggood]), | |
nRegions = length(the.regions), | |
date = reg$avg.date[reggood], | |
mm = mean.mat[,the.party], | |
pm = prec.mat[,the.party], | |
prec=prec, | |
nregPolls=length(y[reggood])) | |
#initfunc <- function(){ | |
# house.star <- rnorm(n=forJags$nOrgs,sd=.075) | |
# nPeriods = length(min(dat$Julian):max(dat$Julian)) | |
# sigma <- runif(n=1,0,.01) | |
# list(house.star=house.star, | |
# sigma=sigma) | |
#} | |
initfunc <- function(){ | |
scale.factor <- rnorm(length(forJags$mm), | |
mean = forJags$mm, | |
sd = (1/forJags$pm)) | |
list(scale.factor=scale.factor) | |
} | |
my.bugs.directory<-"/home/chris/.wine/drive_c/Program Files/WinBUGS14" | |
bugs.obj <- bugs(data=forJags, inits=initfunc, | |
model.file="model2.bug", | |
parameters.to.save=c("scale.factor"), | |
n.chains=3, | |
n.iter=1e5+1e4, | |
n.burnin=1e4, | |
n.thin=1e5/1e3, | |
codaPkg = TRUE, | |
bugs.directory=my.bugs.directory, | |
working.directory=".", | |
debug=F) | |
out <- read.bugs(bugs.obj) | |
filename = paste("jags_output/jags_output_",the.party,"_regions_",the.date,".RData",sep="") | |
save.image(file=filename) | |
} | |
#stop("End of evaluation: continue manually from here") | |
### Now get house effects | |
the.files <- list.files(path="jags_output/",pattern="jags_output_") | |
the.files <- the.files[grepl(the.date,the.files)] | |
## Exclude regional files | |
the.files <- the.files[grep("region",the.files,invert=TRUE)] | |
master <- vector("list",length(the.files)) | |
setwd("jags_output") | |
for (i in 1:length(the.files)) { | |
the.party <- gsub("jags_output_","",the.files[i]) | |
the.party <- gsub(".RData","",the.party) | |
the.party <- gsub(paste("_",the.date,sep=""),"",the.party) | |
load(the.files[i]) | |
holder <- summary(out) | |
my.house <- grep("house",rownames(holder$statistics)) | |
master[[i]] <- data.frame(Party=the.party, | |
Mean = holder$statistics[my.house,1], | |
Lo = holder$quantiles[my.house,1], | |
Lwr = holder$quantiles[my.house,2], | |
Upr = holder$quantiles[my.house,4], | |
Hi = holder$quantiles[my.house,5]) | |
} | |
setwd("..") | |
master <- do.call("rbind",master) | |
### Now plot trends over time | |
### | |
the.files <- list.files(path="jags_output/",pattern="jags_output_") | |
the.files <- the.files[grepl(the.date,the.files)] | |
## Exclude regional files | |
the.files <- the.files[grep("region",the.files,invert=TRUE)] | |
master <- vector("list",length(the.files)) | |
setwd("jags_output") | |
for (i in 1:length(the.files)) { | |
the.party <- gsub("jags_output_","",the.files[i]) | |
the.party <- gsub(".RData","",the.party) | |
the.party <- gsub(paste("_",the.date,sep=""),"",the.party) | |
load(the.files[i]) | |
holder <- summary(out) | |
my.alpha <- grep("alpha",rownames(holder$statistics)) | |
master[[i]] <- data.frame(Party=the.party, | |
Mean = holder$statistics[my.alpha,1], | |
Lo = holder$quantiles[my.alpha,1], | |
Lwr = holder$quantiles[my.alpha,2], | |
Upr = holder$quantiles[my.alpha,4], | |
Hi = holder$quantiles[my.alpha,5]) | |
} | |
setwd("..") | |
master <- do.call("rbind",master) | |
master$julian <- rep(1:length(my.alpha),times = length(the.files)) | |
master$Date <- as.Date(master$julian,origin=start.date) | |
master$Party <- factor(master$Party, | |
levels=c("PD","PDL", | |
"M5S","Lista.Monti.Camera", | |
"Lega","SEL","Rivoluzione.Civile", | |
"UDC","FLI", | |
"Destra","Fratelli.d.Italia", | |
"Altri.CDX","Altri.CSX","FermareDeclino")) | |
my.pal <- brewer.pal(11,"Paired") | |
party.colors <- c(my.pal[6],my.pal[2], | |
my.pal[10],my.pal[1], | |
my.pal[4],my.pal[5],my.pal[8], ## riv civ. | |
my.pal[4],my.pal[7], | |
"black","gray", | |
my.pal[6],my.pal[2],my.pal[9]) | |
## Break in to two pieces | |
top.parties <- levels(master$Party)[1:3] | |
middle.parties <- levels(master$Party)[4:7] | |
bottom.parties <- levels(master$Party)[8:length(levels(master$Party))] | |
## Set up limits | |
left.lim<-min(master$Date) | |
right.lim<-max(master$Date)+10 | |
## Plot top | |
top.df <- master[master$Party %in% top.parties,] | |
maxdate <- max(top.df$Date) | |
top.plot <- ggplot(top.df,aes(x=Date,y=Mean,group=Party)) + | |
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") + | |
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") + | |
geom_line(color="white") + | |
scale_x_date("Date",limits=c(left.lim,right.lim)) + | |
geom_text(data = subset(top.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + | |
scale_y_continuous("%",labels=percent) + | |
scale_fill_manual(values = party.colors,drop=T) + | |
scale_color_manual(values = party.colors,drop=T) + | |
theme_bw() + | |
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep="")) | |
middle.df <- master[master$Party %in% middle.parties,] | |
maxdate <- max(middle.df$Date) | |
middle.plot <- ggplot(middle.df,aes(x=Date,y=Mean,group=Party)) + | |
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") + | |
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") + | |
geom_line(color="white") + | |
scale_x_date("Date",limits=c(left.lim,right.lim)) + | |
geom_text(data = subset(middle.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + | |
scale_y_continuous("%",labels=percent) + | |
scale_fill_manual(values = party.colors,drop=T) + | |
scale_color_manual(values = party.colors,drop=T) + | |
theme_bw() + | |
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep="")) | |
bottom.df <- master[master$Party %in% bottom.parties,] | |
## bottom.df <- master[master$Party == "Destra",] | |
maxdate <- max(bottom.df$Date) | |
bottom.plot <- ggplot(bottom.df,aes(x=Date,y=Mean,group=Party)) + | |
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") + | |
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") + | |
geom_line(color="white") + | |
scale_x_date("Date",limits=c(left.lim,right.lim)) + | |
geom_text(data = subset(bottom.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + | |
scale_y_continuous("%",labels=percent) + | |
scale_fill_manual(values = party.colors,drop=T) + | |
scale_color_manual(values = party.colors,drop=T) + | |
theme_bw() + | |
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep="")) | |
### Now plot regional scale factors | |
### | |
the.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""), | |
path="jags_output/") | |
the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA", | |
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA", | |
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA", | |
"SICILIA","TOSCANA","UMBRIA","VENETO") | |
for (my.file in the.files) { | |
the.party <- gsub("jags_output_","",my.file) | |
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party) | |
the.party <- gsub("_regions","",the.party) | |
## Load the file | |
load(paste("jags_output/",my.file,sep="")) | |
holder <- summary(out) | |
## Plot the regional stuff | |
regional.df <- data.frame(Region = the.regions, | |
Mean = holder$statistics[-1,1], | |
Lo = holder$quantiles[-1,1], | |
Hi = holder$quantiles[-1,5]) | |
regional.df$Region <- factor(regional.df$Region,ordered=T) | |
regional.df$Region <- reorder(regional.df$Region,regional.df$Mean) | |
regional.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) + | |
geom_pointrange() + | |
scale_x_discrete("") + | |
scale_y_continuous(name = paste(the.party, "regional scale factor")) + | |
coord_flip() + theme_bw() | |
## How does this compare to our priors? | |
mm <- mean.mat [ , the.party] | |
pm <- prec.mat [ , the.party] | |
prior.df <- data.frame(Region = the.regions, | |
Mean = mm, | |
Lo = mm - 1.96 * sqrt(1/pm), | |
Hi = mm + 1.96 * sqrt(1/pm)) | |
prior.df$Region <- factor(prior.df$Region,ordered=T) | |
prior.df$Region <- reorder(prior.df$Region,prior.df$Mean) | |
prior.plot <- ggplot(prior.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) + | |
geom_pointrange() + | |
scale_x_discrete("") + | |
scale_y_continuous(name = paste(the.party, "regional scale factor")) + | |
coord_flip() + theme_bw() | |
regional.df$Variable <- "Estimated" | |
prior.df$Variable <- "Prior" | |
regional.df <- rbind(regional.df,prior.df) | |
comb.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region,color=Variable)) + | |
geom_pointrange(position=position_dodge(width=.5, height=0)) + | |
scale_x_discrete("") + | |
scale_y_continuous(name = paste(the.party, "regional scale factor")) + | |
coord_flip() + theme_bw() | |
pdf(file = paste("images/",the.party,"_regional_scale.pdf",sep="")) | |
print(comb.plot) | |
dev.off() | |
} | |
### Votes to seat allocations | |
### | |
## First, the Camera | |
largest.remainders <- function(x,y) { | |
## x is vote shares | |
## y is number seats | |
if (length(y)>1) { | |
stop("Number of seats should have length 1") | |
} | |
x2 <- x/sum(x,na.rm=T) | |
quota <- 1 / y | |
seats <- floor(x2 / quota) | |
seats.left <- y - sum(seats) | |
if (seats.left > 0) { | |
remainder <- x2 %% quota | |
additional.seats <- order(remainder,decreasing=T)[1:seats.left] | |
seats[additional.seats] <- seats[additional.seats] + 1 | |
} | |
seats | |
} | |
doCameraAlloc <- function(dat){ | |
## Create coalitions | |
dat$Coalition <- car:::recode(as.character(dat$Party), | |
"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX'; | |
c('UDC','FLI','Lista.Monti.Camera') = 'Centre'; | |
c('PD','SEL','Altri.CSX') = 'CSX'") | |
## Check coalition/list thresholds | |
dat$InCoalition <- (dat$Party != dat$Coalition) | |
dat <- ddply(dat,.(Coalition),transform,CoalitionTotal = sum(Value,na.rm=T)) | |
dat <- ddply(dat,.(Coalition),transform,Coalition2Pct = any(Value>2)) | |
dat$makesThreshold.a <- ((dat$InCoalition == 1) & (dat$CoalitionTotal > 10) & (dat$Coalition2Pct == TRUE)) | |
dat$makesThreshold.b <- ((dat$InCoalition == 0) & (dat$CoalitionTotal > 4)) | |
dat$makesThreshold <- (dat$makesThreshold.a | dat$makesThreshold.b) | |
## Remove things which do not make the cut at this stage | |
dat <- subset(dat,makesThreshold) | |
## Check maj. premium, initial allocation | |
coalition.df <- unique(dat[,c("CoalitionTotal","Coalition")]) | |
coalition.df$CoalitionAlloc <- largest.remainders(coalition.df$CoalitionTotal,629) ## 630 less Val d'Aosta | |
dat <- merge(dat,coalition.df,all=T) | |
isOkay <- (max(dat$CoalitionAlloc)>340) | |
if (isOkay) { | |
} else { | |
dat$CoalitionAlloc[which(dat$CoalitionTotal == max(dat$CoalitionTotal))] <- 340 | |
} | |
## Check list within coalition thresholds | |
dat$makesThreshold.c <- (dat$InCoalition ==0) | (dat$Value > 2) | |
dat <- ddply(dat,.(Coalition),function(inner.df) { | |
inner.df$lowValues <-inner.df$Value | |
inner.df$lowValues [which(inner.df$Value > 2)] <- NA | |
inner.df$makesThreshold.c[which(inner.df$lowValues == max(inner.df$lowValues,na.rm=T))] <- TRUE | |
inner.df | |
}) | |
## But impose a ban on Altri.CDX getting anything | |
dat$makesThreshold.c[dat$Party=="Altri.CDX"] <- FALSE | |
## Remove things which do not make the cut at this stage | |
dat <- subset(dat,makesThreshold.c) | |
## Allocate seats | |
dat <- ddply(dat,.(Coalition),function(inner.df){ | |
inner.df$PartyAlloc <- largest.remainders(inner.df$Value, | |
unique(inner.df$CoalitionAlloc)) | |
inner.df | |
}) | |
dat | |
} | |
national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""), | |
path="jags_output/") | |
national.files <- national.files[grep("region",national.files,invert=TRUE)] | |
## Remove Monti in the Senate | |
national.files <- national.files[grep("Senato",national.files,invert=TRUE)] | |
nParties <- length(national.files) | |
nSimulations <- 1000 | |
out.mat <- array(data=NA, | |
dim = c(nSimulations,nParties)) | |
dimnames(out.mat)[[1]] <- as.character(1:1000) | |
dimnames(out.mat)[[2]] <- letters[1:nParties] | |
for (national.file in national.files) { | |
the.party <- gsub("jags_output_","",national.file) | |
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party) | |
load(paste("jags_output/",national.file,sep="")) | |
national.out <- out | |
## Get the vote share for this party on the last available date | |
dailies <- grep("alpha",colnames(national.out[[1]])) | |
lastday <- dailies[length(dailies)] | |
national.vote <- as.matrix(national.out[[1]][,lastday]) | |
national.vote [ which(national.vote < 0 )] <- 0 | |
out.mat[,match(national.file,national.files)] <- national.vote | |
dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party | |
} | |
plot.df <- melt(out.mat) | |
names(plot.df) <- c("Simulation","Party","Value") | |
plot.df$Value <- plot.df$Value * 100 | |
camera.seats <- ddply(plot.df,.(Simulation),doCameraAlloc) | |
write.csv(camera.seats,file=paste("camera_seats_",the.date,".csv",sep="")) | |
### Now get the regional vote shares | |
national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""), | |
path="jags_output/") | |
national.files <- national.files[grep("region",national.files,invert=TRUE)] | |
## Remove UDC, FLI | |
national.files <- national.files[grep("UDC|FLI",national.files,invert=TRUE)] | |
regional.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""), | |
path="jags_output/") | |
nRegions <- length(the.regions) | |
nParties <- length(national.files) | |
nSimulations <- 1000 | |
out.mat <- array(data=NA, | |
dim = c(nRegions, nParties, nSimulations)) | |
dimnames(out.mat)[[1]] <- the.regions | |
dimnames(out.mat)[[2]] <- letters[1:nParties] | |
dimnames(out.mat)[[3]] <- as.character(1:1000) | |
for (national.file in national.files) { | |
the.party <- gsub("jags_output_","",national.file) | |
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party) | |
load(paste("jags_output/",national.file,sep="")) | |
national.out <- out | |
if (the.party == "Lista.Monti.Camera") { | |
the.party <- "Lista.Monti.Senato" | |
} | |
regional.file <- regional.files[grep(paste(the.party,"_",sep=""),regional.files)] | |
load(paste("jags_output/",regional.file,sep="")) | |
regional.out <- out | |
## Get the vote share for this party on the last available date | |
dailies <- grep("alpha",colnames(national.out[[1]])) | |
lastday <- dailies[length(dailies)] | |
## Get the regional scale factors | |
regional.fx <- grep("scale.factor",colnames(regional.out[[1]])) | |
## If you want to know their performance on the last day | |
## summary(national.out[[1]][,lastday]) | |
## If you want to know their regional scale factors | |
## summary(regional.out[[1]][,regional.fx]) | |
rsf <- as.matrix(regional.out[[1]][,regional.fx]) | |
rsf [which(rsf < 0) ] <- 0 | |
national.vote <- as.matrix(national.out[[1]][,lastday]) | |
national.vote [ which(national.vote < 0 )] <- 0 | |
national.vote <- matrix(rep(national.vote,nRegions),nrow=nSimulations,ncol=nRegions) | |
foo <- national.vote * rsf | |
foo [ which(foo < 0) ] <- 0 | |
colnames(foo) <- the.regions | |
out.mat[,match(national.file,national.files),] <- t(foo) | |
dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party | |
} | |
plot.df <- melt(out.mat[,,]) | |
plot.df$value <- plot.df$value * 100 | |
write.csv(plot.df,file= paste("vote_allocation_",the.date,".csv",sep="")) | |
pdf(file="images/regional_vote_shares.pdf",width=20,height=20) | |
ggplot(plot.df,aes(x=X1,y=value)) + geom_boxplot() + | |
facet_grid(~X2,scale="free") + scale_y_continuous() + | |
opts(axis.text.y = theme_text(angle=90)) + coord_flip() + theme_bw() | |
dev.off() | |
### Now convert to seats | |
seat.df <- data.frame(Region = c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA", | |
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA", | |
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA", | |
"SICILIA","TOSCANA","UMBRIA","VENETO"), | |
Seats = c(7,7,10,29, | |
22,7,28,8, | |
49,8,2,22,20,8, | |
25,18,7,24)) | |
seat.df$MajPrem <- floor(.55 * seat.df$Seats) + 1 | |
seat.df$MajPrem[seat.df$Region=="MOLISE"] <- 1 | |
names(plot.df) <- c("Region","Party","Simulation","value") | |
plot.df$Party <- as.character(plot.df$Party) | |
plot.df <- subset(plot.df,!is.element(Party,c("UDC","FLI"))) | |
plot.df <- merge(plot.df,seat.df) | |
plot.df$Coalition <- car:::recode(as.character(plot.df$Party), | |
"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX'; | |
c('PD','SEL','Altri.CSX') = 'CSX'") | |
plot.df$InCoalition <- as.numeric(plot.df$Party != plot.df$Coalition) | |
system.time(seat.alloc.df <- ddply(plot.df,.(Region,Simulation),function(df){ | |
df <- ddply(df,.(Coalition),transform,CoalitionTotal = sum(value)) | |
df <- ddply(df,.(Coalition),transform,Coalition3Pct = any(value>=3)) | |
df$makesThreshold.b1 <- ((df$InCoalition == 1) & (df$CoalitionTotal > 20) & (df$Coalition3Pct == TRUE)) | |
df$makesThreshold.b2a <- ((df$InCoalition == 0) & (df$value >= 8)) | |
df$makesThreshold.b2b <- ((df$InCoalition == 1) & (df$value >= 8) & (df$makesThreshold.b1 == TRUE)) | |
df$makesThreshold <- ((df$makesThreshold.b1) | (df$makesThreshold.b2a) | (df$makesThreshold.b2b)) | |
df <- subset(df,makesThreshold == TRUE) ## DESTROY BELOW THRESHOLD PARTIES | |
df$RelevantEntity <- ifelse(df$makesThreshold.b1, | |
as.character(df$Coalition), | |
as.character(df$Party)) | |
df$RelevantValue <- ifelse(df$makesThreshold.b1,df$CoalitionTotal,df$value) | |
relevant.df <- unique(df[,c("RelevantEntity","RelevantValue","Seats","MajPrem")]) | |
## Create initial allocation | |
relevant.df$initialAlloc <- NA | |
relevant.df$initialAlloc <- largest.remainders( | |
x = relevant.df$RelevantValue, | |
y = unique(relevant.df$Seats)) | |
## Check whether initial allocation acceptable | |
relevant.df$FinalAlloc <- NA | |
initialAllocSucceeds <- max(relevant.df$initialAlloc) >= unique(relevant.df$MajPrem) | |
if (initialAllocSucceeds) { | |
relevant.df$FinalAlloc <- relevant.df$initialAlloc | |
} else { | |
largest.entity <- which(relevant.df$RelevantValue == max(relevant.df$RelevantValue)) | |
smaller.entities <- which(relevant.df$RelevantValue != max(relevant.df$RelevantValue)) | |
largest.entity <- relevant.df[largest.entity,] | |
smaller.entities <- relevant.df[smaller.entities,] | |
largest.entity$FinalAlloc <- unique(largest.entity$MajPrem) | |
smaller.entities$FinalAlloc <- largest.remainders( | |
x = smaller.entities$RelevantValue, | |
y = unique(smaller.entities$Seats - smaller.entities$MajPrem)) | |
relevant.df <- rbind(largest.entity,smaller.entities) | |
} | |
## Now allocate seats between within-coalition bodies | |
df <- merge(df,relevant.df) | |
df <- ddply(df,.(RelevantEntity),function(df2) { | |
df2$SecondAlloc <- largest.remainders(df2$value,unique(df2$FinalAlloc)) | |
df2 | |
}) | |
}) | |
) | |
write.csv(seat.alloc.df,file=paste("seat_allocation_",the.date,".csv",sep="")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment