chrishanretty · February 10, 2013 18:21
diff --git a/pooling_spe.R b/pooling_spe.R
 ### Takes as input a Google Docs spreadsheet
 ### Outputs national and regional level estimates of support
 ### over time

 ### Load libraries
 library(R2WinBUGS)
 library(rjags)
 library(RCurl)
 library(reshape)
 library(foreach)
 library(car)
 library(RColorBrewer)
 library(ggplot2)
 library(scales)
 library(doMC)

 registerDoMC(cores=4)

 ### Set up configs.
 start.date <- as.Date("2012-12-10")
 n.iter <- 2e6
 n.burn <- n.iter / 4
 n.thin <- n.iter/1000
 the.date <- Sys.Date()

 ### Get data
 gdoc <- getURL("https://docs.google.com/spreadsheet/pub?key=0AlLkeTCb2GrxdFFVeEY1bS1nU3N4MnF2NnduQVNtOUE&single=true&gid=0&output=csv")

 dat <- read.csv(textConnection(gdoc),na.strings=c("","NA"))

 ### Clean up numeric information
 pct.vars <- 9:23
 dat[,pct.vars] <- apply(dat[,pct.vars],2,function(x){
 	as.numeric(gsub("%","",gsub(",",".",x)))/100
 })

 ### Clean up date information
 date.vars <- 2:5
 dat[,date.vars] <- apply(dat[,date.vars],2,function(x){
 	julian(as.Date(as.character(x),format="%d/%m/%Y"),start.date)
 })


 ### Subset to dates after start date
 dat$avg.date <- floor(rowMeans(dat[,c("DateFrom","DateTo")])) + 1
 dat <- subset(dat,avg.date > 0)

 ### Clean up nRespondents
 dat$nRespondents[is.na(dat$nRespondents)] <- 200

 ### Clean up company information
 dat$Company <- recode(dat$Company,
 	"c('Datamonitor')='DATAMONITOR';
 	c('Demos-Demetra','Demos&Pi')='Demos';
 	c('Ipsos','Ipsos pa','Ipsos srl')='Ipsos srl';
 	c('Istituto Piepoli','Istituto Piepoli Spa')='Istituto Piepoli';
 	c('tecne\\'','tecnè','Tecnè')='Tecnè'")

 ### Convert to factor
 ### Explicit listing of companies is brittle
 ### But necessary for the Bugs scripts

 company.levels <- c("SpinCon",
 	"DATAMONITOR",
 	"SWG spa",
 	"DEMOPOLIS - Istituto Nazionale di Ricerche",
 	"ISPO RICERCHE S.R.L.",
 	"Lorien Consulting",
 	"Tecnè",
 	"Quorum",
 	"EMG Srl",
 	"Demos",
 	"Euromedia Research",
 	"IPR Marketing",
 	"Istituto Piepoli",
 	#"Epoké. Ricerche Sociali Applicate",
 	"Ipsos srl")

 dat$Company <- factor(dat$Company,
 	levels = company.levels,
 	ordered=TRUE)
 dat <- dat[!is.na(dat$Company),]
 ### Save the clean version
 save(dat,file= paste(the.date,"_setup.RData",sep=""))

 #################################################################
 ## JAGS MODEL!
 #################################################################

 model1 <- function() {
 ## measurement model
 for(i in 1:nPolls){
       mu[i] <- house[org[i]] + alpha[date[i]] # + senfx * isSenate[i]
       y[i] ~ dnorm(mu[i],prec[i]) # %_%T(0,1)
 }

 ## transition model (aka random walk prior)
 for(i in 2:nPeriods){
        mu.alpha[i] <- alpha[i-1]
        alpha[i] ~ dnorm(mu.alpha[i],tau) # %_%T(0,1)
 }

 ## priors
 tau <- 1/pow(sigma,2)   ## deterministic transform to precision
 sigma ~ dunif(0,.01)    ## uniform prior on standard deviation

 alpha[1] ~ dunif(0,.5) ## initialization of daily track

 #senfx ~ dnorm(0,0.01)

 for(i in 1:nOrgs){          ## normal priors for house effects
 	house.star[i] ~ dnorm(0,1000)
 	house[i] <- house.star[i] - mean(house.star)
 }
 }
 write.model(model1,con="model1.bug")


 ### Do loop around parties
 the.parties <- names(dat)[pct.vars]
 the.parties <- setdiff(the.parties,"Lista.Monti.Senato")

 res <- foreach(i = the.parties) %dopar% {
 	good <- !is.na(dat[,i]) & (dat$Area == "Italy") & (dat$Ramo=="Camera")
 	if (i == "Altri.CDX") {
 		good <- !is.na(dat[,i]) & 
 			!is.na(dat[,"Fratelli.d.Italia"]) & 
 			!is.na(dat[,"Destra"]) & 
 			(dat$Area == "Italy")

 	}
 	y <- dat[good,i]
 	var <- (y)*(1-(y))/(dat$nRespondents[good])
 	prec <- 1/var
 	date <- dat$avg.date[good]
 	org=as.integer(dat$Company[good])
 	isSenate = (dat$Ramo == "Senato")
 	nOrgs = length(levels(dat$Company))
 	nPolls=length(y[good])
 	nPeriods=length(min(dat$avg.date):max(dat$avg.date))
 	alpha=rep(NA,nPeriods)
 	forJags <- list(y=y,prec=prec,date=date,org=org,nOrgs=nOrgs,nPolls=nPolls,
 		isSenate = isSenate,
 		nPeriods = nPeriods,alpha=alpha)
 	initfunc <- function(party){
 		house.star <- rnorm(n=forJags$nOrgs,sd=.075)
 		nPeriods = length(min(dat$avg.date):max(dat$avg.date))
 		sigma <- runif(n=1,0,.01)
 		alpha <- rep(NA,nPeriods)
 		if (i == "PD") {
 			alpha[1] <- .28
 		} else if (i == "PDL") {
 			alpha[1] <- .17
 		} else if (i == "M5S") {
 			alpha[1] <- .15
 		} else if (i == "Lega") {
 			alpha[1] <- .05
 		} else if (i == "Destra") {
 			alpha[1] <- .015
 		} else if (i == "Fratelli.d.Italia") {
 			alpha[1] <- .015
 		} else if (i == "Altri.CDX") {
 			alpha[1] <- .015
 		} else if (i == "UDC") {
 			alpha[1] <- .035
 		} else if (i == "FLI") {
 			alpha[1] <- .015
 		} else if (i == "Lista.Monti.Camera") {
 			alpha[1] <- .09
 		} else if (i == "SEL") {
 			alpha[1] <- .04
 		} else if (i == "Altri.CSX") {
 			alpha[1] <- .015
 		} else if (i == "Rivoluzione.Civile") {
 			alpha[1] <- .04
 		} else if (i == "FermareDeclino") {				
 			alpha[1] <- .015
 		}
 		list(house.star=house.star,alpha=alpha,
 			sigma=sigma)
 	}
 	my.model <- jags.model("model1.bug",
 		data=forJags,inits=initfunc(i),n.chains=1,
 		n.adapt=100)
 	out <- coda.samples(my.model,n.burnin=n.burn,
 		variable.names=c("alpha","house","sigma"),#,"senfx"),
 		n.iter=n.iter,thin=n.thin)

 	filename = paste("jags_output/jags_output_",i,"_",the.date,".RData",sep="")
 	save(out,forJags,file=filename)

 }

 #################################################################
 ## END OF NATIONAL ANALYSIS
 #################################################################


 model2 <- function() {

 	for(j in 1:nOrgs){          ## vague normal priors for house effects
 		house[j] ~ dnorm(true.house[j],true.prec.house[j])
 		house.cut[j] <- cut(house[j])
 	}
 	for(j in 1:nDailies) {
 		alpha[j] ~ dnorm(true.alpha[j],true.prec.alpha[j])
 		alpha.cut[j] <- cut(alpha[j])
 	}

 	 for (i in 1:nregPolls) {
 		   mu[i] <- house.cut[org[i]] + (alpha.cut[date[i]] * scale.factor[region[i]])
 		   y[i] ~ dnorm(mu[i],prec[i])
 	 }
 	for (i in 1:nRegions) {
 		scale.factor[i] ~ dnorm(mm[i],pm[i])
 	}
 }

 write.model(model2,con="model2.bug")

 the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
 	"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
 	"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
 	"SICILIA","TOSCANA","UMBRIA","VENETO")

 ### Get regional priors, precisions
 gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdDY3MDlDRzQ0WDVhT2xPRVpXYzAzclE&single=true&gid=0&output=csv")
 mean.mat <- read.csv(textConnection(gdoc),header=T,check.names=T)

 gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdF9lb2daMURVTlNsOWNFbUxsWktXOUE&single=true&gid=0&output=csv")
 prec.mat <- read.csv(textConnection(gdoc),header=T,check.names=T)

 ### List files 
 ### Exclude UDC, FLI

 the.files <- list.files(path="jags_output/",
 	pattern = paste("jags_output.*",the.date,".RData",sep=""))
 ## Exclude regional files
 the.files <- the.files[grep("region",the.files,invert=TRUE)]
 ## Exclude UDC, FLI
 the.files <- the.files[grep("(UDC|FLI)",the.files,invert=TRUE)]

 for (my.file in the.files) {
 	the.party <- gsub("jags_output_","",my.file)
 	the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)

 	if (the.party == "Lista.Monti.Camera") {
 		the.party <- "Lista.Monti.Senato"
 	}
 	
 	## Load the file
 	load(paste("jags_output/",my.file,sep=""))

 	holder <- summary(out)

 	## Identify parts of the file
 	dailies <- grep("alpha",rownames(holder$statistics))
 	house.fx <- grep("house",rownames(holder$statistics))

 	## ##############################################################
 	## Now get regional stuff
 	## ##############################################################

 	reg <- subset(dat,Area!="Italy")
 	reg$Region <- factor(toupper(reg$Area),
 		levels = the.regions,
 		ordered=TRUE)
 	reg <- reg[!is.na(reg$Region),]

 	reggood <- (!is.na(reg[,the.party]))
 	y <- reg[reggood,the.party]
 	var <- (y)*(1-(y))/(reg$nRespondents[reggood])
 	prec <- 1/ var
 	prec [!is.finite(prec)] <- max(prec[is.finite(prec)])

 	forJags <- list(y=y,
 		true.house = holder$statistics[house.fx,1],
 		true.prec.house = 1 / (holder$statistics[house.fx,2] ^ 2),
 		true.alpha = holder$statistics[dailies,1],
 		true.prec.alpha = 1 / (holder$statistics[dailies,2] ^ 2),
 		org=as.integer(reg$Company[reggood]),
 		nOrgs = length(company.levels),
 		nDailies = length(dailies),
 		region = as.integer(reg$Region[reggood]),
 		nRegions = length(the.regions),
 		date = reg$avg.date[reggood],
 		mm = mean.mat[,the.party],
 		pm = prec.mat[,the.party],
 		prec=prec,
 		nregPolls=length(y[reggood]))


 	#initfunc <- function(){
 	#	house.star <- rnorm(n=forJags$nOrgs,sd=.075)
 	#	nPeriods = length(min(dat$Julian):max(dat$Julian))
 	#	sigma <- runif(n=1,0,.01)
 	#	list(house.star=house.star,
 	#	sigma=sigma)
 	#}
 	initfunc <- function(){
 		scale.factor <- rnorm(length(forJags$mm),
 			mean = forJags$mm,
 			sd = (1/forJags$pm))
 		list(scale.factor=scale.factor)
 	}
 	my.bugs.directory<-"/home/chris/.wine/drive_c/Program Files/WinBUGS14"

 	bugs.obj <- bugs(data=forJags, inits=initfunc,
 	  model.file="model2.bug", 
 	  parameters.to.save=c("scale.factor"), 
 	  n.chains=3,
 	  n.iter=1e5+1e4, 
 	  n.burnin=1e4, 
 	  n.thin=1e5/1e3, 
 	  codaPkg = TRUE,
 	  bugs.directory=my.bugs.directory, 	
 	  working.directory=".",
 	  debug=F)

 	out <- read.bugs(bugs.obj)
 	filename = paste("jags_output/jags_output_",the.party,"_regions_",the.date,".RData",sep="")
 	save.image(file=filename)

 }

 #stop("End of evaluation: continue manually from here")


 ### Now get house effects

 the.files <- list.files(path="jags_output/",pattern="jags_output_")
 the.files <- the.files[grepl(the.date,the.files)]
 ## Exclude regional files
 the.files <- the.files[grep("region",the.files,invert=TRUE)]

 master <- vector("list",length(the.files))

 setwd("jags_output")
 for (i in 1:length(the.files)) {
 	the.party <- gsub("jags_output_","",the.files[i])
 	the.party <- gsub(".RData","",the.party)
 	the.party <- gsub(paste("_",the.date,sep=""),"",the.party)
 	load(the.files[i])
 	holder <- summary(out)
 	my.house <- grep("house",rownames(holder$statistics))
 	master[[i]] <- data.frame(Party=the.party,
 		Mean = holder$statistics[my.house,1],
 		Lo = holder$quantiles[my.house,1],
 		Lwr = holder$quantiles[my.house,2],
 		Upr = holder$quantiles[my.house,4],
 		Hi = holder$quantiles[my.house,5])
 }
 setwd("..")
 master <- do.call("rbind",master)

 ### Now plot trends over time
 ###

 the.files <- list.files(path="jags_output/",pattern="jags_output_")
 the.files <- the.files[grepl(the.date,the.files)]
 ## Exclude regional files
 the.files <- the.files[grep("region",the.files,invert=TRUE)]

 master <- vector("list",length(the.files))

 setwd("jags_output")
 for (i in 1:length(the.files)) {
 	the.party <- gsub("jags_output_","",the.files[i])
 	the.party <- gsub(".RData","",the.party)
 	the.party <- gsub(paste("_",the.date,sep=""),"",the.party)
 	load(the.files[i])
 	holder <- summary(out)
 	my.alpha <- grep("alpha",rownames(holder$statistics))
 	master[[i]] <- data.frame(Party=the.party,
 		Mean = holder$statistics[my.alpha,1],
 		Lo = holder$quantiles[my.alpha,1],
 		Lwr = holder$quantiles[my.alpha,2],
 		Upr = holder$quantiles[my.alpha,4],
 		Hi = holder$quantiles[my.alpha,5])
 }
 setwd("..")
 master <- do.call("rbind",master)

 master$julian <- rep(1:length(my.alpha),times = length(the.files))
 master$Date <- as.Date(master$julian,origin=start.date)

 master$Party <- factor(master$Party,
 	levels=c("PD","PDL",
 		"M5S","Lista.Monti.Camera",
 		"Lega","SEL","Rivoluzione.Civile",
 		"UDC","FLI",
 		"Destra","Fratelli.d.Italia",
 		"Altri.CDX","Altri.CSX","FermareDeclino"))

 my.pal <- brewer.pal(11,"Paired")
 party.colors <- c(my.pal[6],my.pal[2],
 	my.pal[10],my.pal[1],
 	my.pal[4],my.pal[5],my.pal[8], ## riv civ.
 	my.pal[4],my.pal[7], 
 	"black","gray",
 	my.pal[6],my.pal[2],my.pal[9])

 ## Break in to two pieces
 top.parties <- levels(master$Party)[1:3]
 middle.parties <- levels(master$Party)[4:7]
 bottom.parties <- levels(master$Party)[8:length(levels(master$Party))]

 ## Set up limits
 left.lim<-min(master$Date)
 right.lim<-max(master$Date)+10

 ## Plot top
 top.df <- master[master$Party %in% top.parties,]
 maxdate <- max(top.df$Date)
 top.plot <- ggplot(top.df,aes(x=Date,y=Mean,group=Party)) + 
 	geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
 	geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +  
 	geom_line(color="white") + 
 	scale_x_date("Date",limits=c(left.lim,right.lim)) + 
 	geom_text(data = subset(top.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + 
 	scale_y_continuous("%",labels=percent) + 
 	scale_fill_manual(values = party.colors,drop=T) +
 	scale_color_manual(values = party.colors,drop=T) +  
 	theme_bw() + 
 	opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))

 middle.df <- master[master$Party %in% middle.parties,]
 maxdate <- max(middle.df$Date)
 middle.plot <- ggplot(middle.df,aes(x=Date,y=Mean,group=Party)) + 
 	geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
 	geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +  
 	geom_line(color="white") + 
 	scale_x_date("Date",limits=c(left.lim,right.lim)) + 
 	geom_text(data = subset(middle.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + 
 	scale_y_continuous("%",labels=percent) + 
 	scale_fill_manual(values = party.colors,drop=T) +
 	scale_color_manual(values = party.colors,drop=T) +  
 	theme_bw() + 
 	opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))


 bottom.df <- master[master$Party %in% bottom.parties,]
 ## bottom.df <- master[master$Party == "Destra",]
 maxdate <- max(bottom.df$Date)
 bottom.plot <- ggplot(bottom.df,aes(x=Date,y=Mean,group=Party)) + 
 	geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
 	geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +  
 	geom_line(color="white") + 
 	scale_x_date("Date",limits=c(left.lim,right.lim)) + 
 	geom_text(data = subset(bottom.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) + 
 	scale_y_continuous("%",labels=percent) + 
 	scale_fill_manual(values = party.colors,drop=T) +
 	scale_color_manual(values = party.colors,drop=T) +  
 	theme_bw() + 
 	opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))


 ### Now plot regional scale factors
 ###

 the.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""),
 	path="jags_output/")
 the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
 	"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
 	"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
 	"SICILIA","TOSCANA","UMBRIA","VENETO")

 for (my.file in the.files) {
 	the.party <- gsub("jags_output_","",my.file)
 	the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
 	the.party <- gsub("_regions","",the.party)
 	## Load the file
 	load(paste("jags_output/",my.file,sep=""))

 	holder <- summary(out)

 	## Plot the regional stuff
 	regional.df <- data.frame(Region = the.regions,
 		Mean = holder$statistics[-1,1],
 		Lo = holder$quantiles[-1,1],
 		Hi = holder$quantiles[-1,5])
 	
 	regional.df$Region <- factor(regional.df$Region,ordered=T)
 	regional.df$Region <- reorder(regional.df$Region,regional.df$Mean)

 	regional.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) + 
 		geom_pointrange() + 
 		scale_x_discrete("") + 
 		scale_y_continuous(name = paste(the.party, "regional scale factor")) + 
 		coord_flip() + theme_bw()

 	## How does this compare to our priors?

 	mm <- mean.mat [ , the.party]
 	pm <- prec.mat [ , the.party]

 	prior.df <- data.frame(Region = the.regions,
 		Mean = mm,
 		Lo = mm - 1.96 * sqrt(1/pm),
 		Hi = mm + 1.96 * sqrt(1/pm))

 	prior.df$Region <- factor(prior.df$Region,ordered=T)
 	prior.df$Region <- reorder(prior.df$Region,prior.df$Mean)

 	prior.plot <- ggplot(prior.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) + 
 		geom_pointrange() + 
 		scale_x_discrete("") + 
 		scale_y_continuous(name = paste(the.party, "regional scale factor")) + 
 		coord_flip() + theme_bw()

 	regional.df$Variable <- "Estimated"
 	prior.df$Variable <- "Prior"
 	regional.df <- rbind(regional.df,prior.df)

 	comb.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region,color=Variable)) + 
 		geom_pointrange(position=position_dodge(width=.5, height=0)) + 
 		scale_x_discrete("") + 
 		scale_y_continuous(name = paste(the.party, "regional scale factor")) + 
 		coord_flip() + theme_bw()

 	pdf(file = paste("images/",the.party,"_regional_scale.pdf",sep=""))
 	print(comb.plot)
 	dev.off()

 }

 ### Votes to seat allocations
 ###

 ## First, the Camera

 largest.remainders <- function(x,y) {
 	## x is vote shares
 	## y is number seats
 	if (length(y)>1) {
 		stop("Number of seats should have length 1")
 	}
 	x2 <- x/sum(x,na.rm=T)
 	quota <- 1 / y
 	seats <- floor(x2 / quota)
 	seats.left <- y - sum(seats)

 	if (seats.left > 0) {
 		remainder <- x2 %% quota
 		additional.seats <- order(remainder,decreasing=T)[1:seats.left]
 		seats[additional.seats] <- seats[additional.seats] + 1
 	}
 	seats
 }

 doCameraAlloc <- function(dat){
 	## Create coalitions
 	dat$Coalition <- car:::recode(as.character(dat$Party),
 		"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX';
 		c('UDC','FLI','Lista.Monti.Camera') = 'Centre';
 		c('PD','SEL','Altri.CSX') = 'CSX'")

 	## Check coalition/list thresholds
 	dat$InCoalition <- (dat$Party != dat$Coalition)

 	dat <- ddply(dat,.(Coalition),transform,CoalitionTotal = sum(Value,na.rm=T))
 	dat <- ddply(dat,.(Coalition),transform,Coalition2Pct = any(Value>2))

 	dat$makesThreshold.a <- ((dat$InCoalition == 1) & (dat$CoalitionTotal > 10) & (dat$Coalition2Pct == TRUE))
 	dat$makesThreshold.b <- ((dat$InCoalition == 0) & (dat$CoalitionTotal > 4))

 	dat$makesThreshold <- (dat$makesThreshold.a | dat$makesThreshold.b)

 	## Remove things which do not make the cut at this stage 
 	dat <- subset(dat,makesThreshold)

 	## Check maj. premium, initial allocation
 	coalition.df <- unique(dat[,c("CoalitionTotal","Coalition")])
 	coalition.df$CoalitionAlloc <- largest.remainders(coalition.df$CoalitionTotal,629) ## 630 less Val d'Aosta
 	dat <- merge(dat,coalition.df,all=T)
 	isOkay <- (max(dat$CoalitionAlloc)>340)
 	if (isOkay) {
 	} else {
 		dat$CoalitionAlloc[which(dat$CoalitionTotal == max(dat$CoalitionTotal))] <- 340
 	}

 	## Check list within coalition thresholds
 	dat$makesThreshold.c <- (dat$InCoalition ==0) | (dat$Value > 2)
 	dat <- ddply(dat,.(Coalition),function(inner.df) {
 		inner.df$lowValues <-inner.df$Value
 		inner.df$lowValues [which(inner.df$Value > 2)] <- NA
 		inner.df$makesThreshold.c[which(inner.df$lowValues == max(inner.df$lowValues,na.rm=T))] <- TRUE
 		inner.df
 	})

 	## But impose a ban on Altri.CDX getting anything
 	dat$makesThreshold.c[dat$Party=="Altri.CDX"] <- FALSE

 	## Remove things which do not make the cut at this stage 
 	dat <- subset(dat,makesThreshold.c)

 	## Allocate seats

 	dat <- ddply(dat,.(Coalition),function(inner.df){
 		inner.df$PartyAlloc <- largest.remainders(inner.df$Value,
 			unique(inner.df$CoalitionAlloc))
 		inner.df
 	})
 	dat
 }

 national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""),
 	path="jags_output/")
 national.files <- national.files[grep("region",national.files,invert=TRUE)]
 ## Remove Monti in the Senate
 national.files <- national.files[grep("Senato",national.files,invert=TRUE)]

 nParties <- length(national.files)
 nSimulations <- 1000

 out.mat <- array(data=NA,
 	dim = c(nSimulations,nParties))

 dimnames(out.mat)[[1]] <- as.character(1:1000)
 dimnames(out.mat)[[2]] <- letters[1:nParties]

 for (national.file in national.files) {
 	the.party <- gsub("jags_output_","",national.file)
 	the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
 	load(paste("jags_output/",national.file,sep=""))
 	national.out <- out

 	## Get the vote share for this party on the last available date
 	dailies <- grep("alpha",colnames(national.out[[1]]))
 	lastday <- dailies[length(dailies)]

 	national.vote <- as.matrix(national.out[[1]][,lastday])
 	national.vote [ which(national.vote < 0 )] <- 0
 	out.mat[,match(national.file,national.files)] <- national.vote
 	dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party
 }

 plot.df <- melt(out.mat)
 names(plot.df) <- c("Simulation","Party","Value")
 plot.df$Value <- plot.df$Value * 100
 camera.seats <- ddply(plot.df,.(Simulation),doCameraAlloc)

 write.csv(camera.seats,file=paste("camera_seats_",the.date,".csv",sep=""))



 ### Now get the regional vote shares
 national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""),
 	path="jags_output/")
 national.files <- national.files[grep("region",national.files,invert=TRUE)]
 ## Remove UDC, FLI
 national.files <- national.files[grep("UDC|FLI",national.files,invert=TRUE)]
 regional.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""),
 	path="jags_output/")

 nRegions <- length(the.regions)
 nParties <- length(national.files)
 nSimulations <- 1000

 out.mat <- array(data=NA,
 	dim = c(nRegions, nParties, nSimulations))

 dimnames(out.mat)[[1]] <- the.regions
 dimnames(out.mat)[[2]] <- letters[1:nParties]
 dimnames(out.mat)[[3]] <- as.character(1:1000)

 for (national.file in national.files) {
 	the.party <- gsub("jags_output_","",national.file)
 	the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
 	load(paste("jags_output/",national.file,sep=""))
 	national.out <- out
 	if (the.party == "Lista.Monti.Camera") {
 		the.party <- "Lista.Monti.Senato"
 	}
 	regional.file <- regional.files[grep(paste(the.party,"_",sep=""),regional.files)]
 	load(paste("jags_output/",regional.file,sep=""))
 	regional.out <- out

 	## Get the vote share for this party on the last available date
 	dailies <- grep("alpha",colnames(national.out[[1]]))
 	lastday <- dailies[length(dailies)]

 	## Get the regional scale factors
 	regional.fx <- grep("scale.factor",colnames(regional.out[[1]]))
 	## If you want to know their performance on the last day
 	## summary(national.out[[1]][,lastday])
 	## If you want to know their regional scale factors
 	## summary(regional.out[[1]][,regional.fx])
 	rsf <- as.matrix(regional.out[[1]][,regional.fx])
 	rsf [which(rsf < 0) ] <- 0
 	national.vote <- as.matrix(national.out[[1]][,lastday])
 	national.vote [ which(national.vote < 0 )] <- 0
 	national.vote <- matrix(rep(national.vote,nRegions),nrow=nSimulations,ncol=nRegions)
 	foo <- national.vote * rsf
 	foo [ which(foo < 0) ] <- 0
 	colnames(foo) <- the.regions
 	out.mat[,match(national.file,national.files),] <- t(foo)
 	dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party
 }

 plot.df <- melt(out.mat[,,])
 plot.df$value <- plot.df$value * 100
 write.csv(plot.df,file= paste("vote_allocation_",the.date,".csv",sep=""))

 pdf(file="images/regional_vote_shares.pdf",width=20,height=20)
 ggplot(plot.df,aes(x=X1,y=value)) + geom_boxplot() + 
 	facet_grid(~X2,scale="free") + scale_y_continuous() +
 	opts(axis.text.y = theme_text(angle=90)) + coord_flip() + theme_bw()
 
 dev.off()

 ### Now convert to seats
 seat.df <- data.frame(Region = c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
 	"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
 	"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
 	"SICILIA","TOSCANA","UMBRIA","VENETO"),
 	Seats = c(7,7,10,29,
 	22,7,28,8,
 	49,8,2,22,20,8,
 	25,18,7,24))

 seat.df$MajPrem <- floor(.55 * seat.df$Seats) + 1
 seat.df$MajPrem[seat.df$Region=="MOLISE"] <- 1
 names(plot.df) <- c("Region","Party","Simulation","value")
 plot.df$Party <- as.character(plot.df$Party)

 plot.df <- subset(plot.df,!is.element(Party,c("UDC","FLI")))
 plot.df <- merge(plot.df,seat.df)
 plot.df$Coalition <- car:::recode(as.character(plot.df$Party),
 	"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX';
 	c('PD','SEL','Altri.CSX') = 'CSX'")
 plot.df$InCoalition <- as.numeric(plot.df$Party != plot.df$Coalition)

 system.time(seat.alloc.df <- ddply(plot.df,.(Region,Simulation),function(df){

 	df <- ddply(df,.(Coalition),transform,CoalitionTotal = sum(value))
 	df <- ddply(df,.(Coalition),transform,Coalition3Pct = any(value>=3))
 	df$makesThreshold.b1 <- ((df$InCoalition == 1) & (df$CoalitionTotal > 20) & (df$Coalition3Pct == TRUE))

 	df$makesThreshold.b2a <- ((df$InCoalition == 0) & (df$value >= 8))
 	df$makesThreshold.b2b <- ((df$InCoalition == 1) & (df$value >= 8) & (df$makesThreshold.b1 == TRUE))

 	df$makesThreshold <- ((df$makesThreshold.b1) | (df$makesThreshold.b2a) | (df$makesThreshold.b2b))

 	df <- subset(df,makesThreshold == TRUE) ## DESTROY BELOW THRESHOLD PARTIES
 	df$RelevantEntity <- ifelse(df$makesThreshold.b1,
 		as.character(df$Coalition),
 		as.character(df$Party))

 	df$RelevantValue <-  ifelse(df$makesThreshold.b1,df$CoalitionTotal,df$value)

 	relevant.df <- unique(df[,c("RelevantEntity","RelevantValue","Seats","MajPrem")])
 	## Create initial allocation
 	relevant.df$initialAlloc <- NA
 	relevant.df$initialAlloc <- largest.remainders(
 		x = relevant.df$RelevantValue,
 		y = unique(relevant.df$Seats))

 	## Check whether initial allocation acceptable
 	relevant.df$FinalAlloc <- NA
 	initialAllocSucceeds <- max(relevant.df$initialAlloc) >= unique(relevant.df$MajPrem)
 	if (initialAllocSucceeds) {
 		relevant.df$FinalAlloc <- relevant.df$initialAlloc
 	} else {
 		largest.entity <- which(relevant.df$RelevantValue == max(relevant.df$RelevantValue))
 		smaller.entities <- which(relevant.df$RelevantValue != max(relevant.df$RelevantValue))
 		largest.entity <- relevant.df[largest.entity,]
 		smaller.entities <- relevant.df[smaller.entities,]

 		largest.entity$FinalAlloc <- unique(largest.entity$MajPrem)
 		smaller.entities$FinalAlloc <- largest.remainders( 
 			x = smaller.entities$RelevantValue,
 			y = unique(smaller.entities$Seats - smaller.entities$MajPrem))
 		relevant.df <- rbind(largest.entity,smaller.entities)
 	}

 	## Now allocate seats between within-coalition bodies
 	df <- merge(df,relevant.df)
 	df <- ddply(df,.(RelevantEntity),function(df2) {
 		df2$SecondAlloc <- largest.remainders(df2$value,unique(df2$FinalAlloc))
 		df2
 	})
 })

 )
 	
 write.csv(seat.alloc.df,file=paste("seat_allocation_",the.date,".csv",sep=""))