Last active
August 4, 2016 09:28
-
-
Save pitakakariki/2791866 to your computer and use it in GitHub Desktop.
Code to generate an interactive SVG graph of NZ general election polling.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### | |
## | |
## --- New Zealand General Election Polling --- | |
## | |
## Plots polling data scraped from Wikipedia. | |
## | |
## - GAM - | |
## | |
## The smoothed curves are calculated using a generalised additive model (GAM). The | |
## smoothing parameter is estimated using cross-validation. This means that the curves | |
## are estimated based on subsets of the data, and the parameter that best predicts | |
## the hold-out data is chosen. Note that the error bounds are based on the assumption | |
## that the smoothing parameter is correct, i.e. uncertainty in the smoothing parameter | |
## estimate are not accounted for. | |
## | |
## A separate smoothed curve is estimated for each combination of polling firm and party | |
## (the election is treated as a poll and given more weight than the opinion polls). The | |
## shape of the curves for each party is constrained to be the same for every polling firm, | |
## although they can be vertically offset from each other. The displayed curve is aligned | |
## so that it passes through the election result, but in the interactive version you can | |
## see the offset curves for each polling outfit. | |
## | |
## - SVG - | |
## | |
## The svg graphics format allows interactivity via Javascript. Note that the | |
## SVGAnnotation library comes from http://www.bioconductor.org/ rather that | |
## the CRAN repositories. This requires Cairo to be available (use | |
## capabilities("cairo") to check this). | |
## | |
### | |
library(stringr) | |
library(plyr) | |
library(XML) | |
library(xts) | |
library(ggplot2) | |
library(lubridate) | |
library(mgcv) | |
library(reshape) | |
library(Hmisc) | |
library(rje) | |
### | |
## | |
## Settings | |
## | |
### | |
# Options | |
opt_begin <- as.Date("2008-01-01") #as.Date("1996-01-01") #as.Date("2002-06-01") | |
opt_end <- today() | |
opt_filename <- "test" | |
opt_svg <- TRUE # set to FALSE if you can't get SVGAnnotation to work | |
opt_png <- !opt_svg | |
opt_filename <- str_c(opt_filename, if(opt_svg) ".svg" else ".png") | |
opt_bc <- TRUE # bias corrections | |
opt_bcplot <- FALSE # plot bias-corrected points | |
opt_bcreport <- TRUE # print bias estimates | |
opt_bccurves <- opt_svg # curves for each polling firm | |
opt_events <- FALSE # event markers | |
opt_thresh <- TRUE # 5% threshold marker | |
opt_ewt <- 1000 # weighting for election data points | |
opt_adapt <- FALSE # Adaptive smoother | |
opt_svgwidth <- 12 # width and height if the output file is svg | |
opt_svgheight <- 7 # these will be multiplied by 72 for png | |
opt_xwidth <- 3/2 * opt_svgwidth # 1 / opt_xwidth of the plot is reserved for final results | |
opt_xheight <- 3/2 * opt_svgheight # 1 / opt_xheight for event markers | |
opt_dashes <- "[\u002D\u2013\u2014\u2212]" | |
opt_spaces <- "[\u0020\u00A0]" | |
opt_plotcurves <- TRUE | |
opt_curvepts <- 500 # number of line segements to make "curves" | |
opt_curvett <- TRUE # tooltips on curves | |
opt_moe <- 0 | |
opt_legend <- TRUE | |
# Wikipedia urls | |
urls <- str_c("http://en.wikipedia.org/wiki/Opinion_polling_for_the_New_Zealand_general_election,_", c(2005, 2008, 2011)) | |
urls <- c(urls, "http://en.wikipedia.org/wiki/Opinion_polling_for_the_next_New_Zealand_general_election") | |
# Party details | |
party_ctl <- read.table( | |
header = TRUE, | |
row.names = "Party", | |
stringsAsFactors = FALSE, | |
comment = "", | |
text = " | |
Party Colour Contrast Include | |
ACT #FFE401 #FFFF80 - | |
Conservative #00AEEF #33CCFF - | |
Destiny #FF0000 #000000 - | |
Green #098137 #B3FFB3 Y | |
Labour #FF0000 #FFBAA8 Y | |
Mana #770808 #FF6E6E - | |
Maori #EF4A42 #FFCC80 - | |
National #00529F #CCDDFF Y | |
'NZ First' #000000 #CCCCCC Y | |
Progressive #9E9E9E #DDCCDD - | |
Internet #662C92 #EE77FF - | |
'Internet/Mana' #662C92 #FF6E6E - | |
'United Future' #501557 #DD99DD - | |
'Labour/Green' #FF0000 #B3FFB3 - | |
") | |
party_ctl[, "Include"] <- (party_ctl[, "Include"] == "Y") | |
rownames(party_ctl)[match("Maori", rownames(party_ctl))] <- "M\u0101ori" | |
if(opt_svg) library(SVGAnnotation, quietly=TRUE) | |
### | |
## | |
## Helper function definitions | |
## | |
### | |
get_data <- function(data_url, quiet=FALSE, i=1) { | |
if(!quiet) message(str_c("Reading data from ", data_url)) | |
h <- htmlParse(data_url, encoding="UTF-8") | |
x <- readHTMLTable(h, encoding="UTF-8", stringsAsFactors=FALSE) | |
x $ toc <- NULL | |
x <- x[[i]] # the poll data usually the first table, not counting "table" of contents | |
names(x) <- trim(names(x)) | |
x[,1] <- trim(x[,1]) | |
# If the second column is NA, then we have some sort of non-poll event | |
r_events <- is.na(x[2]) | |
events <- unlist(str_split(x[r_events, 1], "\n")) | |
events <- str_split(events, opt_dashes, n=2) | |
Date <- sapply(events, `[`, 1) | |
Date <- as.Date(Date, "%d %b %Y") | |
Event <- sapply(events, `[`, 2) | |
Event <- trim(Event) | |
events <- data.frame(Date=Date, Event=Event, stringsAsFactors=FALSE) | |
# Polls are everything else, minus "heading" rows | |
r_polls <- !r_events & !(x$Date == "Date") | |
c_polls <- (names(x) != "") | |
polls <- x[r_polls, c_polls] | |
# Conversions | |
polls $ Poll [str_detect(polls $ Poll, "election")] <- "Election" | |
polls $ Poll <- as.factor(polls $ Poll) | |
polls $ Poll <- reorder(polls $ Poll, polls $ Poll, function(x) if(x[1] == "Election") 1 else 2) | |
polls $ Date <- as.character(polls $ Date) | |
for(col in names(polls)[-c(1,2)]) | |
polls[col] <- suppressWarnings(as.numeric(polls[[col]])) # suppress conversion to NA warnings | |
# Return both data frames | |
attr(polls, "events") <- events | |
null0 <- function(x) if(is.null(x)) 0 else ifelse(is.na(x), 0, x) | |
# Combinations | |
polls[['Labour/Green']] <- rep(0, nrow(polls)) + | |
null0(polls$Labour) + | |
null0(polls$Green) | |
IM <- rep(0, nrow(polls)) + | |
null0(polls$Internet) + | |
null0(polls$Mana) + | |
null0(polls$`Internet Mana`) | |
IM <- ifelse(IM==0, NA, IM) | |
polls[['Internet/Mana']] <- IM | |
return(polls) | |
} | |
trim <- function(x) { | |
x <- str_replace_all(x, "\\[(.*?)\\]", "") # remove any Wikipedia footnotes, e.g. [1] | |
x <- str_trim(x) # trim leading/trailing spaces | |
return(x) | |
} | |
alpha <- function(x, alpha=1) rgb(t(col2rgb(x)), alpha=255*alpha, max=255) | |
date <- function(x) { | |
temp <- lapply(str_split(x, opt_dashes), rev) # Four different kinds of dashes used! | |
# Date that poll coverage ends | |
a <- sapply(temp, `[`, 1) | |
a <- str_replace(a, "c\\.", "") | |
a <- str_replace(a, "Released", "") | |
a <- str_trim(a) | |
date_end <- as.Date(a, "%d %b %Y") | |
# Date that poll coverage starts | |
b <- sapply(temp, `[`, 2) | |
b <- str_trim(b) | |
b <- str_split(b, opt_spaces) # Second space is a non-breaking space. | |
year_start <- as.numeric(sapply(b, `[`, 3)) | |
month_start <- match(sapply(b, `[`, 2), month.name) | |
day_start <- as.numeric(sapply(b, `[`, 1)) | |
date_start <- date_end | |
s <- !is.na(year_start); if(any(s)) date_start[s] <- update(date_start[s], years=year_start[s]) | |
s <- !is.na(month_start); if(any(s)) date_start[s] <- update(date_start[s], months=month_start[s]) | |
s <- !is.na(day_start); if(any(s)) date_start[s] <- update(date_start[s], days=day_start[s]) | |
# Polls with only month and year | |
s <- is.na(date_start) | |
if(any(s)) { | |
date_start[s] <- as.Date(as.yearmon(a[s])) | |
date_end[s] <- date_start[s] + new_period(month=1) - new_period(day=1) | |
} | |
# Combine and return | |
date_mean <- as.Date(apply(cbind(date_start, date_end), 1, mean)) | |
data.frame(orig=x, date_start, date_end, date_mean) | |
} | |
loop <- function(x, y=x) c(x, rev(y)) | |
format_num <- function(x) formatC(x, digits=1, format="f") | |
format_date <- function(x) format(as.Date(x), "%d %B %Y") | |
format_date_range <- function(x, y) { | |
x_day <- day(x) | |
x_month <- month.name[month(x)] | |
x_year <- year(x) | |
y_day <- day(y) | |
y_month <- month.name[month(y)] | |
y_year <- year(y) | |
en <- "\u2013" | |
ifelse( | |
x_year != y_year, | |
str_c(x_day, x_month, x_year, en, y_day, y_month, y_year, sep=" "), | |
ifelse( | |
x_month != y_month, | |
str_c(x_day, x_month, en, y_day, y_month, y_year, sep=" "), | |
ifelse( | |
x_day != y_day, | |
str_c(x_day, en, y_day, y_month, y_year, sep=" "), | |
str_c(y_day, y_month, y_year, sep=" ") | |
) | |
) | |
) | |
} | |
embiggen <- function(node) { | |
for(i in seq_along(node)) { | |
xmlAttrs(node[[i]]) <- c( | |
`onmouseover` = "embiggen(this)", | |
`onmouseout` = "unembiggen(this)", | |
`onclick` = "click(this)" | |
) | |
} | |
} | |
link <- function(pt_node, sm_node, party, poll) { | |
id <- str_c(party, poll) | |
for(i in seq_along(pt_node)) { | |
xmlAttrs(pt_node[[i]]) <- c( | |
`id` = str_c(id, i), | |
`onmouseover` = sprintf("link('%s', %i)", id, length(pt_node)), | |
`onmouseout` = sprintf("unlink('%s', %i)", id, length(pt_node)), | |
`onclick` = sprintf("clink('%s', %i)", id, length(pt_node)) | |
) | |
} | |
if(!is.null(sm_node)) { | |
xmlAttrs(sm_node) <- c(`id` = id) | |
modifyStyle(sm_node, `stroke-opacity`="0") | |
} | |
} | |
get_party_data <- function(p) { | |
s <- !is.na(poll_matrix[, p]) | |
n <<- sum(s) | |
x <<- as.numeric(x_date[s]) | |
x0 <<- poll_data $ date_start[s] | |
x1 <<- poll_data $ date_end[s] | |
y <<- poll_matrix[s, p] | |
z <<- poll_data $ Poll[s] | |
this_colour <<- party_ctl[p, "Colour"] | |
this_contrast <<- party_ctl[p, "Contrast"] | |
xs <<- seq(min(x), max(x), length=opt_curvepts) | |
new_X <<- data.frame(x=xs, z="Election") | |
s_elect <<- (z == "Election") | |
invisible() | |
} | |
### | |
## | |
## Collect the data. | |
## | |
### | |
data_list <- lapply(urls, get_data) | |
#load('datalist.rda') | |
#data_list[[5]] <- data.frame(Poll="3 News Reid Research", Date="25 May 2014", Labour=29.5, National=50.3, `NZ First`=5.6, Green=10.2, check.names=FALSE, stringsAsFactors=FALSE) | |
#attr(data_list[[5]], "events") <- data.frame(Date=as.Date(Sys.Date()), Event="Today") | |
poll_data <- Reduce(function(x, y) merge(x, y, all=TRUE), data_list) | |
event_data_list <- lapply(data_list, attr, "events") | |
event_data <- Reduce(function(x, y) merge(x, y, all=TRUE), event_data_list) | |
event_data <- event_data[!is.na(event_data $ Date), ] | |
poll_data <- cbind(poll_data, date(poll_data $ Date)) # Parse dates | |
poll_data <- poll_data[!is.na(poll_data $ date_mean), ] # Remove any polls where we can't parse a date | |
poll_data <- sort_df(poll_data, 'date_mean') # Sort from earliest to latest poll | |
# Restrict to selected dates | |
s_date <- (poll_data $ date_start >= opt_begin & poll_data $ date_end <= opt_end) | |
poll_data <- poll_data[s_date, ] | |
poll_data <- droplevels(poll_data) # Remove unused polling firms | |
incl_parties <- rownames(party_ctl)[party_ctl $ Include] | |
poll_matrix <- as.matrix(poll_data[, incl_parties]) # Restrict to parties of interest | |
x_date <- poll_data $ date_mean | |
### | |
## | |
## Analysis and plotting. | |
## | |
### | |
if(opt_svg) { | |
svg(opt_filename, width=opt_svgwidth, height=opt_svgheight) | |
} | |
if(opt_png && !opt_svg) { | |
png(opt_filename, width=opt_svgwidth*72, height=opt_svgheight*72) | |
#pdf(opt_filename, width=opt_svgwidth, height=opt_svgheight, bg='white') | |
} | |
svg_counter <- 1 | |
svg_index <- list() | |
xw <- as.numeric(opt_end - opt_begin) / (opt_xwidth - 1) | |
xh <- if(opt_events) max(poll_matrix, na.rm=TRUE) / (opt_xheight - 1) else 0 | |
# This sets up the plot region | |
matplot( | |
x_date, poll_matrix, | |
pch = NA, | |
main = "New Zealand General Election Polling", | |
xlim = c(min(x_date), max(x_date) + xw), xlab = "", xaxt = "n", | |
ylim = c(-xh, max(6, 1.1 * max(poll_matrix, na.rm=TRUE))), ylab = "Party Percentage (%)" | |
) | |
axis.Date(1, x_date) | |
abline(h=0); inc(svg_counter) <- 1 | |
is_election <- (poll_data $ Poll == "Election") | |
abline(v=x_date[is_election]); inc(svg_counter) <- sum(is_election) | |
if(opt_thresh) { | |
abline(h=5, lty=2) | |
inc(svg_counter) <- 1 | |
} | |
# Fit the Generalised Additive Model | |
a_list <- list() | |
b_list <- list() | |
cf_list <- list() | |
final <- character() | |
for(p in incl_parties) { | |
get_party_data(p) | |
##logit - add logit links | |
#y <- y / 100 | |
if(opt_bc) { | |
a <- if(opt_adapt) { | |
gam(y ~ s(x, bs="ad") + z, weights=ifelse(z=="Election", opt_ewt, 1)) | |
#gam(y ~ s(x, bs="ad") + z, weights=ifelse(z=="Election", opt_ewt, 1), family=gaussian(link="logit")) | |
} else { | |
gam(y ~ s(x) + z, weights=ifelse(z=="Election", opt_ewt, 1)) | |
###gamm(y ~ s(x) + z) | |
#gam(y ~ s(x) + z, weights=ifelse(z=="Election", opt_ewt, 1), family=gaussian(link="logit")) | |
} | |
a_list[[p]] <- a | |
###a_list[[p]] <- a$gam | |
cf <- coef(a) | |
###cf <- coef(a $ gam) | |
names(cf)[1] <- "zElection"; cf[1] <- 0 | |
cf_list[[p]] <- cf[str_c("z", levels(poll_data $ Poll)[-1])] # Keep track of bias estimates | |
names(cf_list[[p]]) <- levels(poll_data $ Poll)[-1] | |
} | |
else { | |
a <- if(opt_adapt) { | |
gam(y ~ s(x, bs="ad"), weights=ifelse(z=="Election", opt_ewt, 1)) | |
#gam(y ~ s(x, bs="ad"), weights=ifelse(z=="Election", opt_ewt, 1), family=gaussian(link="logit")) | |
} else { | |
gam(y ~ s(x), weights=ifelse(z=="Election", opt_ewt, 1)) | |
#gam(y ~ s(x), weights=ifelse(z=="Election", opt_ewt, 1), family=gaussian(link="logit")) | |
} | |
a_list[[p]] <- a | |
} | |
} | |
bias <- do.call(cbind, cf_list) | |
colnames(bias)[grep("Maori", colnames(bias))] <- "M\u0101ori" | |
# Plot spline curves | |
if(opt_plotcurves) for(p in incl_parties) { | |
get_party_data(p) | |
a <- a_list[[p]] | |
b <- predict(a, newdata=new_X, se=TRUE) | |
#b$fit <- 100 * expit(b$fit) ##logit | |
b_list[[p]] <- b | |
polygon( | |
loop(xs), | |
loop(b$fit) + 1.96 * loop(b$se.fit, -b$se.fit), | |
col = alpha(this_contrast, 1/2), | |
border = NA | |
) | |
lines(xs, b$fit, lwd=3, col=this_colour) | |
svg_index[[str_c("sm_err:", p)]] <- svg_counter; inc(svg_counter) <- 1 | |
svg_index[[str_c("sm:", p)]] <- svg_counter; inc(svg_counter) <- 1 | |
if(opt_curvett) { | |
points(xs, b$fit, col=alpha(this_colour, 0), bg=alpha(this_contrast, 0.1), pch=21) | |
svg_index[[str_c("smpts:", p)]] <- seq(svg_counter, length=opt_curvepts) | |
inc(svg_counter) <- opt_curvepts | |
} | |
} | |
# Plot data points | |
for(p in incl_parties) { | |
get_party_data(p) | |
if(opt_bc && opt_bcplot) { | |
cf <- cf_list[[p]] | |
y[!s_elect] <- y[!s_elect] - cf[as.character(z[!s_elect])] # Bias correction | |
} | |
n <- sum(!is.na(y)) | |
if(opt_moe > 0) { | |
x_me <- ggplot2:::interleave(x, x, NA) | |
y_me <- ggplot2:::interleave(y-opt_moe, y+opt_moe, NA) | |
lines(x_me, y_me, col=this_colour) | |
inc(svg_counter) <- n | |
} | |
points(x, y, pch=21, cex=1, col=this_colour, bg=this_contrast) | |
svg_index[[str_c("pts:", p)]] <- seq(svg_counter, length=n); inc(svg_counter) <- n | |
} | |
# Uncorrected smooths | |
for(p in incl_parties) { | |
get_party_data(p) | |
b <- b_list[[p]] | |
if(opt_bc && opt_bccurves && !opt_bcplot) { | |
for(i in levels(droplevels(z))[-1]) { | |
lines(xs, b$fit + bias[i, p], lwd=2, col=alpha(this_colour, 1/2)) | |
svg_index[[sprintf("psm:%s:%s", p, i)]] <- svg_counter; inc(svg_counter) <- 1 | |
} | |
} | |
} | |
# Election results | |
for(p in incl_parties) { | |
get_party_data(p) | |
points( | |
x[s_elect], y[s_elect], | |
pch = 21, cex = 2, | |
col = party_ctl[p, "Colour"], | |
bg = party_ctl[p, "Contrast"] | |
) | |
n_elect <- sum(s_elect) | |
svg_index[[str_c("election:", p)]] <- seq(svg_counter, length=n_elect); inc(svg_counter) <- n_elect | |
} | |
# Plot final estimates | |
for(p in incl_parties) { | |
get_party_data(p) | |
b <- b_list[[p]] | |
final_x <- tail(x_date, 1) | |
final_fit <- tail(b $ fit, 1) | |
final_err <- 1.96 * tail(b $ se.fit, 1) | |
final[p] <- sprintf("%s \u00B1 %s%%", format_num(final_fit), format_num(final_err)) | |
final_date <- tail(x_date, 1) | |
right_x <- par("usr")[2] | |
avail_w <- right_x - as.numeric(final_date) | |
test_str <- sprintf("%s \u00B1 %s%%_", format_num(800/9), format_num(80/9)) # "88.8 ± 8.8%_" | |
test_w <- strwidth(test_str, font=4) | |
text( | |
final_x, final_fit, final[p], | |
pos = 4, font = 4, xpd = TRUE, | |
col = this_colour, | |
cex = avail_w / test_w, | |
) | |
} | |
# Events | |
if(opt_events) { | |
s_date <- (event_data $ Date >= opt_begin & event_data $ Date <= opt_end) | |
event_data <- event_data[s_date, ] | |
points(event_data $ Date, rep(-c(0, 2, 4, 1, 3), length=nrow(event_data)) * (xh / 4), pch=21, cex=1.5, bg="#FC7928") | |
svg_index[["events"]] <- seq(svg_counter, length=nrow(event_data)); inc(svg_counter) <- nrow(event_data) | |
} | |
# Legend (SVG is broken for this, do it last) | |
if(opt_legend) { | |
party_col <- sapply(incl_parties, function(p) {get_party_data(p); this_colour}) | |
party_bg <- sapply(incl_parties, function(p) {get_party_data(p); this_contrast}) | |
legend("topleft", legend=incl_parties, pch=21, col=party_col, pt.bg=party_bg) | |
for(p in incl_parties) { | |
svg_index[[str_c("legend:", p)]] <- svg_counter | |
inc(svg_counter) <- 1 | |
} | |
} | |
if(opt_svg || opt_png) dev.off() | |
# Bias estimates | |
if(opt_bc && opt_bcreport) print(round(bias, 1)) | |
### | |
## | |
## Fancy SVG stuff. | |
## | |
### | |
if(opt_svg) { | |
doc <- xmlParse(opt_filename) | |
svg_points <- unlist(getPlotPoints(doc)) | |
addChildren(xmlChildren(doc) $ svg, kids=list(newXMLNode(name="title", "New Zealand General Election Polling")), at=0) | |
addECMAScripts(doc, I(' | |
function embiggen(path) { | |
d = path.getAttributeNS(null, "d").match(/-?\\d+(\\.\\d+)?/ig) | |
x = ( parseFloat(d[0]) + parseFloat(d[4]) ) / 4; | |
y = parseFloat(d[1]) / 2; | |
path.setAttributeNS(null, "transform", "scale(2) translate(-" + x + ", -" + y + ")"); | |
} | |
function unembiggen(path) { | |
clicked = path.getAttributeNS(null, "desc") | |
if(clicked != "clicked") { | |
path.setAttributeNS(null, "transform", "scale(1)"); | |
} | |
} | |
function link(desc, n) { | |
for(i = 1; i <= n; i++) { | |
path = document.getElementById(desc + i); | |
embiggen(path); | |
} | |
path = document.getElementById(desc); | |
bigline(path); | |
} | |
function unlink(desc, n) { | |
for(i = 1; i <= n; i++) { | |
path = document.getElementById(desc + i); | |
unembiggen(path); | |
} | |
path = document.getElementById(desc); | |
smallline(path); | |
} | |
function clink(desc, n) { | |
for(i =1; i <= n; i++) { | |
path = document.getElementById(desc + i); | |
click(path) | |
} | |
path = document.getElementById(desc); | |
click(path) | |
} | |
function click(path) { | |
desc = path.getAttributeNS(null, "desc") | |
if(desc == "clicked") { | |
path.setAttributeNS(null, "desc", ""); | |
} | |
else { | |
path.setAttributeNS(null, "desc", "clicked"); | |
} | |
} | |
function bigline(path) { | |
path.style.setProperty("stroke-opacity", "0.5"); | |
} | |
function smallline(path) { | |
clicked = path.getAttributeNS(null, "desc") | |
if(clicked != "clicked") { | |
path.style.setProperty("stroke-opacity", "0"); | |
} | |
} | |
'), at=1) | |
# Poll results | |
for(p in incl_parties) { | |
get_party_data(p) | |
poll_index <- svg_index[[str_c("pts:", p)]] | |
pt1 <- format_date_range(x0, x1) | |
pt2 <- z | |
pt3 <- sprintf("%s: %s%%", p, y) | |
poll_text <- str_c(pt1, pt2, pt3, sep=", \n") | |
addToolTips(doc, text=poll_text, path=svg_points[poll_index]) | |
# for(i in levels(poll_data $ Poll)[-1]) { | |
for(i in levels(droplevels(z))[-1]) { | |
if(opt_bc && opt_bccurves && !opt_bcplot) { | |
sm_index <- svg_index[[sprintf("psm:%s:%s", p, i)]] | |
link(svg_points[poll_index[z==i]], svg_points[[sm_index]], p, i) | |
} | |
else { | |
link(svg_points[poll_index[z==i]], NULL, p, i) | |
} | |
} | |
elect_index <- svg_index[[str_c("election:", p)]] | |
et1 <- format_date(x[s_elect]) | |
et2 <- "Election" | |
et3 <- sprintf("%s: %s%%", p, y[s_elect]) | |
elect_text <- str_c(et1, et2, et3, sep="\n") | |
addToolTips(doc, text=elect_text, path=svg_points[elect_index]) | |
embiggen(svg_points[elect_index]) | |
if(opt_curvett) { | |
b <- b_list[[p]] | |
smpts_date <- format_date(xs) | |
smpts_pc <- str_c(round(b$fit, 1), '%') | |
smpts_text <- str_c(smpts_date, smpts_pc, sep='\n') | |
smpts_index <- svg_index[[str_c("smpts:", p)]] | |
addToolTips(doc, text=smpts_text, path=svg_points[smpts_index]) | |
} | |
} | |
# Events | |
if(opt_events) { | |
event_index <- svg_index[["events"]] | |
event_text <- with(event_data, str_c(format_date(Date), "\n", Event)) | |
addToolTips(doc, text=event_text, path=svg_points[event_index]) | |
embiggen(svg_points[event_index]) | |
} | |
saveXML(doc, opt_filename) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The smoothed curves are calculated using a generalised additive model (GAM). The smoothing parameter is estimated using cross-validation. This means that the curves are estimated based on subsets of the data, and the parameter that best predicts the hold-out data is chosen. Note that the error bounds are based on the assumption that the smoothing parameter is correct, i.e. uncertainty in the smoothing parameter estimate are not accounted for.
A separate smoothed curve is estimated for each combination of polling firm and party (the election is treated as a poll and given more weight than the opinion polls). The shape of the curves for each party is constrained to be the same for every polling firm, although they can be vertically offset from each other. The displayed curve is aligned so that it passes through the election result, but in the interactive version you can see the offset curves for each polling outfit.