Last active
September 23, 2020 19:11
-
-
Save k5cents/5dabffdb9e3ef45a0e5170170cf28db5 to your computer and use it in GitHub Desktop.
Comparing past and future Presidential election turnout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(patchwork) | |
library(tidyverse) | |
library(lubridate) | |
library(jsonlite) | |
library(predictr) | |
library(scales) | |
library(rvest) | |
# get vap estimate -------------------------------------------------------- | |
# read from census bureau | |
pop_est <- read_csv(file = "https://bit.ly/3mKlL6I") | |
# find nationwide total | |
pop <- pop_est$POPEST18PLUS2019[pop_est$NAME == "United States"]/1e6 | |
# read past turnout from wikipedia | |
turnout <- read_html("https://w.wiki/dAj") | |
turnout <- turnout %>% | |
html_node(".wikitable") %>% | |
html_table() %>% | |
as_tibble() %>% | |
set_names(c("election", "vap", "turnout", "prop")) %>% | |
filter(vap != "No data") %>% | |
type_convert( | |
na = "", | |
col_types = cols( | |
prop = col_number() | |
) | |
) | |
# plot market prices ------------------------------------------------------ | |
# scrape predictit api | |
prices <- market_price(6882) | |
# shorten contract names | |
con_labs <- prices$contract %>% | |
str_replace("Fewer than ", "< ") %>% | |
str_replace("(160) mil. or more", "> \\1") %>% | |
str_remove_all("[a-z]") %>% | |
str_remove_all("\\.") %>% | |
str_squish() | |
# convert to percentages | |
pop_prop <- function(n, p = 255.2004) { | |
scales::percent(as.numeric(n)/pop, 0.1) | |
} | |
con_labs <- str_replace_all(con_labs, "\\d+", pop_prop) | |
# reassign as ordered factor | |
prices$contract <- factor(prices$contract, labels = con_labs) | |
# plot past turnout ------------------------------------------------------- | |
# first tue after first mon in nov | |
# need date for 26a line | |
election_dates <- as.Date(character()) | |
for (y in seq(1932, 2016, by = 4)) { | |
elec_day <- ymd(paste(y, 11, 1)) | |
week_day <- wday(elec_day) | |
while (week_day != 2) { | |
elec_day <- elec_day + 1 | |
week_day <- wday(elec_day) | |
} | |
election_dates <- election_dates %>% | |
append(elec_day + 1) | |
} | |
# plot turnout history | |
turnout_past <- turnout %>% | |
filter(election >= 1932) %>% | |
mutate( | |
date = election_dates, | |
prop = prop/100 | |
) %>% | |
ggplot(aes(date, prop)) + | |
geom_vline(xintercept = as.Date("1971-07-01"), linetype = 2) + | |
geom_line(size = 1) + | |
geom_point(aes(color = prop), size = 5) + | |
scale_size_continuous(labels = percent, range = c(1, 10), guide = FALSE) + | |
scale_color_viridis_c(guide = FALSE, end = 0.75) + | |
scale_y_continuous(labels = scales::percent) + | |
scale_x_date(date_breaks = "8 years", labels = lubridate::year) + | |
coord_cartesian(ylim = c(0.475, 0.65)) + | |
geom_label( | |
mapping = aes( | |
x = as.Date("1982-01-01"), | |
y = 0.5875, | |
label = "26A lowers voting age" | |
) | |
) + | |
theme( | |
legend.position = "bottom", | |
axis.title.x = element_text(hjust = 1) | |
) + | |
labs( | |
title = "Voter Turnout in Past Presidential Elections", | |
subtitle = "Percentage of voting age population", | |
caption = "Source: US Census Bureau", | |
x = "Election Date", | |
y = "Turnout" | |
) | |
# geom with rectangles ---------------------------------------------------- | |
# calculate rect shape | |
price_rect <- prices %>% | |
mutate( | |
date = as.Date("2020-11-03"), | |
range = str_extract_all(contract, "[0-9]{2}(?:\\.[0-9]+)?"), | |
low = as.double(map_chr(range, `[`, 1)), | |
high = as.double(map_chr(range, `[`, 2)), | |
middle = coalesce(low + (low - high), low, high)/100 | |
) %>% | |
select(contract, low, high, last) | |
# fix high and low end values | |
price_rect$high[1] <- price_rect$low[1] | |
price_rect$low[1] <- price_rect$low[1] - 1.2 | |
price_rect$high[12] <- price_rect$low[12] + 1.2 | |
# create sideways histogram | |
turnout_rect <- price_rect %>% | |
mutate(across(2:3, `/`, 100)) %>% | |
mutate(mid = low + ((high - low)/2)) %>% | |
ggplot() + | |
geom_rect( | |
mapping = aes( | |
fill = high, | |
ymin = low, | |
ymax = high, | |
xmin = 0, | |
xmax = last | |
) | |
) + | |
scale_fill_viridis_c(guide = FALSE, end = 0.75) + | |
coord_cartesian(ylim = c(0.475, 0.65)) + | |
labs( | |
title = "Predicted Turnout", | |
subtitle = "Contract prices estimate probability", | |
caption = "Source: PredictIt/6882", | |
x = "Contract Price", | |
y = "Turnout Bracket" | |
) + | |
scale_x_continuous(labels = dollar) + | |
scale_y_continuous(position = "right") + | |
theme( | |
axis.text.y = element_blank(), | |
axis.ticks.y = element_blank(), | |
axis.title.x = element_text(hjust = 1), | |
axis.title.y = element_text(margin = margin(l = 20)) | |
) | |
# combine plots | |
turnout_both <- | |
turnout_past + | |
turnout_rect + | |
plot_layout(widths = c(5, 2)) | |
ggsave( | |
filename = "~/Pictures/turnout_both.png", | |
plot = turnout_both, | |
height = 6, | |
width = 12, | |
dpi = "retina" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This graph was made in R using ggplot and my predictr package. The source code can be found on GitHub. Past turnout comes from this Wikipedia page. Future turnout comes from this prediction market.
The histogram of turnout predictions comes from the PredictIt.org prediction market, where traders buy and sell binary futures contracts with real money. In this market, each contract is tied to a range of turnout. As an outcome becomes more or less likely, demand for a given contract rises and falls and the equilibrium price adjusts to reflect an underlying probability.
It's important to note that the smallest) and largest brackets (fewer than 130 million and more than 160 million) are open ended. That's why they're more likely outcomes than the others nearby. Wasn't quite sure how to convey this without making the bars wider and make them look even more likely.
The turnout ranges were converted to a percentage of voting age population using the 2019 estimates from the Census Bureau.