Skip to content

Instantly share code, notes, and snippets.

@resulumit
Last active June 18, 2021 14:40
Show Gist options
  • Save resulumit/faa60ed393a418da5989c19204ef9006 to your computer and use it in GitHub Desktop.
Save resulumit/faa60ed393a418da5989c19204ef9006 to your computer and use it in GitHub Desktop.
# load the required libraries ---------------------------------------------
library(tidyverse)
# create the dataset of votes ---------------------------------------------
# Identifiers that appear in the vote-matrix file names, one per download.
# NOTE(review): the plots further down single out 2020 and 2021 -- confirm
# whether a later file also needs to be listed here.
file_ext <- c(1997, 2001, 2005, 2010, 2015, 2017)

# Download one vote-matrix file and reshape it to long format.
#
# term_year: one element of `file_ext`; it selects the file to download and
#            is stored in the `term` column of the result.
# Returns a tibble with the columns term, date, voteno and vote, holding one
# row per combination of input row and reshaped column.
read_term_votes <- function(term_year) {
  vote_matrix <- read.delim(
    paste0("https://www.publicwhip.org.uk/data/votematrix-", term_year, ".dat"),
    header = TRUE, fill = TRUE, sep = "\t", quote = ""
  )
  # Columns 5 to the last-but-one are reshaped; presumably the first four
  # hold division metadata and the final column is not an MP column --
  # TODO confirm against the file format.
  last_mp <- length(vote_matrix) - 1
  vote_matrix %>%
    pivot_longer(cols = all_of(5:last_mp),
                 names_to = "mpid", values_to = "vote") %>%
    mutate(term = term_year) %>%
    select(term, date, voteno, vote)
}

# lapply() avoids the 1:length() pitfall on empty input and bind_rows()
# stacks the per-file tibbles in a single step.
df <- bind_rows(lapply(file_ext, read_term_votes))
# tidy the dataset --------------------------------------------------------
# Builds one row per division (date, year, voteno) with `attendance`: the
# percentage of the term's denominator whose vote code is not -9. Only
# divisions dated 1 January to 14 June of any year are kept.
df_tidy <- df %>%
  mutate(
    # a vote code of -9 counts as not present, any other code as present
    present = if_else(vote == -9, 0, 1),
    # parse the date once and derive all three components from it
    date_parsed = as.Date(date),
    year = as.numeric(format(date_parsed, "%Y")),
    month = as.numeric(format(date_parsed, "%m")),
    day = as.numeric(format(date_parsed, "%d")),
    # denominator of the percentage, selected by term (presumably the number
    # of seats in that parliament -- confirm)
    seats = case_when(term <= 2001 ~ 659,
                      term == 2005 ~ 646,
                      term > 2005 ~ 650)
  ) %>%
  filter((month == 6 & day < 15) | month < 6) %>%
  group_by(date, year, voteno) %>%
  # The denominator must be a scalar here: evaluating case_when() on the
  # full-length `term` vector inside summarise() returned one row per input
  # row instead of one per division, which duplicated every division and
  # skewed the later averages. Assumes a division belongs to a single term,
  # so the first `seats` value represents the whole group.
  summarise(attendance = sum(present) * 100 / first(seats),
            .groups = "drop")
# line plot ---------------------------------------------------------------
# Yearly mean of the per-division attendance percentage, drawn as one line
# with the y axis labelled in percent.
df_tidy %>%
  group_by(year) %>%
  summarise(avg_attendance = mean(attendance)) %>%
  ggplot() +
  geom_line(mapping = aes(x = year, y = avg_attendance), size = 1) +
  theme_minimal() +
  xlab("") +
  ylab("Attendance to votes, \nJan to mid-June\n") +
  scale_y_continuous(labels = function(pct) paste0(pct, "%"))
# density plot ------------------------------------------------------------
# Density of per-division attendance with one curve per year. Curves are
# coloured by category: 2020 and 2021 each get their own colour, all earlier
# years share one.
df_tidy %>%
  mutate(
    # the three conditions are mutually exclusive; other years become NA
    category = case_when(
      year == 2021 ~ "2021",
      year == 2020 ~ "2020",
      year < 2020 ~ "1997-2019"
    )
  ) %>%
  ggplot(
    mapping = aes(x = attendance, colour = category, group = factor(year))
  ) +
  # this layer is kept out of the legend; the stat_density() line layer is
  # the one left to contribute legend keys
  geom_density(size = 1, show.legend = FALSE) +
  stat_density(geom = "line", position = "identity", size = 1) +
  scale_x_continuous(labels = function(pct) paste0(pct, "%")) +
  scale_colour_manual(values = c("gray", "#F8766D", "#00BFC4"), name = "") +
  xlab("\nAttendance to votes, Jan to mid-June") +
  ylab("Density\n") +
  # the complete theme comes first so the legend tweak is not overwritten
  theme_minimal() +
  theme(legend.position = "bottom")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment