Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created November 4, 2020 02:10
Show Gist options
  • Save thoughtfulbloke/073e4ed43d65b278613b77fccd977c1a to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/073e4ed43d65b278613b77fccd977c1a to your computer and use it in GitHub Desktop.
R code showing processing the xml data for the NZ 2020 general election
# example code for processing the XML New Zealand elections provisional data
# party vote only, but is extendable to the candidate vote.
# source: https://electionresults.govt.nz/electionresults_2020_preliminary/xml/
#
# Before running this code, I downloaded a local copy of the NZ election
# XML provisional results using wget.
# Since I am on a Mac, I first used Homebrew to install wget,
# then ran the terminal command
# wget -r -np -k https://electionresults.govt.nz/electionresults_2020_preliminary/xml/
library(xml2)
library(dplyr)
# party info
# Builds a data frame with one row per <party> element of parties.xml.
# Every field is looked up per party node with xml_find_first(), so each
# vector has exactly one entry per party (NA where a child element is absent).
# Collecting each child with its own document-wide xml_find_all() would give
# vectors of different lengths (a data.frame() error or misaligned columns)
# as soon as one party lacked one of the elements.
parties <- read_xml(
  "electionresults.govt.nz/electionresults_2020_preliminary/xml/parties.xml"
)
party_nodes <- xml_find_all(parties, ".//party")
p_no <- xml_attr(party_nodes, "p_no")
p_abbrev <- xml_text(xml_find_first(party_nodes, "./abbrev"))
p_short <- xml_text(xml_find_first(party_nodes, "./short_name"))
p_name <- xml_text(xml_find_first(party_nodes, "./party_name"))
p_registered <- xml_text(xml_find_first(party_nodes, "./registered"))
# From here on `parties` is the data frame, replacing the parsed document.
parties <- data.frame(p_no, p_abbrev, p_short, p_name, p_registered,
                      stringsAsFactors = FALSE)
# location info
# Builds a data frame with one row per <votingplace> element of
# votingplaces.xml. Child elements are looked up per voting-place node with
# xml_find_first(), so a place without an address or coordinates yields NA in
# that row instead of shortening the vector and breaking the row alignment.
booths <- read_xml(
  "electionresults.govt.nz/electionresults_2020_preliminary/xml/votingplaces.xml"
)
place_nodes <- xml_find_all(booths, ".//votingplace")
vp_id <- xml_attr(place_nodes, "vp_id")
vp_e_no <- xml_attr(place_nodes, "vp_e_no")
vp_address <- xml_text(xml_find_first(place_nodes, "./vp_address"))
# Coordinates are kept as text here.
vp_lat <- xml_text(xml_find_first(place_nodes, "./vp_lat"))
vp_lon <- xml_text(xml_find_first(place_nodes, "./vp_lon"))
locations <- data.frame(vp_id, vp_e_no, vp_address, vp_lat, vp_lon,
                        stringsAsFactors = FALSE)
# Voting data ----
# List every file in the local mirror, searching sub-directories and keeping
# the full relative path, then keep only the paths that match the
# voting-place results pattern.
electorate_files <- list.files(
  "electionresults.govt.nz/electionresults_2020_preliminary/xml",
  full.names = TRUE,
  recursive = TRUE
)
is_booth_file <- grepl("votingplaces/.+-.+xml", electorate_files)
booth_votes <- electorate_files[is_booth_file]
# Read one voting-place result file and return its party votes.
#
# x: the file to parse, handed straight to read_xml().
#
# Returns a data frame with one row per <party> element found in the file:
#   p_no   - the element's p_no attribute
#   votes  - the element's text, trimmed of surrounding whitespace
#   vp_id, vp_e_no, e_no, vp_no - attributes read from the parsed document
#            object, repeated on every row
# No type conversion is done here; strings are not turned into factors.
booth_content <- function(x) {
  doc <- read_xml(x)
  party_nodes <- xml_find_all(doc, ".//party")
  data.frame(
    p_no = xml_attr(party_nodes, "p_no"),
    votes = xml_text(party_nodes, trim = TRUE),
    vp_id = xml_attr(doc, "vp_id"),
    vp_e_no = xml_attr(doc, "vp_e_no"),
    e_no = xml_attr(doc, "e_no"),
    vp_no = xml_attr(doc, "vp_no"),
    stringsAsFactors = FALSE
  )
}
# Parse every voting-place result file and stack the per-file data frames
# into one long table (one row per party per file).
votes <- bind_rows(lapply(booth_votes, booth_content))
# Attach party details and voting-place details to each row, then add numeric
# versions of the vote count and the coordinates (the source columns are text).
combined <- votes %>%
  left_join(parties, by = "p_no") %>%
  # Both key columns must be inside `by`. Written as by = "vp_id", "vp_e_no",
  # the second string is taken as the next positional argument rather than as
  # a join key, so the join would use vp_id alone and produce duplicated
  # vp_e_no.x / vp_e_no.y columns.
  left_join(locations, by = c("vp_id", "vp_e_no")) %>%
  mutate(
    vote = as.numeric(votes),
    lat = as.numeric(vp_lat),
    lon = as.numeric(vp_lon)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment