Created
November 4, 2020 02:10
-
-
Save thoughtfulbloke/073e4ed43d65b278613b77fccd977c1a to your computer and use it in GitHub Desktop.
R code showing processing the xml data for the NZ 2020 general election
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# example code for processing the XML New Zealand elections provisional data | |
# party vote only, but is extendable to candidate vote.
# source: https://electionresults.govt.nz/electionresults_2020_preliminary/xml/ | |
# | |
# preceding this code was downloading a local copy of the NZ election | |
# XML provisional results using wget. | |
# Before that, since I am on a Mac, was using homebrew to install wget | |
# then I ran the terminal command | |
# wget -r -np -k https://electionresults.govt.nz/electionresults_2020_preliminary/xml/ | |
library(xml2) | |
library(dplyr) | |
# party info ----
# Read the downloaded party list and flatten it into a data frame with one
# row per <party> node.
parties_doc <- read_xml("electionresults.govt.nz/electionresults_2020_preliminary/xml/parties.xml")

# Text content of every node in the party document matched by `xpath`.
party_text <- function(xpath) {
  xml_text(xml_find_all(parties_doc, xpath))
}

# p_no is an attribute on <party>; the other fields are child elements.
p_no <- xml_attr(xml_find_all(parties_doc, ".//party"), "p_no")
p_abbrev <- party_text(".//party/abbrev")
p_short <- party_text(".//party/short_name")
p_name <- party_text(".//party/party_name")
p_registered <- party_text(".//party/registered")

# The vectors are combined by position.
# NOTE(review): this assumes every <party> has exactly one of each child
# element; otherwise the lengths would differ -- confirm against the data.
parties <- data.frame(
  p_no, p_abbrev, p_short, p_name, p_registered,
  stringsAsFactors = FALSE
)
# location info ----
# Read the downloaded voting place list and flatten it into a data frame
# with one row per <votingplace> node.
booths <- read_xml("electionresults.govt.nz/electionresults_2020_preliminary/xml/votingplaces.xml")

# Value of attribute `attr_name` on every <votingplace> node.
place_attr <- function(attr_name) {
  xml_attr(xml_find_all(booths, ".//votingplace"), attr_name)
}

# Text content of every node in the booth document matched by `xpath`.
place_text <- function(xpath) {
  xml_text(xml_find_all(booths, xpath))
}

vp_id <- place_attr("vp_id")
vp_e_no <- place_attr("vp_e_no")
vp_address <- place_text(".//votingplace/vp_address")
vp_lat <- place_text(".//votingplace/vp_lat")
vp_lon <- place_text(".//votingplace/vp_lon")

# Latitude and longitude are kept as text here; they are converted to
# numeric later in the script.
# NOTE(review): the vectors are combined by position, which assumes every
# <votingplace> has exactly one of each child element -- confirm.
locations <- data.frame(
  vp_id, vp_e_no, vp_address, vp_lat, vp_lon,
  stringsAsFactors = FALSE
)
#######
# voting_data.
# List every file under the downloaded xml directory (searched recursively,
# with the directory path prepended to each name).
electorate_files <- list.files(
  path = "electionresults.govt.nz/electionresults_2020_preliminary/xml",
  recursive = TRUE,
  full.names = TRUE
)

# Keep only the paths that match the per-booth result file pattern:
# something under "votingplaces/" whose name contains a hyphen.
is_booth_file <- grepl("votingplaces/.+-.+xml", electorate_files)
booth_votes <- electorate_files[is_booth_file]
# Parse one booth result file into a data frame of party votes.
#
# x: passed straight to read_xml(); this script supplies file paths.
#
# Returns a data frame (character columns) with one row per <party> node:
#   p_no    - the party node's "p_no" attribute
#   votes   - the party node's trimmed text
#   vp_id, vp_e_no, e_no, vp_no - attributes read from the parsed document
#     itself (presumably its root element) and repeated on every row.
booth_content <- function(x) {
  doc <- read_xml(x)
  party_nodes <- xml_find_all(doc, ".//party")

  data.frame(
    p_no = xml_attr(party_nodes, "p_no"),
    votes = xml_text(party_nodes, trim = TRUE),
    vp_id = xml_attr(doc, "vp_id"),
    vp_e_no = xml_attr(doc, "vp_e_no"),
    e_no = xml_attr(doc, "e_no"),
    vp_no = xml_attr(doc, "vp_no"),
    stringsAsFactors = FALSE
  )
}
# Parse every per-booth file and stack the results into one data frame.
votes <- bind_rows(lapply(booth_votes, booth_content))

# Attach party details and voting place details to each vote row, then add
# numeric versions of the vote count and coordinates (the XML values are
# read as text).
combined <- votes %>%
  left_join(parties, by = "p_no") %>%
  # Both key columns must be given inside `by`. Passing "vp_e_no" as a
  # separate positional argument leaves it out of the join key, so the join
  # would match on vp_id alone and produce vp_e_no.x / vp_e_no.y columns.
  left_join(locations, by = c("vp_id", "vp_e_no")) %>%
  mutate(
    vote = as.numeric(votes),
    lat = as.numeric(vp_lat),
    lon = as.numeric(vp_lon)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment