Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created November 15, 2020 07:58
Show Gist options
  • Save thoughtfulbloke/9183419ac82bff775abab4e074b42658 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/9183419ac82bff775abab4e074b42658 to your computer and use it in GitHub Desktop.
# example code for processing the XML New Zealand elections final data
# party and candidate vote.
# source: https://electionresults.govt.nz/electionresults_2020/xml/
#
# preceding this code was downloading a local copy of the NZ election
# XML provisional results using wget.
# Before that, since I am on a Mac, was using homebrew to install wget
# then I ran the terminal command
# wget -r -np -k https://electionresults.govt.nz/electionresults_2020/xml/
library(xml2)
library(dplyr)
# party info
# Reads parties.xml and writes one row per <party> element to parties.csv.
parties <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/parties.xml"
)
# Select the <party> nodes once and look each field up relative to its own
# node. xml_find_first() gives exactly one result per input node (read as NA
# when the child element is absent), so every column has the same length and
# stays aligned with p_no even if a party lacks one of the child elements.
party_nodes <- xml_find_all(parties, ".//party")
p_no <- xml_attr(party_nodes, "p_no")
p_abbrev <- xml_text(xml_find_first(party_nodes, "./abbrev"))
p_short <- xml_text(xml_find_first(party_nodes, "./short_name"))
p_name <- xml_text(xml_find_first(party_nodes, "./party_name"))
p_registered <- xml_text(xml_find_first(party_nodes, "./registered"))
# Column names are taken from the variable names; all values stay character.
parties <- data.frame(p_no, p_abbrev, p_short, p_name, p_registered,
                      stringsAsFactors = FALSE)
write.csv(parties, file = "parties.csv", row.names = FALSE)
# electorate info
# Reads electorates.xml and writes one row per <electorate> element to
# electorates.csv.
electorates <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/electorates.xml"
)
# Look the name up relative to each <electorate> node: xml_find_first() gives
# one result per node (NA when the child is absent), so electorate_name always
# has the same length as e_no and cannot drift out of alignment with it.
electorate_nodes <- xml_find_all(electorates, ".//electorate")
e_no <- xml_attr(electorate_nodes, "e_no")
electorate_name <- xml_text(
  xml_find_first(electorate_nodes, "./electorate_name")
)
# Column names are taken from the variable names; all values stay character.
electorates <- data.frame(e_no, electorate_name,
                          stringsAsFactors = FALSE)
write.csv(electorates, file = "electorates.csv", row.names = FALSE)
# location info
# Reads votingplaces.xml and writes one row per <votingplace> element to
# locations.csv.
booths <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/votingplaces.xml"
)
# Select the <votingplace> nodes once and look each field up relative to its
# own node. xml_find_first() gives one result per input node (read as NA when
# the child element is absent), so a voting place without, say, coordinates
# produces an NA cell instead of shifting every later row or making
# data.frame() fail on unequal column lengths.
booth_nodes <- xml_find_all(booths, ".//votingplace")
vp_id <- xml_attr(booth_nodes, "vp_id")
vp_e_no <- xml_attr(booth_nodes, "vp_e_no")
vp_address <- xml_text(xml_find_first(booth_nodes, "./vp_address"))
vp_lat <- xml_text(xml_find_first(booth_nodes, "./vp_lat"))
vp_lon <- xml_text(xml_find_first(booth_nodes, "./vp_lon"))
# Column names are taken from the variable names; all values (including
# vp_lat and vp_lon) stay character, exactly as read from the XML.
locations <- data.frame(vp_id, vp_e_no, vp_address, vp_lat, vp_lon,
                        stringsAsFactors = FALSE)
write.csv(locations, file = "locations.csv", row.names = FALSE)
# candidate info
# Reads candidates.xml and writes one row per <candidate> element to
# candidates.csv.
folk <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/candidates.xml"
)
# Select the <candidate> nodes once and look each field up relative to its own
# node. xml_find_first() gives one result per input node (read as NA when the
# child element is absent), so a candidate missing one of the child elements
# gets an NA cell rather than misaligning the columns or making data.frame()
# fail on unequal lengths.
candidate_nodes <- xml_find_all(folk, ".//candidate")
c_no <- xml_attr(candidate_nodes, "c_no")
candidate_name <- xml_text(
  xml_find_first(candidate_nodes, "./candidate_name")
)
electorate <- xml_text(xml_find_first(candidate_nodes, "./electorate"))
party <- xml_text(xml_find_first(candidate_nodes, "./party"))
list_no <- xml_text(xml_find_first(candidate_nodes, "./list_no"))
# Column names are taken from the variable names; all values stay character.
candidates <- data.frame(c_no, candidate_name, electorate, party, list_no,
                         stringsAsFactors = FALSE)
write.csv(candidates, file = "candidates.csv", row.names = FALSE)
#######
# voting_data.
# List every file below the downloaded xml directory (with its full relative
# path), then keep only the paths matching the voting-place result pattern.
electorate_files <- list.files(
  "electionresults.govt.nz/electionresults_2020/xml",
  full.names = TRUE,
  recursive = TRUE
)
booth_votes <- electorate_files[
  grepl("votingplaces/.+-.+xml", electorate_files)
]
# Read one voting-place result file and return its party votes in long form.
#
# x: path to a voting-place XML file, passed straight to read_xml().
#
# Returns a data.frame with one row per <party> node found in the file and
# the character columns p_no, votes, vp_id, vp_e_no, e_no and vp_no.
# p_no is each node's p_no attribute and votes is the node's trimmed text.
# The remaining four are attributes read from the parsed document itself and
# repeated on every row (NA when the attribute is absent).
# A file without any <party> node yields a zero-row data.frame instead of an
# error, so one empty file does not abort the whole lapply()/bind_rows() run.
booth_party <- function(x) {
  doc <- read_xml(x)
  party_nodes <- xml_find_all(doc, ".//party")
  # Repeat the file-level attributes explicitly: data.frame() recycles a
  # length-1 value against n rows, but refuses to combine it with 0 rows.
  n_rows <- length(party_nodes)
  data.frame(
    p_no = xml_attr(party_nodes, "p_no"),
    votes = xml_text(party_nodes, trim = TRUE),
    vp_id = rep(xml_attr(doc, "vp_id"), n_rows),
    vp_e_no = rep(xml_attr(doc, "vp_e_no"), n_rows),
    e_no = rep(xml_attr(doc, "e_no"), n_rows),
    vp_no = rep(xml_attr(doc, "vp_no"), n_rows),
    stringsAsFactors = FALSE
  )
}
# Parse every voting-place file with booth_party(), stack the per-file
# tables into one data frame and save it as party_votes.csv.
pvotes <- booth_votes %>%
  lapply(booth_party) %>%
  bind_rows()
write.csv(pvotes, file = "party_votes.csv", row.names = FALSE)
# Read one voting-place result file and return its candidate votes in long
# form.
#
# x: path to a voting-place XML file, passed straight to read_xml().
#
# Returns a data.frame with one row per <candidate> node found in the file
# and the character columns c_no, votes, vp_id, vp_e_no, e_no and vp_no.
# c_no is each node's c_no attribute and votes is the node's trimmed text.
# The remaining four are attributes read from the parsed document itself and
# repeated on every row (NA when the attribute is absent).
# A file without any <candidate> node yields a zero-row data.frame instead of
# an error, so one empty file does not abort the whole lapply()/bind_rows()
# run.
booth_candidates <- function(x) {
  doc <- read_xml(x)
  candidate_nodes <- xml_find_all(doc, ".//candidate")
  # Repeat the file-level attributes explicitly: data.frame() recycles a
  # length-1 value against n rows, but refuses to combine it with 0 rows.
  n_rows <- length(candidate_nodes)
  data.frame(
    c_no = xml_attr(candidate_nodes, "c_no"),
    votes = xml_text(candidate_nodes, trim = TRUE),
    vp_id = rep(xml_attr(doc, "vp_id"), n_rows),
    vp_e_no = rep(xml_attr(doc, "vp_e_no"), n_rows),
    e_no = rep(xml_attr(doc, "e_no"), n_rows),
    vp_no = rep(xml_attr(doc, "vp_no"), n_rows),
    stringsAsFactors = FALSE
  )
}
# Parse every voting-place file with booth_candidates(), stack the per-file
# tables into one data frame and save it as candidate_votes.csv.
cvotes <- booth_votes %>%
  lapply(booth_candidates) %>%
  bind_rows()
write.csv(cvotes, file = "candidate_votes.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment