Created
November 15, 2020 07:58
-
-
Save thoughtfulbloke/9183419ac82bff775abab4e074b42658 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# example code for processing the XML New Zealand elections final data | |
# party and cadidate vote. | |
# source: https://electionresults.govt.nz/electionresults_2020/xml/ | |
# | |
# preceding this code was downloading a local copy of the NZ election | |
# XML provisional results using wget. | |
# Before that, since I am on a Mac, was using homebrew to install wget | |
# then I ran the terminal command | |
# wget -r -np -k https://electionresults.govt.nz/electionresults_2020/xml/ | |
library(xml2) | |
library(dplyr) | |
# party info ----
# One row per <party> element. Each child field is looked up relative to its
# own party node with xml_find_first(), so a party that lacks a field gets NA
# in its own row instead of shortening the column (which would make
# data.frame() fail, or recycle values into the wrong rows).
parties_xml <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/parties.xml"
)
party_nodes <- xml_find_all(parties_xml, ".//party")
p_no <- xml_attr(party_nodes, "p_no")
p_abbrev <- xml_text(xml_find_first(party_nodes, "./abbrev"))
p_short <- xml_text(xml_find_first(party_nodes, "./short_name"))
p_name <- xml_text(xml_find_first(party_nodes, "./party_name"))
p_registered <- xml_text(xml_find_first(party_nodes, "./registered"))
parties <- data.frame(
  p_no, p_abbrev, p_short, p_name, p_registered,
  stringsAsFactors = FALSE
)
write.csv(parties, file = "parties.csv", row.names = FALSE)
# electorate info ----
# One row per <electorate> element: its e_no attribute and its name. The name
# is looked up per node so a missing <electorate_name> becomes NA in that row
# rather than misaligning the two columns.
electorates_xml <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/electorates.xml"
)
electorate_nodes <- xml_find_all(electorates_xml, ".//electorate")
e_no <- xml_attr(electorate_nodes, "e_no")
electorate_name <- xml_text(
  xml_find_first(electorate_nodes, "./electorate_name")
)
electorates <- data.frame(
  e_no, electorate_name,
  stringsAsFactors = FALSE
)
write.csv(electorates, file = "electorates.csv", row.names = FALSE)
# location info ----
# One row per <votingplace> element: the vp_id and vp_e_no attributes plus the
# address, latitude and longitude child elements (all kept as text). Children
# are looked up per node so a voting place without, say, coordinates gets NA
# in its own row instead of shifting the values of the places after it.
booths <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/votingplaces.xml"
)
booth_nodes <- xml_find_all(booths, ".//votingplace")
vp_id <- xml_attr(booth_nodes, "vp_id")
vp_e_no <- xml_attr(booth_nodes, "vp_e_no")
vp_address <- xml_text(xml_find_first(booth_nodes, "./vp_address"))
vp_lat <- xml_text(xml_find_first(booth_nodes, "./vp_lat"))
vp_lon <- xml_text(xml_find_first(booth_nodes, "./vp_lon"))
locations <- data.frame(
  vp_id, vp_e_no, vp_address, vp_lat, vp_lon,
  stringsAsFactors = FALSE
)
write.csv(locations, file = "locations.csv", row.names = FALSE)
# candidate info ----
# One row per <candidate> element: the c_no attribute plus the name,
# electorate, party and list_no child elements (all kept as text). Children
# are looked up per node so a candidate lacking one of them gets NA in its own
# row rather than misaligning the columns.
folk <- read_xml(
  "electionresults.govt.nz/electionresults_2020/xml/candidates.xml"
)
candidate_nodes <- xml_find_all(folk, ".//candidate")
c_no <- xml_attr(candidate_nodes, "c_no")
candidate_name <- xml_text(xml_find_first(candidate_nodes, "./candidate_name"))
electorate <- xml_text(xml_find_first(candidate_nodes, "./electorate"))
party <- xml_text(xml_find_first(candidate_nodes, "./party"))
list_no <- xml_text(xml_find_first(candidate_nodes, "./list_no"))
candidates <- data.frame(
  c_no, candidate_name, electorate, party, list_no,
  stringsAsFactors = FALSE
)
write.csv(candidates, file = "candidates.csv", row.names = FALSE)
#######
# voting_data.
# List every file under the downloaded XML tree (with its full relative path),
# then keep only the paths that contain "votingplaces/" followed by a
# hyphenated name and "xml" -- these are the files fed to the booth readers.
electorate_files <- list.files(
  path = "electionresults.govt.nz/electionresults_2020/xml",
  recursive = TRUE,
  full.names = TRUE
)
booth_votes <- grep("votingplaces/.+-.+xml", electorate_files, value = TRUE)
# Read one voting-place XML file and return its party votes.
#
# Args:
#   x: path (or anything read_xml() accepts) of a voting-place XML file.
#
# Returns:
#   A data.frame with one row per <party> node found in the file and the
#   character columns p_no, votes, vp_id, vp_e_no, e_no and vp_no. A file
#   with no <party> nodes yields a zero-row data.frame with those columns.
booth_party <- function(x) {
  doc <- read_xml(x)
  party_nodes <- xml_find_all(doc, ".//party")
  n_rows <- length(party_nodes)
  # The four identifiers are single attribute values taken from the parsed
  # document itself. rep_len() repeats them once per party row and, unlike
  # relying on data.frame() recycling, also works when there are zero rows.
  data.frame(
    p_no = xml_attr(party_nodes, "p_no"),
    votes = xml_text(party_nodes, trim = TRUE),
    vp_id = rep_len(xml_attr(doc, "vp_id"), n_rows),
    vp_e_no = rep_len(xml_attr(doc, "vp_e_no"), n_rows),
    e_no = rep_len(xml_attr(doc, "e_no"), n_rows),
    vp_no = rep_len(xml_attr(doc, "vp_no"), n_rows),
    stringsAsFactors = FALSE
  )
}
# Read the party votes from every voting-place file, stack the per-file
# tables into one, and save the result.
pvotes <- bind_rows(lapply(booth_votes, booth_party))
write.csv(pvotes, file = "party_votes.csv", row.names = FALSE)
# Read one voting-place XML file and return its candidate votes.
#
# Args:
#   x: path (or anything read_xml() accepts) of a voting-place XML file.
#
# Returns:
#   A data.frame with one row per <candidate> node found in the file and the
#   character columns c_no, votes, vp_id, vp_e_no, e_no and vp_no. A file
#   with no <candidate> nodes yields a zero-row data.frame with those columns.
booth_candidates <- function(x) {
  doc <- read_xml(x)
  candidate_nodes <- xml_find_all(doc, ".//candidate")
  n_rows <- length(candidate_nodes)
  # The four identifiers are single attribute values taken from the parsed
  # document itself. rep_len() repeats them once per candidate row and, unlike
  # relying on data.frame() recycling, also works when there are zero rows.
  data.frame(
    c_no = xml_attr(candidate_nodes, "c_no"),
    votes = xml_text(candidate_nodes, trim = TRUE),
    vp_id = rep_len(xml_attr(doc, "vp_id"), n_rows),
    vp_e_no = rep_len(xml_attr(doc, "vp_e_no"), n_rows),
    e_no = rep_len(xml_attr(doc, "e_no"), n_rows),
    vp_no = rep_len(xml_attr(doc, "vp_no"), n_rows),
    stringsAsFactors = FALSE
  )
}
# Read the candidate votes from every voting-place file, stack the per-file
# tables into one, and save the result.
cvotes <- bind_rows(lapply(booth_votes, booth_candidates))
write.csv(cvotes, file = "candidate_votes.csv", row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment