Last active
January 19, 2025 22:48
-
-
Save migurski/d353bf793fd089c8fe93bcfc687c27f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert input CSV to OpenElections CSV | |
Adapt large tabular sources like this Lenawee County example (1) converted from PDFs (2)
to the format required by OpenElections (3), interpreting categories of votes, candidates,
and offices along the way.
1) https://docs.google.com/spreadsheets/d/1iOEqLFcwuA3J_HXUtEWIZUirA7J1_NdsnVUnc8z0TZY/edit?gid=2129694166#gid=2129694166
2) https://github.com/openelections/openelections-sources-mi/blob/master/2024/general/Gogebic%20MI%20Official%20Statement%20of%20Votes%20Cast%20with%20Certification%2011.5.2024.pdf
3) https://github.com/openelections/openelections-data-mi/issues/69
""" | |
import collections | |
import csv | |
import itertools | |
import re | |
import sys | |
# Maps the voting-method label found in the second input column to the output
# column that receives that row's vote counts. Rows carrying any other label
# are ignored.
VOTING_METHODS = {
    "AV Counting Boards": "mail",
    "Early Voting": "early_voting",
    "Election Day": "election_day",
    "Total": "votes",
}

# Columns that together identify one output row, in output order.
_KEY_FIELDS = ("county", "precinct", "office", "district", "party", "candidate")

# Full output header: key columns followed by the vote-count columns.
_OUTPUT_FIELDS = _KEY_FIELDS + (
    "votes",
    "early_voting",
    "election_day",
    "provisional",
    "mail",
)

# "Office name (District N)" -> ("Office name", "N"); the parenthesised suffix
# may be separated from the name by spaces or line breaks.
district_pat = re.compile(r"^(.+\S)[\n\s]+\(District (\w+)\)$", re.DOTALL)

# "Candidate name (PARTY)" -> ("Candidate name", "PARTY"), same separator rules.
party_pat = re.compile(r"^(.+\S)[\n\s]+\((\w+)\)$", re.DOTALL)


def _split_suffix(pattern, raw):
    """Split a header cell into (name, suffix); suffix is None when absent."""
    text = raw.strip()
    if matched := pattern.match(text):
        return matched.group(1), matched.group(2)
    return text, None


def _parse_columns(offices, candidates):
    """Parse both header rows once into per-column key parts.

    Returns a list whose item ``i`` describes input column ``i + 2`` as an
    ``(office, district, party, candidate)`` tuple. The list stops at the
    shorter of the two header rows.
    """
    columns = []
    for office_raw, candidate_raw in zip(offices[2:], candidates[2:]):
        office, district = _split_suffix(district_pat, office_raw)
        candidate, party = _split_suffix(party_pat, candidate_raw)
        columns.append((office, district, party, candidate))
    return columns


def _collect_votes(county, offices, candidates, input_rows):
    """Group vote cells by output row.

    Returns a dict mapping ``(county, precinct, office, district, party,
    candidate)`` to a dict of ``{output vote column: raw vote text}``.

    Raises:
        ValueError: if a non-empty vote cell sits in a column that has no
            office/candidate header.
    """
    columns = _parse_columns(offices, candidates)
    output_votes = collections.defaultdict(dict)

    # Data starts at the third CSV record; the first two are the header rows.
    for record_number, row in enumerate(input_rows, start=3):
        # csv.reader yields [] for blank lines; such rows carry no data.
        if len(row) < 2:
            continue

        precinct = row[0].strip()
        voting_method = VOTING_METHODS.get(row[1].strip())
        if not (precinct and voting_method):
            continue

        for col, votes_raw in enumerate(row[2:], start=2):
            votes = votes_raw.strip()
            if not votes:
                continue

            if col - 2 >= len(columns):
                raise ValueError(
                    f"record {record_number}, column {col + 1}: "
                    "vote count has no office/candidate header"
                )
            office, district, party, candidate = columns[col - 2]

            # Registered-voter counts are only kept from the "Total" rows.
            if office == "Registered Voters" and voting_method != "votes":
                continue

            output_key = county, precinct, office, district, party, candidate
            output_votes[output_key][voting_method] = votes

    return output_votes


def _write_votes(output_path, output_votes):
    """Write the grouped votes to ``output_path`` as an OpenElections CSV."""
    # newline="" is required by the csv module so it controls line endings.
    with open(output_path, mode="w", newline="", encoding="utf-8") as file2:
        out = csv.DictWriter(file2, _OUTPUT_FIELDS)
        out.writeheader()
        for output_key, votes in output_votes.items():
            # Vote columns absent from ``votes`` are written as empty cells.
            out.writerow(dict(zip(_KEY_FIELDS, output_key), **votes))


def main(argv):
    """Convert one county's wide-format CSV into an OpenElections CSV.

    Args:
        argv: exactly three strings: county name, input CSV path and
            output CSV path.

    The input's first record holds office names and the second holds
    candidate names; every later record holds a precinct, a voting-method
    label, and one vote count per office/candidate column.

    Exits with a message when the argument count is wrong or when the input
    lacks the two header records.
    """
    if len(argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} COUNTY INPUT_CSV OUTPUT_CSV")
    county, input_path, output_path = argv

    # newline="" lets the csv module handle line breaks itself, which matters
    # because header cells may contain embedded line breaks.
    with open(input_path, mode="r", newline="", encoding="utf-8") as file1:
        rows = list(csv.reader(file1))

    if len(rows) < 2:
        sys.exit(f"{input_path}: expected an office row and a candidate row")
    offices, candidates, *input_rows = rows

    output_votes = _collect_votes(county, offices, candidates, input_rows)
    _write_votes(output_path, output_votes)


if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment