Skip to content

Instantly share code, notes, and snippets.

@migurski
Last active January 19, 2025 22:48
Show Gist options
  • Save migurski/d353bf793fd089c8fe93bcfc687c27f9 to your computer and use it in GitHub Desktop.
Save migurski/d353bf793fd089c8fe93bcfc687c27f9 to your computer and use it in GitHub Desktop.
"""Convert input CSV to OpenElections CSV
Adapt large tabular sources like this Lenawee County example (1) converted from PDFs (2)
to format required by OpenElections (3) interpreting categories of votes, candidates, and
offices along the way.
1) https://docs.google.com/spreadsheets/d/1iOEqLFcwuA3J_HXUtEWIZUirA7J1_NdsnVUnc8z0TZY/edit?gid=2129694166#gid=2129694166
2) https://github.com/openelections/openelections-sources-mi/blob/master/2024/general/Gogebic%20MI%20Official%20Statement%20of%20Votes%20Cast%20with%20Certification%2011.5.2024.pdf
3) https://github.com/openelections/openelections-data-mi/issues/69
"""
import collections
import csv
import itertools
import re
import sys
# Translates the voting-method label read from the second column of each
# input row into the name of the output column that receives its counts.
# Labels missing from this table are looked up with .get() and cause the
# row to be skipped.
VOTING_METHODS = {
    "AV Counting Boards": "mail",
    "Early Voting": "early_voting",
    "Election Day": "election_day",
    # The overall total goes to the plain "votes" column.
    "Total": "votes",
}
# Office header of the form "<office> (District <id>)", where the
# parenthesised part may sit on its own line: group 1 is the office name,
# group 2 the district identifier. re.S lets "." span embedded newlines.
district_pat = re.compile(
    r"^(.+\S)[\n\s]+\(District (\w+)\)$",
    flags=re.S,
)
# Candidate header of the form "<candidate> (<party>)": group 1 is the
# candidate name, group 2 the single-word party code.
party_pat = re.compile(
    r"^(.+\S)[\n\s]+\((\w+)\)$",
    flags=re.S,
)
# Column order of the OpenElections output file.
_OUTPUT_FIELDS = (
    "county",
    "precinct",
    "office",
    "district",
    "party",
    "candidate",
    "votes",
    "early_voting",
    "election_day",
    "provisional",
    "mail",
)


def _split_label(pattern, label):
    """Return ``(name, qualifier)`` parsed from *label* using *pattern*.

    When *pattern* does not match, the name is *label* unchanged and the
    qualifier is ``None``.
    """
    if matched := pattern.match(label):
        return matched.group(1), matched.group(2)
    return label, None


def _collect_votes(county, input_path):
    """Read the wide input CSV and group its vote cells by output row.

    The first input row supplies the office label of each column and the
    second row the candidate label. Every later row is read as
    ``precinct, voting method, count, count, ...``.

    Returns a mapping from ``(county, precinct, office, district, party,
    candidate)`` to a dict of ``{output column name: vote string}``.

    Raises ValueError when the file has fewer than two header rows.
    """
    # The csv module requires newline="" so that it, not the text layer,
    # interprets line endings (including newlines inside quoted cells).
    with open(input_path, mode="r", newline="") as file1:
        offices, candidates, *input_rows = csv.reader(file1)

    output_votes = collections.defaultdict(dict)

    for row in input_rows:
        # csv.reader yields [] for a blank line; such rows (and rows with
        # a single cell) carry no precinct/method pair, so skip them
        # instead of failing on row[0] / row[1].
        if len(row) < 2:
            continue

        precinct = row[0].strip()
        voting_method = VOTING_METHODS.get(row[1].strip())
        if not (precinct and voting_method):
            continue

        for col, votes_raw in enumerate(row[2:], start=2):
            votes = votes_raw.strip()
            if not votes:
                continue

            office, district = _split_label(district_pat, offices[col].strip())
            # Registered-voter cells are kept only from the row whose
            # voting method maps to the "votes" column.
            if office == "Registered Voters" and voting_method != "votes":
                continue

            candidate, party = _split_label(party_pat, candidates[col].strip())

            output_key = county, precinct, office, district, party, candidate
            output_votes[output_key][voting_method] = votes

    return output_votes


def _write_votes(output_path, output_votes):
    """Write one output CSV row per key of *output_votes* to *output_path*.

    Columns with no recorded value are left empty by ``csv.DictWriter``.
    """
    key_fields = _OUTPUT_FIELDS[:6]
    # newline="" keeps the writer's own "\r\n" terminator from being
    # translated a second time on platforms whose line separator is "\r\n".
    with open(output_path, mode="w", newline="") as file2:
        out = csv.DictWriter(file2, _OUTPUT_FIELDS)
        out.writeheader()
        for output_key, votes in output_votes.items():
            out.writerow(dict(zip(key_fields, output_key), **votes))


if __name__ == "__main__":
    if len(sys.argv) != 4:
        sys.exit(f"usage: {sys.argv[0]} COUNTY INPUT_CSV OUTPUT_CSV")
    county, input_path, output_path = sys.argv[1:]
    # The input is read completely before the output file is opened.
    _write_votes(output_path, _collect_votes(county, input_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment