Skip to content

Instantly share code, notes, and snippets.

@umbernhard
Last active May 27, 2019 19:49
Show Gist options
  • Save umbernhard/a2891d63d5002098aee17dae15d78cdf to your computer and use it in GitHub Desktop.
Save umbernhard/a2891d63d5002098aee17dae15d78cdf to your computer and use it in GitHub Desktop.
# Parser for MEDSL's precinct-level data.
# Matt Bernhard, May 2019
import csv
import sys
from collections import defaultdict
class AutoVivification(dict):
    """
    A dict that creates missing entries on first access, like perl's
    autovivification.

    Looking up an absent key stores a new, empty instance of the same class
    under that key and returns it, so arbitrarily deep chains such as
    ``d['a']['b']['c'] = 1`` work without initialising the intermediate levels.

    Adapted from https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python
    """

    def __getitem__(self, item):
        try:
            found = super().__getitem__(item)
        except KeyError:
            # Absent key: create the child with type(self) so subclasses
            # vivify into their own type, and remember it for next time.
            found = type(self)()
            self[item] = found
        return found
def read_data(filename, sel_state=None, sel_county=None, sel_candidate=None, sel_mode=None, outfile=''):
    '''
    Reads election results from a csv file into a nested dictionary.

    This function can consume the whole dataverse file, or it can keep only the
    rows for a particular state, county, candidate, or voting mode. Rows that
    are kept can also be copied verbatim to <outfile>.

    Input:
        filename - the file containing election results. Assumes a csv, read as
            ISO-8859-1, with the columns 'state', 'county_name', 'jurisdiction',
            'precinct', 'writein', 'candidate_normalized', 'votes' and 'mode'.
        sel_state - Only parses results for this state.
        sel_county - Only parses results for this county. You must specify a
            state along with the county.
        sel_candidate - Only parses results for this candidate. Compared against
            the candidate name after write-in normalization (see below).
        sel_mode - Only parses results for this mode of voting. Options include
            'election day', 'absentee', 'advance in person', and 'provisional',
            and possibly others. Compared against the raw 'mode' column, before
            the '-write-in' suffix is added.
        outfile - if non-empty, every row that passes the filters is written to
            this csv file (same columns as the input), so you don't have to
            reread all the data if you just care about one
            state/county/candidate/etc.

    Output:
        election - an AutoVivification where
            election[state][county][jurisdiction][precinct][mode][candidate]['votes']
            is the summed integer vote count. An empty county name is stored
            as 'NA'. Rows whose 'writein' column is non-empty get '-write-in'
            appended to their mode key.

    Raises:
        ValueError - if sel_county is given without sel_state, or if a selected
            row has a 'votes' value that is not an integer.
        KeyError - if the csv lacks one of the expected columns.
    '''
    if sel_county and not sel_state:
        raise ValueError('You must specify a state when selecting a county')

    # Candidate values that never represent a real candidate total. Built once
    # here instead of on every row.
    placeholder_candidates = frozenset(['for', 'unopposed', 'all', 'candidates'])
    # NJ specified write-ins by 'Personal Choice'
    write_in_aliases = frozenset(['in', 'ins', 'choice'])
    # AL records straight-party voting in three ways!
    straight_party = frozenset(['democrat', 'republican', 'party'])
    # NY records absentee military votes this way
    new_york_extras = frozenset(['military', 'affidavit', 'federal', 'emergency',
                                 'presidential', 'votes', 'above'])

    ctr = 0
    election = AutoVivification()
    writer = None

    # newline='' is what the csv module requires of files it reads or writes;
    # without it, embedded newlines are misparsed and some platforms emit
    # an extra '\r' per row.
    out = open(outfile, 'w', encoding='ISO-8859-1', newline='') if outfile else None
    try:
        print('Parsing for {}, {}'.format(sel_county, sel_state))

        with open(filename, encoding='ISO-8859-1', newline='') as f:
            for line in csv.DictReader(f):
                # The output header mirrors the input columns, so the writer
                # can only be created once the first row has been read.
                if out and writer is None:
                    writer = csv.DictWriter(out, fieldnames=line.keys())
                    writer.writeheader()

                state = line['state']
                county = line['county_name'] or 'NA'
                jurisdiction = line['jurisdiction']
                precinct = line['precinct']
                write_in = line['writein']
                candidate = line['candidate_normalized']

                if candidate in placeholder_candidates:
                    continue
                if candidate in write_in_aliases or (not candidate and write_in):
                    candidate = 'write-in'

                # IN and SC have straight-ticket totals, but these are double
                # counted, so 'ticket' rows are dropped for every state.
                if candidate == 'ticket':
                    continue
                if candidate in straight_party:
                    continue
                if state == 'New York' and candidate in new_york_extras:
                    continue

                mode = line['mode']

                if sel_state and sel_state != state:
                    continue
                if sel_county and sel_county != county:
                    continue
                if sel_candidate and sel_candidate != candidate:
                    continue
                if sel_mode and sel_mode != mode:
                    continue

                # Convert only rows that survived the filters, so a malformed
                # count elsewhere in the file cannot abort a narrow selection.
                votes = int(line['votes'])

                # NOTE(review): any non-empty string is truthy here, including
                # a literal 'FALSE' -- confirm how the writein column is encoded.
                if write_in:
                    mode += '-write-in'

                tally = election[state][county][jurisdiction][precinct][mode][candidate]
                # dict.get() does not autovivify, so a first sighting starts at 0.
                tally['votes'] = tally.get('votes', 0) + votes

                ctr += 1
                if writer:
                    writer.writerow(line)

                # '\r' keeps the running count on a single terminal line.
                sys.stdout.write('{} lines parsed\r'.format(ctr))
                sys.stdout.flush()

        sys.stdout.write('{} lines parsed\n'.format(ctr))
    finally:
        # Close the output file even when parsing fails part-way through.
        if out:
            out.close()

    return election
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment