Last active
May 27, 2019 19:49
-
-
Save umbernhard/a2891d63d5002098aee17dae15d78cdf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parser for MEDSL's precinct-level data.
# Matt Bernhard, May 2019
import csv | |
import sys | |
from collections import defaultdict | |
class AutoVivification(dict):
    """
    Dictionary that creates nested dictionaries on demand, like perl's
    autovivification feature.

    Looking up a key that is not present stores a new, empty instance of the
    same class under that key and returns it, so ``d['a']['b']['c'] = 1``
    works without building the intermediate levels by hand.

    Stolen from https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python
    """

    def __missing__(self, item):
        # dict's subscript lookup invokes this hook only when `item` is absent.
        child = type(self)()
        self[item] = child
        return child
# Values of `candidate_normalized` that are dropped for every state.
_NON_CANDIDATES = frozenset([
    'for', 'unopposed', 'all', 'candidates',
    # IN and SC have straight-ticket totals, but these are double counted.
    'ticket',
    # AL records straight-party voting in three ways!
    'democrat', 'republican', 'party',
])

# NJ specified write-ins by 'Personal Choice'
_WRITE_IN_ALIASES = frozenset(['in', 'ins', 'choice'])

# NY records absentee military votes this way
_NEW_YORK_NON_CANDIDATES = frozenset([
    'military', 'affidavit', 'federal', 'emergency', 'presidential', 'votes', 'above',
])


def read_data(filename, sel_state=None, sel_county=None, sel_candidate=None, sel_mode=None, outfile=''):
    """
    Read election results from a csv file into a nested dictionary.

    This function can consume the whole dataverse file, or it can keep only the
    votes for a particular state, county, candidate, or voting mode. Rows that
    are kept can also be copied to <outfile>, so the full file does not have to
    be reread when only one state/county/candidate/etc. is of interest.

    Args:
        filename: The file containing election results. Assumes a csv with the
            columns 'state', 'county_name', 'jurisdiction', 'precinct',
            'writein', 'candidate_normalized', 'votes', and 'mode'.
        sel_state: Only parse results for this state.
        sel_county: Only parse results for this county. You must specify a
            state along with the county.
        sel_candidate: Only parse results for this candidate. Compared against
            the candidate name after write-in aliases have been folded into
            'write-in'.
        sel_mode: Only parse results for this mode of voting. Options include
            'election day', 'absentee', 'advance in person', and 'provisional',
            and possibly others. Compared before the '-write-in' suffix is
            appended.
        outfile: If non-empty, every row that is kept is also written to this
            path as csv, using the columns of the first row read.

    Returns:
        An AutoVivification laid out as
        election[state][county][jurisdiction][precinct][mode][candidate]['votes'],
        where repeated rows for the same key are summed. Rows with an empty
        county are filed under 'NA', and rows with a truthy 'writein' value get
        '-write-in' appended to their mode.

    Raises:
        ValueError: If sel_county is given without sel_state, or if a kept
            row's 'votes' value is not an integer.
        KeyError: If an expected column is missing from the csv.
    """
    if sel_county and not sel_state:
        raise ValueError('You must specify a state when selecting a county')

    election = AutoVivification()
    ctr = 0
    writer = None

    # newline='' is what the csv module expects of the files handed to it;
    # without it the writer produces '\r\r\n' line endings on Windows.
    out = open(outfile, 'w', encoding='ISO-8859-1', newline='') if outfile else None
    try:
        print('Parsing for {}, {}'.format(sel_county, sel_state))

        with open(filename, encoding='ISO-8859-1', newline='') as f:
            for line in csv.DictReader(f):
                # The writer is created lazily so its header can be taken from
                # the first row's columns.
                if out and writer is None:
                    writer = csv.DictWriter(out, fieldnames=list(line))
                    writer.writeheader()

                state = line['state']
                county = line['county_name'] or 'NA'
                jurisdiction = line['jurisdiction']
                precinct = line['precinct']
                # NOTE(review): `writein` is only tested for truthiness. If the
                # column holds text such as 'FALSE', every row would be treated
                # as a write-in -- confirm against the data.
                write_in = line['writein']
                candidate = line['candidate_normalized']
                votes_text = line['votes']
                mode = line['mode']

                if candidate in _NON_CANDIDATES:
                    continue
                if candidate in _NEW_YORK_NON_CANDIDATES and state == 'New York':
                    continue
                if candidate in _WRITE_IN_ALIASES or (not candidate and write_in):
                    candidate = 'write-in'

                if sel_state and sel_state != state:
                    continue
                if sel_county and sel_county != county:
                    continue
                if sel_candidate and sel_candidate != candidate:
                    continue
                if sel_mode and sel_mode != mode:
                    continue

                # Parsed only after filtering, so a malformed count in a row
                # that was not selected cannot abort the parse.
                votes = int(votes_text)

                if write_in:
                    mode += '-write-in'

                tally = election[state][county][jurisdiction][precinct][mode][candidate]
                tally['votes'] = tally.get('votes', 0) + votes

                ctr += 1
                if out:
                    writer.writerow(line)

                # '\r' keeps the running count on a single terminal line.
                sys.stdout.write('{} lines parsed\r'.format(ctr))
                sys.stdout.flush()

        sys.stdout.write('{} lines parsed\n'.format(ctr))
    finally:
        # Close the output file even when a row raises part-way through.
        if out is not None:
            out.close()

    return election
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment