Last active
March 1, 2016 21:34
-
-
Save zstumgoren/3cdf2e325396c5a9e15d to your computer and use it in GitHub Desktop.
Elec results script for shredding at NICAR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, urllib | |
from operator import itemgetter | |
from collections import defaultdict | |
from os.path import dirname, join | |
# Published Google spreadsheet with the county-level results, exported as CSV.
url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
# The raw download is saved one directory above the one containing this script.
filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv')
urllib.urlretrieve(url, filename)

# defaultdict auto-creates non-existent keys with an empty dictionary as the default value.
# Final shape: {race_key: {cand_key: [county-level row, ...]}}
results = defaultdict(dict)

# Initial data clean-up.
# The file is read inside a `with` block so its handle is closed as soon as
# every row has been consumed, instead of staying open until the script exits.
with open(filename, 'rb') as source:
    reader = csv.DictReader(source)
    for row in reader:
        # Parse name into first and last (raw value is expected to be "Last, First")
        row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
        # Convert total votes to an integer
        row['votes'] = int(row['votes'])

        # Group county-level results by office and district (if there is one),
        # then by candidate party and raw name
        race_key = row['office']
        if row['district']:
            race_key += "-%s" % row['district']

        # Create unique candidate key from party and name, in case multiple
        # candidates have the same name
        cand_key = "-".join((row['party'], row['candidate']))

        # Get/create race in results dict
        race = results[race_key]
        # setdefault creates an empty list for the candidate key if it doesn't already exist.
        race.setdefault(cand_key, []).append(row)
# Tally votes for races and candidates and assign winners.
# Final shape: {race_key: {'date', 'office', 'district', 'all_votes', 'candidates'}}
summary = defaultdict(dict)
for race_key, cand_results in results.items():
    all_votes = 0
    cands = []
    # The per-candidate rows get their own name (county_rows) so that this loop
    # does not rebind the `results` dict the outer loop is iterating over.
    for cand_key, county_rows in cand_results.items():
        # Populate a new candidate dict using one set of county results
        first_row = county_rows[0]
        cand = {
            'first_name': first_row['first_name'],
            'last_name': first_row['last_name'],
            'party': first_row['party'],
            'winner': '',
        }
        # Calculate candidate total votes across all of their county rows
        cand_total_votes = sum(county_row['votes'] for county_row in county_rows)
        cand['votes'] = cand_total_votes
        # Add cand totals to racewide vote count
        all_votes += cand_total_votes
        # And stash the candidate's data
        cands.append(cand)

    # Sort cands from highest to lowest vote count
    sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)

    # Determine winner, if any. The top candidate wins unless tied with the
    # runner-up; an uncontested race has no runner-up to compare against, so
    # its only candidate is the winner.
    first = sorted_cands[0]
    if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
        first['winner'] = 'X'

    # Get race metadata from one set of results.
    # next(iter(...)) takes the first value whether values() is a list or a view.
    result = next(iter(cand_results.values()))[0]

    # Add results to output
    summary[race_key] = {
        'date': result['date'],
        'office': result['office'],
        'district': result['district'],
        'all_votes': all_votes,
        'candidates': sorted_cands,
    }
# Write one CSV row per candidate, repeating the race-level fields on each row.
# The summary file is written next to this script.
outfile = join(dirname(__file__), 'summary_results.csv')
with open(outfile, 'wb') as fh:
    # We'll limit the output to cleanly parsed, standardized values
    fieldnames = [
        'date', 'office', 'district', 'last_name',
        'first_name', 'party', 'all_votes', 'votes', 'winner',
    ]
    writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
    writer.writeheader()
    for race_summary in summary.values():
        for cand in race_summary['candidates']:
            # Build a fresh row per candidate rather than popping from and
            # updating the race dict, so `summary` is left intact. The extra
            # 'candidates' key is dropped by extrasaction='ignore'.
            row = dict(race_summary)
            row.update(cand)
            writer.writerow(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment