Skip to content

Instantly share code, notes, and snippets.

@zstumgoren
Last active March 1, 2016 21:34
Show Gist options
  • Save zstumgoren/3cdf2e325396c5a9e15d to your computer and use it in GitHub Desktop.
Save zstumgoren/3cdf2e325396c5a9e15d to your computer and use it in GitHub Desktop.
Election results script for shredding at NICAR
import csv, urllib
from operator import itemgetter
from collections import defaultdict
from os.path import dirname, join
# Download the results spreadsheet, then clean and group its rows.
#
# NOTE: this script targets Python 2 -- urllib.urlretrieve and opening csv
# files in binary mode ('rb') are Python 2 idioms.
url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
# The download is saved in the parent of the directory containing this script.
filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv')
urllib.urlretrieve(url, filename)

# Grouped rows: results[race_key][cand_key] -> list of that candidate's rows.
# defaultdict auto-creates non-existent race keys with an empty dictionary.
results = defaultdict(dict)

# Initial data clean-up. The `with` block guarantees the downloaded file is
# closed once every row has been read (it was previously left open).
with open(filename, 'rb') as infile:
    reader = csv.DictReader(infile)
    for row in reader:
        # Parse "Last, First" into its two parts. partition() splits on the
        # first comma only, so a name with extra commas keeps the remainder
        # in first_name, and a name with no comma gets an empty first_name
        # instead of raising ValueError.
        last_name, _sep, first_name = row['candidate'].partition(',')
        row['last_name'] = last_name.strip()
        row['first_name'] = first_name.strip()

        # Convert total votes to an integer so they can be summed later.
        row['votes'] = int(row['votes'])

        # Key each race by office, plus the district if there is one.
        race_key = row['office']
        if row['district']:
            race_key += "-%s" % row['district']

        # Create a unique candidate key from party and raw name, in case
        # multiple candidates have the same name.
        cand_key = "-".join((row['party'], row['candidate']))

        # Get/create the race, then get/create the candidate's row list
        # (setdefault supplies an empty list the first time) and store the row.
        results[race_key].setdefault(cand_key, []).append(row)
# Tally votes for races and candidates and assign winners.
# summary[race_key] -> dict of race metadata, the race-wide vote total, and
# the list of candidate dicts sorted from most to fewest votes.
summary = defaultdict(dict)
for race_key, cand_results in results.items():
    all_votes = 0
    cands = []
    # The loop variable is deliberately NOT called `results`: that would
    # rebind the name of the dict the outer loop is walking.
    for cand_rows in cand_results.values():
        # Populate a new candidate dict using one of the candidate's rows;
        # name and party are the same on every row for a given candidate key.
        first_row = cand_rows[0]
        cand = {
            'first_name': first_row['first_name'],
            'last_name': first_row['last_name'],
            'party': first_row['party'],
            'winner': '',
        }
        # Calculate the candidate's total votes across all of their rows.
        cand_total_votes = sum(row['votes'] for row in cand_rows)
        cand['votes'] = cand_total_votes
        # Add the candidate's total to the race-wide vote count.
        all_votes += cand_total_votes
        # And stash the candidate's data.
        cands.append(cand)

    # Sort candidates from highest to lowest vote count.
    sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)

    # Determine the winner, if any: the top vote-getter wins unless tied with
    # the runner-up. A race with a single candidate has no runner-up, so that
    # candidate wins outright (indexing [1] there used to raise IndexError).
    first = sorted_cands[0]
    if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
        first['winner'] = 'X'

    # Get race metadata from one row of results. next(iter(...)) picks the
    # first candidate's rows without indexing values(), so it works whether
    # values() returns a list or a view.
    result = next(iter(cand_results.values()))[0]

    # Add results to output.
    summary[race_key] = {
        'date': result['date'],
        'office': result['office'],
        'district': result['district'],
        'all_votes': all_votes,
        'candidates': sorted_cands,
    }
# Write one CSV row per candidate, each carrying its race's metadata.
# The output file is created in the same directory as this script.
outfile = join(dirname(__file__), 'summary_results.csv')
with open(outfile, 'wb') as fh:
    # We'll limit the output to cleanly parsed, standardized values.
    fieldnames = [
        'date', 'office', 'district', 'last_name',
        'first_name', 'party', 'all_votes', 'votes', 'winner',
    ]
    # extrasaction='ignore' drops any key not listed in fieldnames, which
    # includes the nested 'candidates' list copied into each row below.
    writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
    writer.writeheader()
    for race_summary in summary.values():
        for cand in race_summary['candidates']:
            # Build a fresh row per candidate instead of popping from and
            # updating the race dict in place, so `summary` is left intact.
            row = dict(race_summary)
            row.update(cand)
            writer.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment