Created
November 8, 2016 12:45
-
-
Save jessykate/1587f9345d520474709a8e800a222354 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Merge cobudget allocation CSV files.

usage: ./cobudget_merge.py file1.csv file2.csv ... fileN.csv

Combine cobudget allocations across multiple files and print the result to
stdout. Takes the full outer join of the rows in each file, *summing*
balances if an email is present multiple times.

Assumes each file has NO headers, and the first two columns are as follows:
email, amount.
Other columns may be present and will be ignored. If a row has a $0 amount
value, it will be skipped. Progress and diagnostics go to stderr so that
stdout contains only the merged ``email,amount`` rows.
"""

import csv
import sys


def merge_allocations(paths, log=None):
    """Sum the allocation amounts per email across several CSV files.

    Args:
        paths: iterable of CSV file paths. Column 0 of each row is the
            email, column 1 the amount; any further columns are ignored.
        log: writable text stream for progress messages. Defaults to
            ``sys.stderr`` (looked up at call time).

    Returns:
        A dict mapping each email (surrounding whitespace stripped) to the
        float sum of its non-zero amounts over all files.

    Raises:
        IOError/OSError: if one of the files cannot be opened.
    """
    if log is None:
        log = sys.stderr

    allocations = {}
    total_allocated = 0
    for the_file in paths:
        log.write("\n\n**** %s ****\n\n" % the_file)
        allocated_this_file = 0
        # The context manager closes the file even if reading it fails.
        with open(the_file, 'r') as open_file:
            for row in csv.reader(open_file, delimiter=","):
                try:
                    # row is a list of strings: row[0] is the email and
                    # row[1] the amount. Extra columns are ignored.
                    email = row[0].strip()
                    new_allocation = float(row[1])
                except (IndexError, ValueError):
                    # Blank line, comment row, header or any other row
                    # without a numeric second column: skip it.
                    continue
                if new_allocation == 0:
                    continue

                balance = allocations.get(email, 0.0)
                # Test membership rather than "balance > 0": an email whose
                # running balance is zero or negative has still been seen.
                if email in allocations:
                    log.write('%s: found existing balance of %s. appending new allocation of $%f.\n' % (email, balance, new_allocation))
                else:
                    log.write("adding new user %s with allocation of $%s\n" % (email, new_allocation))
                allocations[email] = balance + new_allocation
                allocated_this_file += new_allocation

        log.write("\nAllocated this file: %f\n" % allocated_this_file)
        total_allocated += allocated_this_file

    log.write("\nTotal allocated: %f\n" % total_allocated)
    return allocations


def main(argv=None, out=None, log=None):
    """Command-line entry point.

    Args:
        argv: argument vector; ``argv[1:]`` are the CSV files to merge.
            Defaults to ``sys.argv``.
        out: stream that receives the merged ``email,amount`` rows.
            Defaults to ``sys.stdout``.
        log: stream that receives progress messages and the list of all
            emails. Defaults to ``sys.stderr``.

    Returns:
        0 on success, suitable for passing to ``sys.exit``.
    """
    if argv is None:
        argv = sys.argv
    if out is None:
        out = sys.stdout
    if log is None:
        log = sys.stderr

    # Start with the second argument (index 1); argv[0] is the script name.
    allocations = merge_allocations(argv[1:], log)

    log.write("\n\n")
    all_emails = ",".join(allocations)
    log.write("\nAll Group Emails (should you want to bulk email your group):\n\n")
    log.write("%s\n" % all_emails)
    log.write("\n\n")

    # items() and an explicit write work on both Python 2 and Python 3,
    # unlike iteritems() and the print statement.
    for email, amount in allocations.items():
        out.write("%s,%f\n" % (email, amount))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment