Last active
December 15, 2015 02:49
-
-
Save blech/5189854 to your computer and use it in GitHub Desktop.
Lego expenses
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import csv | |
import datetime | |
import sys | |
from decimal import * | |
data = [] | |
csvfile = open(sys.argv[1], 'r') | |
csvreader = csv.reader(csvfile) | |
columns = csvreader.next() | |
# print columns | |
for row in csvreader: | |
if not row: | |
continue | |
truerow = [x if x != "Yes" else True for x in row] | |
rowdict = dict(zip(columns, truerow)) | |
# del(rowdict['']) | |
try: | |
[d,m,y] = [int(x) for x in rowdict['DateAcquired'].split('/')] | |
except: | |
if rowdict['DateAcquired'] != '': | |
print "Datetime not parsed: '%s'" % rowdict['DateAcquired'] | |
[d,m,y] = [None,None,None] | |
pass | |
try: | |
rowdict['Date'] = datetime.date(day=d, month=m, year=y) | |
except: | |
# print "Datetime not valid: %s/%s/%s" % (d, m, y) | |
pass | |
if d and m and y and not 'Date' in rowdict: | |
try: | |
rowdict['Date'] = datetime.date(year=d, month=m, day=y) | |
# print "... valid in reverse format" | |
except: | |
print "Datetime not valid: %s-%s-%s" % (d, m, y) | |
pass | |
rowdict['Online'] = False | |
rowdict['Used'] = False | |
rowdict['PricePaidDec'] = Decimal('0.00') | |
rowdict['AdditionalPricePaidDec'] = Decimal('0.00') | |
if (rowdict['PricePaid']): | |
rowdict['PricePaidDec'] = Decimal(rowdict['PricePaid']) | |
if (rowdict['AdditionalPricePaid']): | |
rowdict['AdditionalPricePaidDec'] = Decimal(rowdict['AdditionalPricePaid']) | |
rowdict['PriceTotal'] = rowdict['PricePaidDec']+rowdict['AdditionalPricePaidDec'] | |
AF = rowdict['AcquiredFrom'] | |
if 'ebay' in AF or 'bricklink' in AF: | |
# print "Online/Used vendor '%s'" % rowdict['AcquiredFrom'] | |
rowdict['Online'] = True | |
rowdict['Used'] = True | |
if 'online' in AF: | |
# print "Online vendor '%s'" % rowdict['AcquiredFrom'] | |
rowdict['Online'] = True | |
if ' (' in AF: | |
rowdict['Chain'] = AF[:AF.index(' (')] | |
else: | |
rowdict['Chain'] = AF | |
data.append(rowdict) | |
print "Have %s sets" % len(data) | |
print "Have %s different sets" % len(set([x['SetNumber'] for x in data])) | |
total_price_paid = sum([x['PricePaidDec'] for x in data]) | |
total_additional_price_paid = sum([x['AdditionalPricePaidDec'] for x in data]) | |
total_total = total_price_paid+total_additional_price_paid | |
print "Total cost $%s + $%s (total $%s)" % (total_price_paid, total_additional_price_paid, | |
total_total) | |
vendors = set([x['Chain'] for x in data]) | |
print "\nFrom %s vendors:" % len(vendors) | |
for vendor in vendors: | |
vendor_sets = [x for x in data if x['Chain'] == vendor] | |
vendor_total = sum([x['PriceTotal'] for x in vendor_sets]) | |
print " %s: %s sets costing $%s" % (vendor, len(vendor_sets), vendor_total) | |
branches = set([x['AcquiredFrom'] for x in vendor_sets]) | |
if len(branches) > 1: | |
for branch in branches: | |
branch_sets = [x for x in vendor_sets if x['AcquiredFrom'] == branch] | |
branch_total = sum([x['PriceTotal'] for x in branch_sets]) | |
print " %s: %s sets costing $%s" % (branch, len(branch_sets), branch_total) | |
months_text = sorted(set(['%4i/%02i' % (x['Date'].year, x['Date'].month) for x in data if 'Date' in x])) | |
months_tuples = sorted(set([(x['Date'].year, x['Date'].month) for x in data if 'Date' in x])) | |
months = dict(zip(months_text, months_tuples)) | |
print "\nBought over %s months" % len(months) | |
for month in sorted(months): | |
month_sets = [x for x in data if 'Date' in x and | |
x['Date'].year == months[month][0] and | |
x['Date'].month == months[month][1] ] | |
month_sets_bought = len(month_sets) | |
month_total = sum([x['PriceTotal'] for x in month_sets]) | |
print " %s: %s sets costing $%s" % (month, month_sets_bought, month_total) | |
new_sets = [x for x in data if not x['Used']] | |
used_sets = [x for x in data if x['Used']] | |
online_sets = [x for x in data if x['Online']] | |
shop_sets = [x for x in data if not x['Online']] | |
count = len(new_sets) | |
cost = sum(x['PriceTotal'] for x in new_sets) # TODO routine | |
print "\n%s sets new (costing $%s)" % (count, cost) | |
count = len(used_sets) | |
cost = sum(x['PriceTotal'] for x in used_sets) # TODO routine | |
print "%s sets used (costing $%s)" % (count, cost) | |
count = len(online_sets) | |
cost = sum(x['PriceTotal'] for x in online_sets) # TODO routine | |
print "\n%s sets online (costing $%s)" % (count, cost) | |
count = len(shop_sets) | |
cost = sum(x['PriceTotal'] for x in shop_sets) # TODO routine | |
print "%s sets in shops (costing $%s)" % (count, cost) | |
# print data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import csv | |
import datetime | |
import decimal | |
import json | |
import os | |
import sys | |
import urllib2 | |
from collections import defaultdict | |
from warnings import warn | |
### JSON utility | |
class CustomEncoder(json.JSONEncoder): | |
def default(self, o): | |
if isinstance(o, decimal.Decimal): | |
return float(o) | |
if isinstance(o, datetime.date): | |
return str(o) | |
return super(CustomEncoder, self).default(o) | |
### ACM parser | |
def parse_acm_csv(filename): | |
data = [] | |
csvfile = open(sys.argv[1], 'r') | |
csvreader = csv.reader(csvfile) | |
columns = csvreader.next() | |
for row in csvreader: | |
if not row: | |
continue | |
rowdict = parse_acm_csv_row(columns, row) | |
data.append(rowdict) | |
return (columns, data) | |
def parse_acm_csv_row(columns, row): | |
truerow = [x if x != "Yes" else True for x in row] | |
rowdict = dict(zip(columns, truerow)) | |
del(rowdict['']) | |
### parse dates | |
try: | |
[y,m,d] = [int(x) for x in rowdict['DateAcquired'].split('-')] | |
except: | |
if rowdict['DateAcquired'] != '': | |
warn("Datetime not parsed: '%s'" % rowdict['DateAcquired']) | |
[d,m,y] = [None,None,None] | |
pass | |
try: | |
rowdict['Date'] = datetime.date(day=d, month=m, year=y) | |
except: | |
# print "Datetime not valid: %s/%s/%s" % (d, m, y) | |
pass | |
if d and m and y and not 'Date' in rowdict: | |
try: | |
rowdict['Date'] = datetime.date(year=d, month=m, day=y) | |
# print "... valid in reverse format" | |
except: | |
warn("Datetime not valid: %s-%s-%s" % (d, m, y)) | |
pass | |
### set up additional fields | |
rowdict['Online'] = False | |
rowdict['Used'] = False | |
rowdict['PricePaidDec'] = decimal.Decimal('0.00') | |
rowdict['AdditionalPricePaidDec'] = decimal.Decimal('0.00') | |
if (rowdict['PricePaid']): | |
rowdict['PricePaidDec'] = decimal.Decimal(rowdict['PricePaid']) | |
if (rowdict['AdditionalPricePaid']): | |
rowdict['AdditionalPricePaidDec'] = decimal.Decimal(rowdict['AdditionalPricePaid']) | |
rowdict['PriceTotal'] = rowdict['PricePaidDec']+rowdict['AdditionalPricePaidDec'] | |
AF = rowdict['AcquiredFrom'] | |
### parse vendor information to chains / online | |
if 'eBay' in AF or 'bricklink' in AF: | |
# print "Online/Used vendor '%s'" % rowdict['AcquiredFrom'] | |
rowdict['Online'] = True | |
rowdict['Used'] = True | |
if 'online' in AF: | |
# print "Online vendor '%s'" % rowdict['AcquiredFrom'] | |
rowdict['Online'] = True | |
if ' (' in AF: | |
rowdict['Chain'] = AF[:AF.index(' (')] | |
else: | |
rowdict['Chain'] = AF | |
return rowdict | |
### data manipulation | |
def get_totals(data): | |
totals = dict() | |
totals['owned'] = len(data) | |
totals['distinct'] = len(set([x['SetNumber'] for x in data])) | |
totals['paid'] = sum([x['PricePaidDec'] for x in data]) | |
totals['additional'] = sum([x['AdditionalPricePaidDec'] for x in data]) | |
totals['total'] = totals['paid']+totals['additional'] | |
return totals | |
def get_segments(data): | |
# TODO DSL/metalanguage for name/condition mapping | |
segments = defaultdict(dict) | |
new = [x for x in data if not x['Used']] | |
segments['new'] = calculate_segment(new) | |
used = [x for x in data if x['Used']] | |
segments['used'] = calculate_segment(used) | |
online = [x for x in data if x['Online']] | |
segments['online'] = calculate_segment(online) | |
shop = [x for x in data if not x['Online']] | |
segments['shop'] = calculate_segment(shop) | |
return segments | |
def calculate_segment(set_list): | |
detail = dict() | |
detail['sets'] = set_list | |
detail['count'] = len(set_list) | |
detail['cost'] = sum(x['PriceTotal'] for x in set_list) | |
return detail | |
def get_vendor_details(data): | |
vendor_list = set([x['Chain'] for x in data]) | |
vendors = defaultdict(dict) | |
for vendor in vendor_list: | |
vendor_sets = [x for x in data if x['Chain'] == vendor] | |
vendors[vendor]['sets'] = vendor_sets | |
vendors[vendor]['total'] = sum([x['PriceTotal'] for x in vendor_sets]) | |
# print " %s: %s sets costing $%s" % (vendor, len(vendor_sets), vendor_total) | |
branches = set([x['AcquiredFrom'] for x in vendor_sets]) | |
if len(branches) > 1: | |
vendors[vendor]['branches'] = dict() | |
for branch in branches: | |
branch_sets = [x for x in vendor_sets if x['AcquiredFrom'] == branch] | |
vendors[vendor]['branches']['sets'] = branch_sets | |
vendors[vendor]['branches']['total'] = sum([x['PriceTotal'] for x in branch_sets]) | |
return vendors | |
def get_month_details(data): | |
months = defaultdict(dict) | |
months_text = sorted(set(['%4i/%02i' % (x['Date'].year, x['Date'].month) for x in data if 'Date' in x])) | |
months_tuples = sorted(set([(x['Date'].year, x['Date'].month) for x in data if 'Date' in x])) | |
months_list = dict(zip(months_text, months_tuples)) | |
# print "\nBought over %s months" % len(months) | |
for month in sorted(months_list): | |
month_sets = [x for x in data if 'Date' in x and | |
x['Date'].year == months_list[month][0] and | |
x['Date'].month == months_list[month][1] ] | |
months[month]['sets'] = month_sets | |
months[month]['count'] = len(month_sets) | |
months[month]['cost'] = sum([x['PriceTotal'] for x in month_sets]) | |
return months | |
### set information | |
def get_inventories(data): | |
set_list = set([x['SetNumber'] for x in data]) | |
if not os.path.exists('inventories'): | |
os.makedirs('inventories') | |
for set_number in sorted(set_list): | |
if not os.path.exists('inventories/%s.csv' % set_number): | |
fetch_inventory_csv(set_number) | |
sys.exit() | |
def fetch_inventory_csv(set_number): | |
inventory_url = "http://brickset.com/exportscripts/inventory/%s" % set_number | |
inventory_file = "inventories/%s.csv" % set_number | |
# naughty | |
i_am_ie = { 'User-agent' : "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)" } | |
req = urllib2.Request(inventory_url, headers=i_am_ie) | |
resp = urllib2.urlopen(req) | |
with open(inventory_file, "wb") as local_file: | |
local_file.write(resp.read()) | |
print "Fetched %s.csv" % set_number | |
### output | |
def report(data): | |
totals = get_totals(data) | |
months = get_month_details(data) | |
print "Have %s sets" % totals['owned'] | |
print "Have %s different sets" % totals['distinct'] | |
print "Total cost $%s + $%s (total $%s)" % (totals['paid'], totals['additional'], totals['total']) | |
vendors = get_vendor_details(data) | |
print "\nFrom %s vendors:" % len(vendors) | |
for vendor in sorted(vendor.keys()): | |
print vendor | |
# TODO reinstate months (years?) | |
segments = get_segments(data) | |
print "\n%s sets new (costing $%s)" % (segment['new']['count'], segment['new']['cost']) | |
print "%s sets used (costing $%s)" % (count, cost) | |
print "\n%s sets online (costing $%s)" % (count, cost) | |
print "%s sets in shops (costing $%s)" % (count, cost) | |
### main | |
filename = sys.argv[1] | |
(columns, data) = parse_acm_csv(filename) | |
# report(data) | |
get_inventories(data) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment