Skip to content

Instantly share code, notes, and snippets.

@blech
Last active December 15, 2015 02:49
Show Gist options
  • Save blech/5189854 to your computer and use it in GitHub Desktop.
Save blech/5189854 to your computer and use it in GitHub Desktop.
Lego expenses
#!/usr/bin/python
import csv
import datetime
import sys
from decimal import *
data = []
csvfile = open(sys.argv[1], 'r')
csvreader = csv.reader(csvfile)
columns = csvreader.next()
# print columns
for row in csvreader:
if not row:
continue
truerow = [x if x != "Yes" else True for x in row]
rowdict = dict(zip(columns, truerow))
# del(rowdict[''])
try:
[d,m,y] = [int(x) for x in rowdict['DateAcquired'].split('/')]
except:
if rowdict['DateAcquired'] != '':
print "Datetime not parsed: '%s'" % rowdict['DateAcquired']
[d,m,y] = [None,None,None]
pass
try:
rowdict['Date'] = datetime.date(day=d, month=m, year=y)
except:
# print "Datetime not valid: %s/%s/%s" % (d, m, y)
pass
if d and m and y and not 'Date' in rowdict:
try:
rowdict['Date'] = datetime.date(year=d, month=m, day=y)
# print "... valid in reverse format"
except:
print "Datetime not valid: %s-%s-%s" % (d, m, y)
pass
rowdict['Online'] = False
rowdict['Used'] = False
rowdict['PricePaidDec'] = Decimal('0.00')
rowdict['AdditionalPricePaidDec'] = Decimal('0.00')
if (rowdict['PricePaid']):
rowdict['PricePaidDec'] = Decimal(rowdict['PricePaid'])
if (rowdict['AdditionalPricePaid']):
rowdict['AdditionalPricePaidDec'] = Decimal(rowdict['AdditionalPricePaid'])
rowdict['PriceTotal'] = rowdict['PricePaidDec']+rowdict['AdditionalPricePaidDec']
AF = rowdict['AcquiredFrom']
if 'ebay' in AF or 'bricklink' in AF:
# print "Online/Used vendor '%s'" % rowdict['AcquiredFrom']
rowdict['Online'] = True
rowdict['Used'] = True
if 'online' in AF:
# print "Online vendor '%s'" % rowdict['AcquiredFrom']
rowdict['Online'] = True
if ' (' in AF:
rowdict['Chain'] = AF[:AF.index(' (')]
else:
rowdict['Chain'] = AF
data.append(rowdict)
print "Have %s sets" % len(data)
print "Have %s different sets" % len(set([x['SetNumber'] for x in data]))
total_price_paid = sum([x['PricePaidDec'] for x in data])
total_additional_price_paid = sum([x['AdditionalPricePaidDec'] for x in data])
total_total = total_price_paid+total_additional_price_paid
print "Total cost $%s + $%s (total $%s)" % (total_price_paid, total_additional_price_paid,
total_total)
vendors = set([x['Chain'] for x in data])
print "\nFrom %s vendors:" % len(vendors)
for vendor in vendors:
vendor_sets = [x for x in data if x['Chain'] == vendor]
vendor_total = sum([x['PriceTotal'] for x in vendor_sets])
print " %s: %s sets costing $%s" % (vendor, len(vendor_sets), vendor_total)
branches = set([x['AcquiredFrom'] for x in vendor_sets])
if len(branches) > 1:
for branch in branches:
branch_sets = [x for x in vendor_sets if x['AcquiredFrom'] == branch]
branch_total = sum([x['PriceTotal'] for x in branch_sets])
print " %s: %s sets costing $%s" % (branch, len(branch_sets), branch_total)
months_text = sorted(set(['%4i/%02i' % (x['Date'].year, x['Date'].month) for x in data if 'Date' in x]))
months_tuples = sorted(set([(x['Date'].year, x['Date'].month) for x in data if 'Date' in x]))
months = dict(zip(months_text, months_tuples))
print "\nBought over %s months" % len(months)
for month in sorted(months):
month_sets = [x for x in data if 'Date' in x and
x['Date'].year == months[month][0] and
x['Date'].month == months[month][1] ]
month_sets_bought = len(month_sets)
month_total = sum([x['PriceTotal'] for x in month_sets])
print " %s: %s sets costing $%s" % (month, month_sets_bought, month_total)
new_sets = [x for x in data if not x['Used']]
used_sets = [x for x in data if x['Used']]
online_sets = [x for x in data if x['Online']]
shop_sets = [x for x in data if not x['Online']]
count = len(new_sets)
cost = sum(x['PriceTotal'] for x in new_sets) # TODO routine
print "\n%s sets new (costing $%s)" % (count, cost)
count = len(used_sets)
cost = sum(x['PriceTotal'] for x in used_sets) # TODO routine
print "%s sets used (costing $%s)" % (count, cost)
count = len(online_sets)
cost = sum(x['PriceTotal'] for x in online_sets) # TODO routine
print "\n%s sets online (costing $%s)" % (count, cost)
count = len(shop_sets)
cost = sum(x['PriceTotal'] for x in shop_sets) # TODO routine
print "%s sets in shops (costing $%s)" % (count, cost)
# print data
#!/usr/bin/python
import csv
import datetime
import decimal
import json
import os
import sys
import urllib2
from collections import defaultdict
from warnings import warn
### JSON utility
class CustomEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, decimal.Decimal):
return float(o)
if isinstance(o, datetime.date):
return str(o)
return super(CustomEncoder, self).default(o)
### ACM parser
def parse_acm_csv(filename):
data = []
csvfile = open(sys.argv[1], 'r')
csvreader = csv.reader(csvfile)
columns = csvreader.next()
for row in csvreader:
if not row:
continue
rowdict = parse_acm_csv_row(columns, row)
data.append(rowdict)
return (columns, data)
def parse_acm_csv_row(columns, row):
truerow = [x if x != "Yes" else True for x in row]
rowdict = dict(zip(columns, truerow))
del(rowdict[''])
### parse dates
try:
[y,m,d] = [int(x) for x in rowdict['DateAcquired'].split('-')]
except:
if rowdict['DateAcquired'] != '':
warn("Datetime not parsed: '%s'" % rowdict['DateAcquired'])
[d,m,y] = [None,None,None]
pass
try:
rowdict['Date'] = datetime.date(day=d, month=m, year=y)
except:
# print "Datetime not valid: %s/%s/%s" % (d, m, y)
pass
if d and m and y and not 'Date' in rowdict:
try:
rowdict['Date'] = datetime.date(year=d, month=m, day=y)
# print "... valid in reverse format"
except:
warn("Datetime not valid: %s-%s-%s" % (d, m, y))
pass
### set up additional fields
rowdict['Online'] = False
rowdict['Used'] = False
rowdict['PricePaidDec'] = decimal.Decimal('0.00')
rowdict['AdditionalPricePaidDec'] = decimal.Decimal('0.00')
if (rowdict['PricePaid']):
rowdict['PricePaidDec'] = decimal.Decimal(rowdict['PricePaid'])
if (rowdict['AdditionalPricePaid']):
rowdict['AdditionalPricePaidDec'] = decimal.Decimal(rowdict['AdditionalPricePaid'])
rowdict['PriceTotal'] = rowdict['PricePaidDec']+rowdict['AdditionalPricePaidDec']
AF = rowdict['AcquiredFrom']
### parse vendor information to chains / online
if 'eBay' in AF or 'bricklink' in AF:
# print "Online/Used vendor '%s'" % rowdict['AcquiredFrom']
rowdict['Online'] = True
rowdict['Used'] = True
if 'online' in AF:
# print "Online vendor '%s'" % rowdict['AcquiredFrom']
rowdict['Online'] = True
if ' (' in AF:
rowdict['Chain'] = AF[:AF.index(' (')]
else:
rowdict['Chain'] = AF
return rowdict
### data manipulation
def get_totals(data):
totals = dict()
totals['owned'] = len(data)
totals['distinct'] = len(set([x['SetNumber'] for x in data]))
totals['paid'] = sum([x['PricePaidDec'] for x in data])
totals['additional'] = sum([x['AdditionalPricePaidDec'] for x in data])
totals['total'] = totals['paid']+totals['additional']
return totals
def get_segments(data):
# TODO DSL/metalanguage for name/condition mapping
segments = defaultdict(dict)
new = [x for x in data if not x['Used']]
segments['new'] = calculate_segment(new)
used = [x for x in data if x['Used']]
segments['used'] = calculate_segment(used)
online = [x for x in data if x['Online']]
segments['online'] = calculate_segment(online)
shop = [x for x in data if not x['Online']]
segments['shop'] = calculate_segment(shop)
return segments
def calculate_segment(set_list):
detail = dict()
detail['sets'] = set_list
detail['count'] = len(set_list)
detail['cost'] = sum(x['PriceTotal'] for x in set_list)
return detail
def get_vendor_details(data):
vendor_list = set([x['Chain'] for x in data])
vendors = defaultdict(dict)
for vendor in vendor_list:
vendor_sets = [x for x in data if x['Chain'] == vendor]
vendors[vendor]['sets'] = vendor_sets
vendors[vendor]['total'] = sum([x['PriceTotal'] for x in vendor_sets])
# print " %s: %s sets costing $%s" % (vendor, len(vendor_sets), vendor_total)
branches = set([x['AcquiredFrom'] for x in vendor_sets])
if len(branches) > 1:
vendors[vendor]['branches'] = dict()
for branch in branches:
branch_sets = [x for x in vendor_sets if x['AcquiredFrom'] == branch]
vendors[vendor]['branches']['sets'] = branch_sets
vendors[vendor]['branches']['total'] = sum([x['PriceTotal'] for x in branch_sets])
return vendors
def get_month_details(data):
months = defaultdict(dict)
months_text = sorted(set(['%4i/%02i' % (x['Date'].year, x['Date'].month) for x in data if 'Date' in x]))
months_tuples = sorted(set([(x['Date'].year, x['Date'].month) for x in data if 'Date' in x]))
months_list = dict(zip(months_text, months_tuples))
# print "\nBought over %s months" % len(months)
for month in sorted(months_list):
month_sets = [x for x in data if 'Date' in x and
x['Date'].year == months_list[month][0] and
x['Date'].month == months_list[month][1] ]
months[month]['sets'] = month_sets
months[month]['count'] = len(month_sets)
months[month]['cost'] = sum([x['PriceTotal'] for x in month_sets])
return months
### set information
def get_inventories(data):
set_list = set([x['SetNumber'] for x in data])
if not os.path.exists('inventories'):
os.makedirs('inventories')
for set_number in sorted(set_list):
if not os.path.exists('inventories/%s.csv' % set_number):
fetch_inventory_csv(set_number)
sys.exit()
def fetch_inventory_csv(set_number):
inventory_url = "http://brickset.com/exportscripts/inventory/%s" % set_number
inventory_file = "inventories/%s.csv" % set_number
# naughty
i_am_ie = { 'User-agent' : "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)" }
req = urllib2.Request(inventory_url, headers=i_am_ie)
resp = urllib2.urlopen(req)
with open(inventory_file, "wb") as local_file:
local_file.write(resp.read())
print "Fetched %s.csv" % set_number
### output
def report(data):
totals = get_totals(data)
months = get_month_details(data)
print "Have %s sets" % totals['owned']
print "Have %s different sets" % totals['distinct']
print "Total cost $%s + $%s (total $%s)" % (totals['paid'], totals['additional'], totals['total'])
vendors = get_vendor_details(data)
print "\nFrom %s vendors:" % len(vendors)
for vendor in sorted(vendor.keys()):
print vendor
# TODO reinstate months (years?)
segments = get_segments(data)
print "\n%s sets new (costing $%s)" % (segment['new']['count'], segment['new']['cost'])
print "%s sets used (costing $%s)" % (count, cost)
print "\n%s sets online (costing $%s)" % (count, cost)
print "%s sets in shops (costing $%s)" % (count, cost)
### main
filename = sys.argv[1]
(columns, data) = parse_acm_csv(filename)
# report(data)
get_inventories(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment