oddskool · September 10, 2013 07:00
diff --git a/parse_aws_s3_billing.py b/parse_aws_s3_billing.py
 # -*- coding:utf-8 -*-
 '''
 Simplistic script to parse the detailed AWS billing CSV file.

 Script displays cost of S3 operations broken down per region, bucket and usage 
 type (either storage or network). It also sums up the amount of storage used per bucket. 
 Output is filtered wrt to costs < 1$.

 See http://docs.aws.amazon.com/awsaccountbilling/latest/about/programaccess.html for 
 how to set up programmatic access to your billing.

 Should be simple enough to enhance this script and use it for other AWS resources 
 (EC2, EMR, etc)

 @author: @oddskool <https://github.com/oddskool>
 @license: BSD 3 clauses
 '''

 import sys
 import csv
 from collections import defaultdict

 def add_type(d):
    if d['RecordType'] == 'UsageQuantity':
        return None
    for field in ('Cost', 'UsageQuantity'):
        d[field] = float(d[field])
    for field in ('LinkedAccountId', 'InvoiceID', 'RecordType', 'RecordId',
                  'PayerAccountId', 'SubscriptionId'):
        del d[field]
    return d

 def parse(stats, d):
    d = add_type(d)
    if not d:
        return
    if d['ProductName'] != 'Amazon Simple Storage Service':
        return
    stats[(d['AvailabilityZone'] or 'N/A')+' * '+d['ResourceId']+' * '+d['UsageType']]['Cost'] += d['Cost']
    stats[(d['AvailabilityZone'] or 'N/A')+' * '+d['ResourceId']+' * '+d['UsageType']]['UsageQuantity'] += d['UsageQuantity']

 if __name__ == '__main__':
    fd = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
    reader = csv.reader(fd, delimiter=',', quotechar='"')
    legend = None
    stats = defaultdict(lambda: defaultdict(int))
    for row in reader:
        if not legend:
            legend = row    
            continue
        d = dict(zip(legend, row))
        try:
            parse(stats, d)
        except Exception as e:
            print e
            print row
            print d
    data = [ (resource, cost_usage) for resource, cost_usage in 
             stats.iteritems() if cost_usage['Cost'] > 1.0 ]
    data.sort(key=lambda x:x[-1]['Cost'], reverse=True)
    for d in data:
        print "%50s : $%.2f - %.2f GB" % (d[0],d[1]['Cost'],d[1]['UsageQuantity'])
	# -- coding:utf-8 --
	'''
	Simplistic script to parse the detailed AWS billing CSV file.

	Script displays cost of S3 operations broken down per region, bucket and usage
	type (either storage or network). It also sums up the amount of storage used per bucket.
	Output is filtered wrt to costs < 1$.

	See http://docs.aws.amazon.com/awsaccountbilling/latest/about/programaccess.html for
	how to set up programmatic access to your billing.

	Should be simple enough to enhance this script and use it for other AWS resources
	(EC2, EMR, etc)

	@author: @oddskool <https://github.com/oddskool>
	@license: BSD 3 clauses
	'''

	import sys
	import csv
	from collections import defaultdict

	def add_type(d):
	if d['RecordType'] == 'UsageQuantity':
	return None
	for field in ('Cost', 'UsageQuantity'):
	d[field] = float(d[field])
	for field in ('LinkedAccountId', 'InvoiceID', 'RecordType', 'RecordId',
	'PayerAccountId', 'SubscriptionId'):
	del d[field]
	return d

	def parse(stats, d):
	d = add_type(d)
	if not d:
	return
	if d['ProductName'] != 'Amazon Simple Storage Service':
	return
	stats[(d['AvailabilityZone'] or 'N/A')+' * '+d['ResourceId']+' * '+d['UsageType']]['Cost'] += d['Cost']
	stats[(d['AvailabilityZone'] or 'N/A')+' * '+d['ResourceId']+' * '+d['UsageType']]['UsageQuantity'] += d['UsageQuantity']

	if __name__ == '__main__':
	fd = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
	reader = csv.reader(fd, delimiter=',', quotechar='"')
	legend = None
	stats = defaultdict(lambda: defaultdict(int))
	for row in reader:
	if not legend:
	legend = row
	continue
	d = dict(zip(legend, row))
	try:
	parse(stats, d)
	except Exception as e:
	print e
	print row
	print d
	data = [ (resource, cost_usage) for resource, cost_usage in
	stats.iteritems() if cost_usage['Cost'] > 1.0 ]
	data.sort(key=lambda x:x[-1]['Cost'], reverse=True)
	for d in data:
	print "%50s : $%.2f - %.2f GB" % (d[0],d[1]['Cost'],d[1]['UsageQuantity'])
No results found