import sys
import json
import boto3
from datetime import datetime, timedelta
from time import sleep
# pbpaste | jq -r '.[] | [.BucketName, .StorageType, .SizeBytes, .ObjectCount, .Timestamp] | @tsv'
# based on http://www.quora.com/Amazon-S3/What-is-the-fastest-way-to-measure-the-total-size-of-an-S3-bucket
# assumes you've already configured your access id & secret key
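# usage: python <this-script>.py 2020-08-26  (argument is the metric date, YYYY-MM-DD;
# the script file name is illustrative -- the gist does not record one)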
session = boto3.Session(profile_name='bkp-default', region_name='us-east-1')
cw = session.client('cloudwatch')
AWS_NAMESPACE = 'AWS/S3'
def getValue(key, datapoints):
    """Return `key` from the first datapoint, or "" when there are none."""
    for item in datapoints:
        return item.get(key, "")
    return ""
def getItem(name, dimensions):
    """Return the Value of the dimension whose Name matches `name`, or None."""
    for item in dimensions:
        if item["Name"] == name:
            return item["Value"]
    return None
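# Sketch (not wired in below): list_metrics caps each response at 500 metrics,
# so accounts with many buckets need the ListMetrics paginator. A drop-in
# replacement for the single cw.list_metrics() call could look like this:
def list_all_size_metrics():
    """Collect every BucketSizeBytes metric across all ListMetrics pages."""
    metrics = []
    paginator = cw.get_paginator('list_metrics')
    for page in paginator.paginate(Namespace=AWS_NAMESPACE, MetricName='BucketSizeBytes'):
        metrics.extend(page['Metrics'])
    return metrics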
def get_bucket_size(base_date_str):
    '''Given a date string (YYYY-MM-DD), fetch the daily BucketSizeBytes and
    NumberOfObjects CloudWatch metrics for every S3 bucket in the account.
    Returns one record per bucket/storage-type combination with the size in
    bytes and the object count.'''
    base_date = datetime.strptime(base_date_str, '%Y-%m-%d').replace(hour=0, minute=0, second=0, microsecond=0)
    # NOTE: list_metrics returns at most 500 metrics per call; for accounts
    # with more buckets, use the paginated helper sketched above instead.
    response = cw.list_metrics(
        Namespace=AWS_NAMESPACE,
        MetricName='BucketSizeBytes'
    )
    dataSize = []
    # For each BucketSizeBytes metric (one per bucket/storage-type), fetch data
    for metric in response['Metrics']:
        bucketName = getItem('BucketName', metric['Dimensions'])
        print("Getting metrics for bucket: {}".format(bucketName))
        # Average size in bytes over the one-day window starting at base_date
        bucketSize = cw.get_metric_statistics(
            Namespace=AWS_NAMESPACE,
            MetricName='BucketSizeBytes',
            Dimensions=metric['Dimensions'],
            StartTime=base_date.isoformat(),
            EndTime=(base_date + timedelta(days=1)).isoformat(),
            Period=86400,
            Statistics=['Average'],
            Unit='Bytes'
        )
        sleep(0.5)  # throttle to stay under the CloudWatch API rate limit
        # Object count is only published under the AllStorageTypes dimension
        bucketFileCount = cw.get_metric_statistics(
            Namespace=AWS_NAMESPACE,
            MetricName='NumberOfObjects',
            Dimensions=[{
                "Name": "BucketName",
                "Value": bucketName
            }, {
                "Name": "StorageType",
                "Value": "AllStorageTypes"
            }],
            StartTime=base_date.isoformat(),
            EndTime=(base_date + timedelta(days=1)).isoformat(),
            Period=86400,
            Statistics=['Average'],
            Unit='Count'
        )
dateIso8601 = ""
timestamp = getValue('Timestamp', bucketSize['Datapoints'])
if timestamp:
dateIso8601 = timestamp.isoformat()
dataSize.append({
"BucketName": bucketName,
"StorageType": getItem('StorageType', metric['Dimensions']),
"SizeBytes": getValue('Average', bucketSize['Datapoints']),
"ObjectCount": getValue('Average', bucketFileCount['Datapoints']),
"Timestamp": dateIso8601,
})
sleep(0.5)
return dataSize
if __name__ == '__main__':
    bucket_sizes = get_bucket_size(sys.argv[1])
    log_path = "/Users/admartins/LocalDocuments/notes/s3_bucket_size_metrics_{}.log".format(sys.argv[1])
    with open(log_path, "w") as text_file:
        text_file.write(json.dumps(bucket_sizes, ensure_ascii=False))
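# Example post-processing of the output log with the jq filter from the top of
# this file (the date in the file name is illustrative):
#   jq -r '.[] | [.BucketName, .StorageType, .SizeBytes, .ObjectCount, .Timestamp] | @tsv' \
#     s3_bucket_size_metrics_2020-08-26.log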