import sys
import json
from datetime import datetime, timedelta
from time import sleep

import boto3

# To flatten the JSON output into TSV:
#   pbpaste | jq -r '.[] | [.BucketName, .StorageType, .SizeBytes, .ObjectCount, .Timestamp] | @tsv'
# based on http://www.quora.com/Amazon-S3/What-is-the-fastest-way-to-measure-the-total-size-of-an-S3-bucket
# assumes you've already configured your access id & secret key
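# (if not, one way to set them up, assuming the AWS CLI is installed, is
#   aws configure --profile bkp-default
# which writes the credentials that the session below looks up)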

session = boto3.Session(profile_name='bkp-default', region_name='us-east-1')
cw = session.client('cloudwatch')

AWS_NAMESPACE = 'AWS/S3'


def getValue(key, datapoints):
    """Return the given key from the first datapoint, or "" if there are none."""
    for item in datapoints:
        return item[key]
    return ""


def getItem(name, dimensions):
    """Return the value of the dimension with the given name, or None if absent."""
    for item in dimensions:
        if item["Name"] == name:
            return item["Value"]
    return None
def get_bucket_size(base_date_str):
    """For a given date (YYYY-MM-DD), query CloudWatch for the daily
    BucketSizeBytes and NumberOfObjects metrics of every S3 bucket and
    storage type, and return them as a list of dicts."""
    base_date = datetime.strptime(base_date_str, '%Y-%m-%d')  # midnight of the requested day
    dataSize = []
    # list_metrics is paginated, so walk every page to cover all buckets
    paginator = cw.get_paginator('list_metrics')
    pages = paginator.paginate(Namespace=AWS_NAMESPACE, MetricName='BucketSizeBytes')
    for page in pages:
        for metric in page['Metrics']:
            bucketName = getItem('BucketName', metric['Dimensions'])
            print("Getting metrics for bucket: {}".format(bucketName))
            bucketSize = cw.get_metric_statistics(
                Namespace=AWS_NAMESPACE,
                MetricName='BucketSizeBytes',
                Dimensions=metric['Dimensions'],
                StartTime=base_date.isoformat(),
                EndTime=(base_date + timedelta(days=1)).isoformat(),
                Period=86400,  # one datapoint covering the whole day
                Statistics=['Average'],
                Unit='Bytes'
            )
            sleep(0.5)  # throttle to stay clear of CloudWatch API rate limits
            bucketFileCount = cw.get_metric_statistics(
                Namespace=AWS_NAMESPACE,
                MetricName='NumberOfObjects',
                Dimensions=[{
                    "Name": "BucketName",
                    "Value": bucketName
                }, {
                    "Name": "StorageType",
                    "Value": "AllStorageTypes"
                }],
                StartTime=base_date.isoformat(),
                EndTime=(base_date + timedelta(days=1)).isoformat(),
                Period=86400,
                Statistics=['Average'],
                Unit='Count'
            )
            # With a one-day window and a daily period, at most one datapoint
            # is expected per query
            dateIso8601 = ""
            timestamp = getValue('Timestamp', bucketSize['Datapoints'])
            if timestamp:
                dateIso8601 = timestamp.isoformat()
            dataSize.append({
                "BucketName": bucketName,
                "StorageType": getItem('StorageType', metric['Dimensions']),
                "SizeBytes": getValue('Average', bucketSize['Datapoints']),
                "ObjectCount": getValue('Average', bucketFileCount['Datapoints']),
                "Timestamp": dateIso8601,
            })
            sleep(0.5)
    return dataSize
if __name__ == '__main__':
    bucket_sizes = get_bucket_size(sys.argv[1])
    out_path = "/Users/admartins/LocalDocuments/notes/s3_bucket_size_metrics_{}.log".format(sys.argv[1])
    with open(out_path, "w") as text_file:
        text_file.write(json.dumps(bucket_sizes, ensure_ascii=False))
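
# Usage sketch (the script filename and date are hypothetical examples; the
# profile, region and output path above are the author's and will likely need
# adjusting for other environments):
#   python s3_bucket_size_metrics.py 2020-08-25
# The resulting log is a JSON array; the jq filter from the header comment
# turns it into TSV, e.g.:
#   jq -r '.[] | [.BucketName, .StorageType, .SizeBytes, .ObjectCount, .Timestamp] | @tsv' \
#     s3_bucket_size_metrics_2020-08-25.log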