Skip to content

Instantly share code, notes, and snippets.

@dlinsley
Last active June 27, 2022 06:25
Show Gist options
  • Save dlinsley/7670b1e14c936182bd09e9114fde63d7 to your computer and use it in GitHub Desktop.
Save dlinsley/7670b1e14c936182bd09e9114fde63d7 to your computer and use it in GitHub Desktop.
AWS S3: change the storage class of a bucket's objects based on object size
import boto3
import argparse
import string
# Command-line interface. Every option has a default, so running the script
# with no arguments behaves as a report-only pass over the placeholder bucket.
# The text is passed as description=; given positionally it would be taken as
# the program name and show up in the usage line.
parser = argparse.ArgumentParser(description='Change storage class of s3 objects')
parser.add_argument('--bucket', dest='myBucket', default='yourBucketName', help='S3 Bucket to search')
parser.add_argument('--from', dest='fromPath', default='', help='s3 path to start search from')
# Opt-in switch for the mode that rewrites objects; off unless explicitly given.
parser.add_argument('--change', dest='change', action='store_true', default=False,
                    help='change the storage classes instead of only reporting what would change')
cliArgs = parser.parse_args()
myBucket = cliArgs.myBucket
# Each list_objects_v2 request returns up to 1000 objects.
# The listing loop keeps issuing list_objects_v2 requests until the end of the
# bucket is reached.
# Number of objects in the most recent listing page; starts at the 1000-object
# page maximum.
lastReqLength = 1000
# Last key seen so far; the next listing request starts after it.
# Seeded from --from ('' means start at the beginning of the bucket).
lastKey = cliArgs.fromPath
# False: only report mismatched objects. True: rewrite them.
change = cliArgs.change
# Counters
whereItShouldBe = 0  # objects whose storage class already fits their size
movedToStandard = 0  # STANDARD_IA objects below the size threshold
movedToIA = 0        # STANDARD objects at or above the size threshold
#######################################
def toStandard(client, bucket, key):
    """Rewrite an object in place with the STANDARD storage class.

    The object is copied onto itself (same bucket, same key) through
    ``client.copy``, requesting ``StorageClass='STANDARD'`` and
    ``MetadataDirective='COPY'``.

    Args:
        client: object exposing a ``copy(source, bucket, key, ExtraArgs=...)``
            method; the script passes a boto3 S3 client.
        bucket: name of the bucket that holds the object.
        key: key of the object to rewrite.
    """
    copy_source = {'Bucket': bucket, 'Key': key}
    client.copy(
        copy_source,
        bucket,
        key,
        ExtraArgs={'StorageClass': 'STANDARD', 'MetadataDirective': 'COPY'},
    )
def toInfrequent(client, bucket, key):
    """Rewrite an object in place with the STANDARD_IA storage class.

    The object is copied onto itself (same bucket, same key) through
    ``client.copy``, requesting ``StorageClass='STANDARD_IA'`` and
    ``MetadataDirective='COPY'``.

    Args:
        client: object exposing a ``copy(source, bucket, key, ExtraArgs=...)``
            method; the script passes a boto3 S3 client.
        bucket: name of the bucket that holds the object.
        key: key of the object to rewrite.
    """
    copy_source = {'Bucket': bucket, 'Key': key}
    client.copy(
        copy_source,
        bucket,
        key,
        ExtraArgs={'StorageClass': 'STANDARD_IA', 'MetadataDirective': 'COPY'},
    )
#########################################
# Walk the bucket page by page, compare each object's storage class with its
# size, and either report or fix the mismatches depending on `change`.
s3 = boto3.client('s3')
if change:
    print("Changing all objects in bucket with STANDARD Storage that are over 128KB to STANDARD_IA")
    print("Changing all objects in bucket with STANDARD_IA Storage that are under 128KB to STANDARD")
else:
    print("Objects with STANDARD Storage that are over 128KB")
    print("And Objects with STANDARD_IA Storage that are under 128KB")

# Size in bytes from which an object is expected to be in STANDARD_IA;
# anything smaller is expected to be in STANDARD.
iaMinimumSize = 128000

morePages = True
while morePages:
    if lastKey == "":
        print('Query from root')
        myObjects = s3.list_objects_v2(Bucket=myBucket)
    else:
        print('Query from %s' % lastKey)
        myObjects = s3.list_objects_v2(Bucket=myBucket, StartAfter=lastKey)

    # The response carries no 'Contents' entry when the listing is empty
    # (empty bucket, or nothing after lastKey), so index defensively instead
    # of failing with KeyError.
    pageObjects = myObjects.get('Contents', [])
    lastReqLength = len(pageObjects)

    for obj in pageObjects:
        # Remember the most recent key so the next request resumes after it.
        lastKey = obj['Key']
        thisKey = obj['Key']
        thisSize = obj['Size']
        if obj['StorageClass'] == 'STANDARD' and thisSize >= iaMinimumSize:
            # go to IA
            movedToIA += 1
            if change:
                print('[STANDARD] -> [STANDARD_IA] %d %s' % (thisSize, thisKey))
                toInfrequent(s3, myBucket, thisKey)
            else:
                print('[STANDARD] !!TOO BIG!! %d %s' % (thisSize, thisKey))
        elif obj['StorageClass'] == 'STANDARD_IA' and thisSize < iaMinimumSize:
            # go to standard
            movedToStandard += 1
            if change:
                print('[STANDARD_IA] -> [STANDARD] %d %s' % (thisSize, thisKey))
                toStandard(s3, myBucket, thisKey)
            else:
                print('[STANDARD_IA] !!TOO SMALL!! %d %s' % (thisSize, thisKey))
        else:
            # Already in the expected class, or in a class this script ignores.
            whereItShouldBe += 1

    # Rely on the service's own truncation flag rather than on the page being
    # exactly 1000 entries long. The empty-page guard prevents looping forever
    # on an unchanged lastKey.
    morePages = lastReqLength > 0 and myObjects.get('IsTruncated', False)

print('')
print('Objects that were where they should be: %d' % (whereItShouldBe))
if change:
    print('Objects changed to STANDARD: %d' % (movedToStandard))
    print('Objects changed to STANDARD_IA: %d' % (movedToIA))
else:
    print('Objects that should be changed to STANDARD: %d' % (movedToStandard))
    print('Objects that should be changed to STANDARD_IA: %d' % (movedToIA))
print('... The End.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment