Skip to content

Instantly share code, notes, and snippets.

@ivanlonel
Last active April 1, 2021 00:32
Show Gist options
  • Select an option

  • Save ivanlonel/b127cf717fedf76c0316bb0930ad184f to your computer and use it in GitHub Desktop.

Select an option

Save ivanlonel/b127cf717fedf76c0316bb0930ad184f to your computer and use it in GitHub Desktop.
Renaming files in AWS S3
import boto3
import json
import logging
import argparse
import itertools
# Bucket.delete_objects allows a maximum of 1000 objects to be deleted in a single request
def break_in_chunks(iterable, max_size):
    """Split *iterable* into successive chunks of at most *max_size* items.

    Needed because Bucket.delete_objects accepts a maximum of 1000 objects
    per request, so the stream of deletions must be batched.

    Args:
        iterable: Any iterable (may be a lazy generator; it is consumed once).
        max_size: Maximum number of items per chunk. ``0`` means "no
            chunking" — the iterable itself is yielded as the single chunk.
            Negative values yield nothing (both cases preserve the behavior
            of the original sentinel/zip_longest implementation).

    Yields:
        Tuples of up to *max_size* items, in input order. Unlike the
        original version (tuples for full chunks, a generator for the last
        one), every chunk is consistently a tuple.
    """
    if max_size == 0:
        # Size 0 passes the whole iterable through as one chunk.
        yield iterable
        return
    if max_size < 0:
        # Original behavior: zip_longest received zero iterators and
        # produced nothing; keep that contract explicit here.
        return
    source = iter(iterable)
    while True:
        # islice consumes at most max_size items per pass; an empty tuple
        # means the source is exhausted.
        chunk = tuple(itertools.islice(source, max_size))
        if not chunk:
            break
        yield chunk
def copy_objects(dest_bucket, src_bucket_name, prefix, transform_function):
    """Copy every object under *prefix* to its transformed key, lazily.

    For each object in *dest_bucket* whose key starts with *prefix*, the
    key is passed through *transform_function*; when the result differs,
    the object is copied from *src_bucket_name* under the new key.

    Yields:
        ``{'Key': <original key>}`` for each successful copy — exactly the
        entry format Bucket.delete_objects expects, so the caller can
        delete the old keys afterwards. Failed copies are logged and
        skipped (best-effort), so their originals are never yielded for
        deletion.
    """
    for entry in dest_bucket.objects.filter(Prefix=prefix):
        old_key = entry.key
        new_key = transform_function(old_key)
        if new_key == old_key:
            # Name unchanged — nothing to copy or delete.
            continue
        try:
            dest_bucket.copy(CopySource={'Bucket': src_bucket_name, 'Key': old_key}, Key=new_key)
        except Exception as exc:
            logging.error(f'Error copying object {old_key} to {new_key}: {exc!r}')
        else:
            # Only a confirmed copy makes the original safe to delete.
            yield {'Key': old_key}
def _main():
    """CLI entry point: rename S3 objects by copying then batch-deleting."""
    arg_parser = argparse.ArgumentParser(description="Batch rename files in S3 buckets.")
    arg_parser.add_argument('-a', '--access-key-id', required=True, help='AWS_ACCESS_KEY_ID')
    arg_parser.add_argument('-s', '--secret-access-key', required=True, help='AWS_SECRET_ACCESS_KEY')
    arg_parser.add_argument('-r', '--region-name', help='AWS_REGION_NAME')
    arg_parser.add_argument('-p', '--prefix', required=True, help='Common prefix of the files to be renamed')
    arg_parser.add_argument('-b', '--bucket-name', required=True, help='Source bucket')
    arg_parser.add_argument('-d', '--dest-bucket-name', help='Destination bucket, if different from source')
    # SECURITY NOTE: type=eval executes arbitrary user-supplied code, as the
    # help text itself acknowledges — acceptable only for a personal script
    # run with trusted input.
    arg_parser.add_argument('-t', '--transform-function', type=eval, required=True, help='''
Transform function that receives the original file name and returns the new name.
The string passed will be fed to the eval() builtin function.
Valid examples are "lambda s: s.upper()" and "exec('import urllib') or urllib.parse.unquote".
Yes, this is highly exploitable.
''')
    options = arg_parser.parse_args()

    # Fall back to the source bucket when no destination was given.
    target_bucket_name = (options.bucket_name
                          if options.dest_bucket_name is None
                          else options.dest_bucket_name)

    session = boto3.Session(
        aws_access_key_id=options.access_key_id,
        aws_secret_access_key=options.secret_access_key,
        region_name=options.region_name
    )
    bucket = session.resource('s3').Bucket(target_bucket_name)

    # delete_objects accepts at most 1000 keys per call, hence the batching.
    for batch in break_in_chunks(copy_objects(bucket, options.bucket_name, options.prefix, options.transform_function), 1000):
        print(json.dumps(bucket.delete_objects(Delete={'Objects': list(batch)}), indent='\t'))


if __name__ == '__main__':
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment