Created
November 21, 2022 19:15
-
-
Save alisade/1035102391bc5811ea155e26cb5a70a5 to your computer and use it in GitHub Desktop.
Merge existing files on S3 without downloading them first, using multipart copy (UploadPartCopy).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Merge existing S3 objects into one object server-side, using multipart
# upload with UploadPartCopy so that no object data is downloaded.
#
# Limitations (imposed by S3 multipart upload):
#   * every part except the last must be at least 5 MiB, so every source
#     object except the last one must be at least 5 MiB;
#   * a multipart upload can have at most 10,000 parts.
import boto3
from datetime import datetime

# S3 allows part numbers 1..10000 in a single multipart upload.
MAX_PARTS = 10000

client = boto3.client('s3')

# Destination key is stamped with the local time the script started.
now = datetime.now()
timestamp = now.strftime("%Y%m%d_%H%M%S")
bucket = "bucket-testing-dev"
prefix = "other/test/"
key = f'other/FILE_{timestamp}'

# A single list_objects_v2 call returns at most 1000 keys, so walk every
# page; otherwise larger prefixes would be silently truncated.
files = []
paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    # 'Contents' is absent from the response when no key matches.
    files.extend(item['Key'] for item in page.get('Contents', []))

# Fail before creating the upload, so nothing has to be cleaned up.
if not files:
    raise SystemExit(f"No objects found under s3://{bucket}/{prefix}; nothing to merge")
if len(files) > MAX_PARTS:
    raise SystemExit(
        f"Found {len(files)} objects under s3://{bucket}/{prefix}; "
        f"a multipart upload supports at most {MAX_PARTS} parts"
    )

r = client.create_multipart_upload(Bucket=bucket, Key=key)
upload_id = r['UploadId']

mp = {'Parts': []}
try:
    # Each source object becomes one part; part numbers start at 1 and
    # follow the order in which the keys were listed.
    for part_number, file in enumerate(files, start=1):
        r = client.upload_part_copy(
            Bucket=bucket,
            Key=key,
            CopySource={'Bucket': bucket, 'Key': file},
            PartNumber=part_number,
            UploadId=upload_id,
        )
        # The completion request does not accept 'LastModified'; keep every
        # other field of the copy result (ETag, any checksums) and add the
        # part number. Building a new dict leaves the response untouched and
        # does not fail if 'LastModified' is missing.
        part = {
            name: value
            for name, value in r['CopyPartResult'].items()
            if name != 'LastModified'
        }
        part['PartNumber'] = part_number
        mp['Parts'].append(part)

    client.complete_multipart_upload(
        Bucket=bucket, Key=key, UploadId=upload_id, MultipartUpload=mp
    )
except BaseException:
    # Parts of an unfinished multipart upload stay stored (and billed) until
    # the upload is aborted, so clean up on any failure, including Ctrl-C,
    # and then let the original error propagate.
    client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
    raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please work