Transfer a file from a source URL to Amazon S3 with aiohttp and the boto3/botocore client
import asyncio
from contextlib import closing

import aiohttp
import boto3

file_url = 'http://speedtest.wdc01.softlayer.com/downloads/test100.zip'
bucket = 'some.bucket'
path = 'path/to/folder/'

kb = 1024  # bytes
mb = 1024 * kb
# S3 multipart uploads require every part except the last to be at least 5 MB.
CHUNK_SIZE = 8 * mb

s3 = boto3.client('s3')


async def transfer_file(session, url):
    fname = 'aiohttp_{}'.format(url.split('/')[-1])
    key = '{}{}'.format(path, fname)
    mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
    print('S3 multipart upload created with UploadId: {}'.format(mpu['UploadId']))
    part_no = 0
    parts = {
        'Parts': []
    }
    async with session.get(url) as response:
        assert response.status == 200
        data = bytearray()
        data_to_read = True
        while data_to_read:
            red = 0
            data.clear()
            # Buffer stream chunks until a full part has accumulated
            # (or the stream ends).
            while red < CHUNK_SIZE:
                chunk = await response.content.readany()
                if not chunk:
                    data_to_read = False
                    break
                data.extend(chunk)
                red += len(chunk)
            part_no += 1
            part = s3.upload_part(Body=data,
                                  Bucket=bucket,
                                  Key=key,
                                  PartNumber=part_no,
                                  UploadId=mpu['UploadId'])
            parts['Parts'].append({
                'ETag': part['ETag'],
                'PartNumber': part_no
            })
        del data
    s3.complete_multipart_upload(Bucket=bucket,
                                 Key=key,
                                 MultipartUpload=parts,
                                 UploadId=mpu['UploadId'])


loop = asyncio.get_event_loop()
with closing(aiohttp.ClientSession(loop=loop)) as session:
    loop.run_until_complete(transfer_file(session, file_url))
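
One caveat the script doesn't handle: if the download or an upload_part call fails partway through, the multipart upload stays open on S3 and its already-uploaded parts keep occupying (billed) storage until it is aborted. A minimal sketch of the cleanup, reusing the bucket, key, mpu, and parts names from above:

# Sketch only: wrap the transfer in try/except and abort the
# multipart upload on failure so no orphaned parts are left on S3.
mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
try:
    # ... stream the response and call s3.upload_part as above ...
    s3.complete_multipart_upload(Bucket=bucket,
                                 Key=key,
                                 MultipartUpload=parts,
                                 UploadId=mpu['UploadId'])
except Exception:
    s3.abort_multipart_upload(Bucket=bucket, Key=key,
                              UploadId=mpu['UploadId'])
    raise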
@foobarna ty for this.

Why `del data`? Won't it be automatically garbage collected once the function call ends?

Wouldn't `s3.upload_part` and `s3.complete_multipart_upload` block the event loop? I think they perform blocking HTTP requests under the hood. As a workaround, they could be put in an executor.
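
A rough sketch of that executor workaround (my own illustration, not something the gist author wrote): hand each blocking boto3 call to the loop's default thread-pool executor via run_in_executor, using functools.partial since run_in_executor only forwards positional arguments. The upload_part call inside transfer_file would become something like:

import functools

# Sketch: dispatch the blocking boto3 call to a thread pool so the
# event loop keeps running while the HTTP request to S3 is in flight.
# Passing None selects the loop's default ThreadPoolExecutor.
loop = asyncio.get_event_loop()
part = await loop.run_in_executor(
    None,
    functools.partial(s3.upload_part,
                      Body=bytes(data),  # copy; the bytearray is cleared each pass
                      Bucket=bucket,
                      Key=key,
                      PartNumber=part_no,
                      UploadId=mpu['UploadId']))

create_multipart_upload and complete_multipart_upload could be wrapped the same way.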