Last active
July 26, 2024 11:45
-
-
Save ripiuk/29da12bbfef629dbe8b4c27cdf5891ec to your computer and use it in GitHub Desktop.
A tool for downloading a list of files from S3 using asyncio.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import asyncio | |
import aiobotocore | |
import aiofiles | |
# Credentials passed explicitly to the S3 client. Fill these in before running;
# avoid committing real values to version control.
AWS_ACCESS_KEY_ID = ''  # aws access key
AWS_SECRET_ACCESS_KEY = ''  # aws secret key

# Timeouts handed to the aiobotocore client configuration
# (botocore documents both values as seconds).
READ_TIMEOUT = 500
CONNECT_TIMEOUT = 500
async def go(loop):
    """Download every object under a fixed S3 prefix into a local directory.

    Lists all keys under ``folder`` in ``bucket`` using the ``list_objects``
    paginator, then schedules one ``download_file`` call per key and awaits
    them as they complete, printing a progress line after each one and the
    elapsed wall-clock seconds at the end.

    Args:
        loop: Event loop passed through to ``aiobotocore.get_session``.
    """
    bucket = 'bucket-example'  # your bucket name
    folder = 'Example/The_folder'  # path to folder in s3
    local_dir = '/home/some_user_name/data_from_s3/'  # where do you want to download the files (local storage)
    region_name = 'us-west-2'
    keys = []

    start = time.time()

    session = aiobotocore.get_session(loop=loop)
    configuration = aiobotocore.config.AioConfig(
        read_timeout=READ_TIMEOUT,
        connect_timeout=CONNECT_TIMEOUT,
    )
    async with session.create_client(
            's3',
            region_name=region_name,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            config=configuration) as client:
        # Collect every key first so the total is known for progress output.
        paginator = client.get_paginator('list_objects')
        async for result in paginator.paginate(Bucket=bucket, Prefix=folder):
            # A page has no 'Contents' entry when nothing matches the prefix.
            keys.extend(c['Key'] for c in result.get('Contents', []))

        downloaded_files_count = 0
        total_files = len(keys)

        # NOTE: all downloads are started at once; for very large prefixes
        # consider bounding concurrency (e.g. with asyncio.Semaphore).
        download_futures = [download_file(client, bucket, local_dir, key) for key in keys]
        for download_future in asyncio.as_completed(download_futures):
            await download_future
            downloaded_files_count += 1
            print('{} of {} files downloaded'.format(downloaded_files_count, total_files))

        print(time.time() - start)
async def download_file(client, bucket: str, local_dir: str, key: str):
    """Download one S3 object to ``local_dir``, mirroring the key's path.

    Args:
        client: S3 client exposing an awaitable ``get_object``.
        bucket: Name of the bucket that holds the object.
        local_dir: Local root directory the key path is appended to.
        key: Object key; its ``/``-separated path is recreated locally.

    Keys ending in ``/`` are "folder" placeholder objects with no file
    content; for those only the local directory is created and nothing is
    fetched.
    """
    # Strip leading slashes so os.path.join cannot discard local_dir; joining
    # (rather than concatenating) also works when local_dir lacks a trailing '/'.
    target_path = os.path.join(local_dir, key.lstrip('/'))

    if key.endswith('/'):
        # Placeholder "directory" object: opening it as a file would fail.
        os.makedirs(target_path, exist_ok=True)
        return

    response = await client.get_object(Bucket=bucket, Key=key)
    async with response['Body'] as stream:
        downloaded_bytes = await stream.read()

    parent_dir = os.path.dirname(target_path)
    if parent_dir:
        # exist_ok avoids the separate exists() check and tolerates the
        # directory having been created by another download.
        os.makedirs(parent_dir, exist_ok=True)

    async with aiofiles.open(target_path, 'wb+') as file:
        await file.write(downloaded_bytes)
# Script entry: run the download coroutine to completion on the event loop.
loop = asyncio.get_event_loop()
loop.run_until_complete(go(loop))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment