Last active
February 5, 2025 15:57
-
-
Save altipard/5d9735c446ddf7c2fcb8 to your computer and use it in GitHub Desktop.
Download multiple files using the asyncio module from python3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| __author__ = "Daniel Altiparmak ([email protected])" | |
| __copyright__ = "Copyright (C) 2015 Daniel Altiparmak" | |
| __license__ = "GPL 3.0" | |
| import asyncio | |
| import aiohttp | |
| import tqdm | |
| import string | |
| import random | |
# Get content and write it to a file.
def write_to_file(filename, content):
    """Write ``content`` to ``filename`` in binary mode.

    An existing file with the same name is truncated and overwritten.

    Args:
        filename: path of the file to create.
        content: bytes-like payload to store.

    Any exception raised by ``open`` or ``write`` propagates to the caller.
    """
    # The context manager closes the handle even when write() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(filename, 'wb') as f:
        f.write(content)
# A helper coroutine to perform GET requests.
async def get(*args, **kwargs):
    """Perform an HTTP GET request and return the raw response body.

    All positional and keyword arguments are forwarded unchanged to
    ``aiohttp.ClientSession.get`` (typically the URL comes first).

    Returns:
        The complete response payload as read by ``response.read()``.
    """
    # Native ``async``/``await`` replaces the generator-based
    # ``@asyncio.coroutine`` / ``yield from`` form, and the session context
    # managers replace the legacy ``aiohttp.request`` + ``read_and_close``
    # calls. Both ``async with`` blocks release the connection and session
    # even if reading the body fails.
    async with aiohttp.ClientSession() as session:
        async with session.get(*args, **kwargs) as response:
            return await response.read()
async def download_file(url):
    """Download ``url`` and store the body under a random ``.png`` name.

    The whole routine runs while holding ``r_semaphore``, so the number of
    simultaneous downloads is bounded by that semaphore. The file name is
    relative, so the file is created in the current working directory.

    Args:
        url: address passed to ``get``.
    """
    # ``async with`` / ``await`` replace ``with (yield from sem)`` and
    # ``asyncio.async(...)``; the latter cannot even be parsed once
    # ``async`` became a reserved keyword.
    async with r_semaphore:
        content = await get(url)

        # Create a random 10-character file name (lowercase letters, digits).
        length = 10
        alphabet = string.ascii_lowercase + string.digits
        file_string = ''.join(random.choice(alphabet) for _ in range(length))
        filename = '{}.png'.format(file_string)

        write_to_file(filename, content)
async def wait_with_progressbar(coros):
    """Await every awaitable in ``coros`` while showing a progress bar.

    The bar is drawn with tqdm (install it with ``pip install tqdm``).
    Awaitables are awaited in completion order; their results are
    discarded, and an exception raised by any of them propagates.

    Args:
        coros: sized collection of awaitables; ``len(coros)`` is used as
            the progress bar total.
    """
    # Declared with ``async def`` because the generator-based
    # ``@asyncio.coroutine`` decorator is no longer provided by asyncio.
    for finished in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
        await finished
# Every download targets the same placeholder-image URL.
images = ['http://lorempixel.com/1920/1920/' for _ in range(100)]

# Create and install the event loop explicitly: relying on
# asyncio.get_event_loop() to create one implicitly is deprecated and
# fails on recent Python versions.
eloop = asyncio.new_event_loop()
asyncio.set_event_loop(eloop)

# Avoid too many requests (coroutines) at the same time by limiting the
# number of simultaneous requests with a semaphore. It is created after the
# loop has been installed so that, on Python versions where a Semaphore
# binds to the current loop at construction time, it binds to ``eloop``.
r_semaphore = asyncio.Semaphore(10)

coroutines = [download_file(url) for url in images]

try:
    eloop.run_until_complete(wait_with_progressbar(coroutines))
finally:
    # Close the loop even when one of the downloads raises.
    eloop.close()
This is a nice, simple example that ranks fairly high in Google results; unfortunately, it does not use modern asyncio syntax. Here's the exact same script, updated to work with newer versions of Python:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Daniel Altiparmak ([email protected]); Updated 2025 Ryan Meyers (github.com/sreyemnayr)"
__copyright__ = "Copyright (C) 2015 Daniel Altiparmak; 2025 Daniel Altiparmak & Ryan Meyers"
__license__ = "GPL 3.0"
import asyncio
import aiohttp
import tqdm
import string
import random
# Shared semaphore entered by download_file via ``async with``; its initial
# value of 20 bounds how many coroutines can hold it at the same time.
r_semaphore = asyncio.Semaphore(20)
def write_to_file(filename, content):
    """Write ``content`` to ``filename`` in binary mode.

    An existing file with the same name is truncated and overwritten.

    Args:
        filename: path of the file to create.
        content: bytes-like payload to store.

    Any exception raised by ``open`` or ``write`` propagates to the caller.
    """
    # The context manager closes the handle even when write() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(filename, "wb") as f:
        f.write(content)
# Helper coroutine that issues a single GET request.
async def get(*args, **kwargs):
    """Perform an HTTP GET request and return the raw response body.

    A fresh ``aiohttp.ClientSession`` is opened for each call; every
    positional and keyword argument is handed to ``session.get`` as-is.

    Returns:
        The complete response payload as read by ``response.read()``.
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(*args, **kwargs) as reply:
            body = await reply.read()
            return body
async def download_file(url):
    """Download ``url`` and store the body under a random ``.png`` name.

    Everything happens while ``r_semaphore`` is held, so the number of
    coroutines inside this routine at once is bounded by that semaphore.
    The file name is relative, so the file lands in the current working
    directory.

    Args:
        url: address passed to ``get``.
    """
    async with r_semaphore:
        # Pick a random 10-character stem from lowercase letters and digits.
        alphabet = string.ascii_lowercase + string.digits
        stem = ''.join([random.choice(alphabet) for _ in range(10)])
        target = f'{stem}.png'

        # Fetch the payload, then persist it.
        payload = await get(url)
        write_to_file(target, payload)
async def wait_with_progressbar(coros):
    """Await all of ``coros`` and render a nice progress bar meanwhile.

    The bar comes from tqdm; install it by using ``pip install tqdm``.
    Awaitables are awaited in completion order and their results are
    discarded.

    Args:
        coros: sized collection of awaitables; its length is the bar total.
    """
    in_completion_order = asyncio.as_completed(coros)
    progress = tqdm.tqdm(in_completion_order, total=len(coros))
    for finished in progress:
        await finished
# One hundred downloads of the same placeholder-image URL.
coroutines = [download_file('http://lorempixel.com/1920/1920/') for _ in range(100)]

# asyncio.get_event_loop() is tried first so that, on older Python versions
# where the module-level semaphore binds to the current loop when it is
# constructed, the downloads run on that same loop. Newer Python versions
# raise RuntimeError when no current loop exists; in that case create and
# install one explicitly.
try:
    eloop = asyncio.get_event_loop()
except RuntimeError:
    eloop = asyncio.new_event_loop()
    asyncio.set_event_loop(eloop)

try:
    eloop.run_until_complete(wait_with_progressbar(coroutines))
finally:
    # Close the loop even when one of the downloads raises.
    eloop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi, i'm getting errors of the form
python 3.6.5, ubuntu.
I am not very familiar with async -- does this make sense?