Gist by @Hammer2900, forked from harrisont/download_multiple.py, created September 28, 2016.
Use asyncio and aiohttp to download multiple files concurrently and handle the responses as they finish. The snippets below assume Python 3.7+ (for asyncio.run) and aiohttp 3.x.
import asyncio

import aiohttp


async def download_file(session: aiohttp.ClientSession, url: str):
    async with session.get(url) as response:
        assert response.status == 200
        # For large files use response.content.read(chunk_size) instead.
        return url, await response.read()


async def download_multiple(session: aiohttp.ClientSession):
    urls = (
        'http://cnn.com',
        'http://nytimes.com',
        'http://google.com',
        'http://leagueoflegends.com',
        'http://python.org',
    )
    download_futures = [download_file(session, url) for url in urls]
    print('Results')
    # as_completed yields each download as soon as it finishes,
    # regardless of the order the URLs were scheduled in.
    for download_future in asyncio.as_completed(download_futures):
        result = await download_future
        print('finished:', result)
    return urls


async def main():
    # aiohttp 3.x requires the session to be created and closed inside a
    # running event loop, so main() is a coroutine driven by asyncio.run().
    # (The legacy @asyncio.coroutine / yield from style used in the original
    # gist was removed in Python 3.11.)
    async with aiohttp.ClientSession() as session:
        result = await download_multiple(session)
        print('finished:', result)


asyncio.run(main())
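The loop above uses asyncio.as_completed to handle each response as soon as it finishes. As an aside (not part of the original gist), if you only need all the results at the end, asyncio.gather is the more common idiom; a minimal sketch:

import asyncio

import aiohttp


async def download_all(urls):
    # Sketch only: gather schedules all fetches concurrently but returns the
    # results in input order, rather than completion order.
    async with aiohttp.ClientSession() as session:

        async def fetch(url):
            async with session.get(url) as response:
                return url, await response.read()

        return await asyncio.gather(*(fetch(url) for url in urls))


for url, body in asyncio.run(download_all(['http://python.org'])):
    print(url, len(body), 'bytes')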
A second variant streams each response in fixed-size chunks and pushes them to a consumer coroutine (a primed generator acting as a sink):

import asyncio

import aiohttp

CHUNK_SIZE = 4 * 1024  # 4 KiB


def coroutine(func):
    # Prime a generator-based consumer by advancing it to its first yield,
    # so it is ready to receive values via send().
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)
        return cr
    return start


@coroutine
def chunk_printer(url: str):
    while True:
        chunk = yield
        print('got chunk: {}: {} bytes'.format(url, len(chunk)))


async def download_file(session: aiohttp.ClientSession, url: str, sink):
    async with session.get(url) as response:
        assert response.status == 200
        # Read the body incrementally instead of buffering it all in memory,
        # forwarding each chunk to the sink as it arrives.
        while True:
            chunk = await response.content.read(CHUNK_SIZE)
            if not chunk:
                break
            sink.send(chunk)
    return url


async def download_multiple(session: aiohttp.ClientSession):
    urls = (
        'http://cnn.com',
        'http://nytimes.com',
        'http://google.com',
        'http://leagueoflegends.com',
        'http://python.org',
    )
    download_futures = [
        download_file(session, url, sink=chunk_printer(url)) for url in urls
    ]
    print('Results')
    for download_future in asyncio.as_completed(download_futures):
        result = await download_future
        print('finished:', result)
    return urls


async def main():
    async with aiohttp.ClientSession() as session:
        result = await download_multiple(session)
        print('finished:', result)


asyncio.run(main())
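The chunk_printer sink only logs chunk sizes. A natural extension, sketched below with a hypothetical file_sink (not in the original gist), writes each chunk to disk; it reuses the coroutine priming decorator defined above:

@coroutine
def file_sink(path: str):
    # Hypothetical consumer: receives chunks via send() and writes them to a
    # file. Calling sink.close() raises GeneratorExit at the yield, which
    # unwinds the with block and closes the file.
    with open(path, 'wb') as f:
        while True:
            chunk = yield
            f.write(chunk)

# Usage sketch (names illustrative): stream a page to disk instead of printing.
# sink = file_sink('python_org.html')
# await download_file(session, 'http://python.org', sink=sink)
# sink.close()

Because the sink is just any object with a send() method, the download code stays unchanged whether chunks are printed, written to disk, or hashed.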