Skip to content

Instantly share code, notes, and snippets.

@kwsp
Created February 21, 2021 08:05
Show Gist options
  • Select an option

  • Save kwsp/20a5e29f9768b60f0e031006ee6c7dbe to your computer and use it in GitHub Desktop.

Select an option

Save kwsp/20a5e29f9768b60f0e031006ee6c7dbe to your computer and use it in GitHub Desktop.
Asyncio bulk file download
#!/usr/bin/env python3
"""Asynchronously download a list of links."""
from typing import Iterable
import asyncio
import logging
import pathlib
import sys
import aiofiles
from aiohttp import ClientSession
# Root logging setup: timestamped DEBUG-level records are written to stderr.
_LOG_FORMAT = "%(asctime)s %(levelname)s:%(name)s: %(message)s"
_LOG_DATE_FORMAT = "%H:%M:%S"

logging.basicConfig(
    stream=sys.stderr,
    level=logging.DEBUG,
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
)

# Logger used by the download helpers in this module.
logger = logging.getLogger("adownload")

# Disable the "chardet.charsetprober" logger so its records are dropped
# despite the DEBUG root level configured above.
_charset_prober_logger = logging.getLogger("chardet.charsetprober")
_charset_prober_logger.disabled = True
async def download_link(
    file: pathlib.Path, url: str, session: ClientSession, chunk_size=65536, **kwargs
) -> None:
    """Stream the body of a GET request for `url` into `file`.

    Args:
        file: Destination path; opened in ``"wb"`` mode, so an existing file
            is overwritten.
        url: Address to fetch.
        session: Client session used to issue the request.
        chunk_size: Maximum number of bytes requested from the response
            stream per read.
        **kwargs: Passed through to `session.request()`.

    Raises:
        Whatever `resp.raise_for_status()` raises for an error status, plus
        any error raised while reading the response or writing the file.
    """
    # Using the request as an async context manager guarantees the response
    # (and its connection) is released even when raise_for_status() or a
    # file write fails part-way through.
    async with session.request(method="GET", url=url, **kwargs) as resp:
        resp.raise_for_status()
        logger.info("Got response [%s] for URL: %s", resp.status, url)
        async with aiofiles.open(file, "wb") as fd:
            while True:
                chunk = await resp.content.read(chunk_size)
                if not chunk:
                    # An empty read marks the end of the response body.
                    break
                await fd.write(chunk)
async def bulk_download_and_write(
    outdir: pathlib.Path, urls: Iterable[str], **kwargs
) -> None:
    """Concurrently download multiple `urls` to `outdir`.

    Each file is named after the last segment of its URL path; query strings
    and fragments are not part of the name. A URL whose path has no final
    segment (for example one ending in "/") is saved as "index.html".

    Args:
        outdir: Directory the files are written into.
        urls: Addresses to fetch.
        **kwargs: Passed through to `download_link()` for every URL.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlsplit

    async with ClientSession() as session:
        tasks = []
        for url in urls:
            # Use only the path component so "?query" / "#fragment" never end
            # up in the filename, and fall back to a fixed name when the path
            # has no last segment (an empty name would make the target the
            # output directory itself).
            fname = urlsplit(url).path.rsplit("/", 1)[-1] or "index.html"
            tasks.append(
                download_link(file=(outdir / fname), url=url, session=session, **kwargs)
            )
        await asyncio.gather(*tasks)
def bulk_download(outdir: pathlib.Path, urls: Iterable[str], **kwargs) -> None:
    """Sync interface to asynchronously download multiple `urls` to `outdir`.

    Args:
        outdir: Directory the downloaded files are written into.
        urls: Addresses to fetch.
        **kwargs: Forwarded unchanged to `bulk_download_and_write()`.

    Raises:
        RuntimeError: If the interpreter is older than Python 3.7.
    """
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if sys.version_info < (3, 7):
        raise RuntimeError("Asyncio requires Python 3.7+.")
    asyncio.run(bulk_download_and_write(outdir=outdir, urls=urls, **kwargs))
if __name__ == "__main__":
    # Script entry point: download every URL listed in "links.txt" (next to
    # this file) into the sibling directory "new_dir".

    # Explicit check rather than `assert`, which is removed under `python -O`.
    if sys.version_info < (3, 7):
        sys.exit("Script requires Python 3.7+.")

    here = pathlib.Path(__file__).parent
    with open(here.joinpath("links.txt")) as infile:
        # One URL per line; the set removes duplicates and filter(None, ...)
        # drops blank lines so an empty string is never requested.
        urls = set(filter(None, map(str.strip, infile)))

    outpath = here.joinpath("new_dir")
    # exist_ok lets the script be re-run without failing on the directory
    # created by a previous run.
    outpath.mkdir(exist_ok=True)
    bulk_download(outpath, urls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment