Skip to content

Instantly share code, notes, and snippets.

@motebaya
Last active August 26, 2023 03:40
Show Gist options
  • Save motebaya/810f82acdcff69b09ed46e3c418a5c2b to your computer and use it in GitHub Desktop.
Save motebaya/810f82acdcff69b09ed46e3c418a5c2b to your computer and use it in GitHub Desktop.
async multi threading downloader with rich progress bar
#!/usr/bin/env python3
# @credit: @gist.github.com/motebaya
# @modify: 2023-08-21 10:48:13.274582300 +0700
# @gist: simple async, multi-threaded Python script for downloading a batch of files
# @docs:
# https://rich.readthedocs.io/en/latest/progress.html
# https://github.com/Textualize/rich/discussions/1102
# https://testfiledownload.com/
# https://stackoverflow.com/questions/64282309/aiohttp-download-large-list-of-pdf-files
# https://docs.python.org/3/library/functools.html
from rich.progress import (
Progress,
SpinnerColumn,
BarColumn,
TextColumn,
DownloadColumn,
TransferSpeedColumn,
TimeRemainingColumn
)
from rich.console import Console
from aiohttp import ClientSession
from concurrent.futures import ThreadPoolExecutor
from functools import partial
import aiofiles, asyncio, os, time
async def async_download(**kwargs):
    """Download one URL into the current working directory with a progress bar.

    Keyword Args:
        url: Address of the file to fetch (required). The text after the
            last ``/`` is used as the local file name.
        force: When truthy, an already existing target file is overwritten.

    Returns:
        The absolute path of the saved file.

    Raises:
        ValueError: If no ``url`` is given, or the URL ends with ``/`` so no
            file name can be derived from it.
        FileExistsError: If the target file already exists and ``force`` is
            not set.
        aiohttp.ClientResponseError: If the server answers with an error
            status (raised by ``response.raise_for_status()``).
    """
    url = kwargs.get('url')
    if not url:
        raise ValueError(
            'none spesific url found'
        )

    # The file name is the last path segment, taken verbatim (a query
    # string, if any, stays part of the name so distinct URLs do not collide).
    basename = url.split('/')[-1]
    if not basename:
        # Without this guard the target would resolve to the working
        # directory itself and fail later with a confusing error.
        raise ValueError(
            "cannot derive a file name from url: {}".format(url)
        )
    filename = os.path.realpath(basename)

    # Skip an existing file unless the caller explicitly asked to overwrite.
    if os.path.exists(filename) and not kwargs.get('force'):
        raise FileExistsError(
            " file skip exist: {}".format(
                filename
            )
        )

    console = Console()
    async with ClientSession() as session:
        async with session.get(url) as response:
            # Fail before opening the file so an HTTP error page is never
            # written to disk (and never truncates an existing file).
            response.raise_for_status()

            # A missing or zero content-length means the size is unknown;
            # None makes the bar pulse instead of pretending the total is 0.
            content_length = int(response.headers.get('content-length', 0))
            async with aiofiles.open(filename, "wb") as f:
                with Progress(
                    SpinnerColumn(speed=1.5),
                    TextColumn("[green] Downloading..", justify="right"),
                    BarColumn(),
                    "[progress.percentage]{task.percentage:>3.0f}%",
                    DownloadColumn(
                        binary_units=False
                    ),
                    TransferSpeedColumn(),
                    TimeRemainingColumn(),
                    console=console,
                    transient=True
                ) as progress:
                    task = progress.add_task(
                        "[green] Downloading..",
                        total=content_length or None
                    )
                    # iter_chunks() yields (data, end_of_http_chunk) pairs.
                    async for data, _ in response.content.iter_chunks():
                        await f.write(data)
                        progress.update(task, advance=len(data))
            # Both context managers above close the file and stop the
            # progress display on exit; no explicit close()/stop() needed.

    console.print(f"[green] file saved as: [blue]{filename}")
    return filename
# func: with_thread, download every URL in the list, each one in a worker thread
async def with_thread(url: "list | None" = None, *, max_workers: int = 10):
    """Download every URL in ``url`` concurrently using a thread pool.

    Each URL is handed to a worker thread, which runs its own event loop
    via ``asyncio.run`` around ``async_download(url=..., force=True)``.
    The elapsed time is printed once all downloads have finished.

    Args:
        url: URLs to download. ``None`` (the default) or an empty list
            means there is nothing to download.
        max_workers: Maximum number of worker threads in the pool.

    Returns:
        None.
    """
    links = url or []

    def _download_in_worker(link):
        # Build the coroutine inside the worker thread so it is created and
        # consumed by the same event loop that asyncio.run() starts there.
        return asyncio.run(async_download(url=link, force=True))

    start_time = time.perf_counter()
    # We are inside a coroutine, so a loop is guaranteed to be running.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        tasks = [
            loop.run_in_executor(pool, _download_in_worker, link)
            for link in links
        ]
        await asyncio.gather(*tasks)
    print(f"Completed With: {time.perf_counter() - start_time} Seconds.")
# func: without_thread, download the URLs one after another in a plain for loop
async def without_thread(url: "list | None" = None):
    """Download every URL in ``url`` one after another, without threads.

    Each entry is awaited through ``async_download(url=..., force=True)``
    before the next one starts. The elapsed time is printed at the end.

    Args:
        url: URLs to download. ``None`` (the default) or an empty list
            means there is nothing to download.

    Returns:
        None.
    """
    start_time = time.perf_counter()
    # Use a distinct loop name so the ``url`` parameter is not rebound.
    for link in url or []:
        await async_download(
            url=link,
            force=True
        )
    print(f"Completed With: {time.perf_counter() - start_time} Seconds.")
# Sample files used to compare the threaded and the sequential runner.
url = [
    "https://images8.alphacoders.com/131/1313894.jpeg",  # 2.MB
    "https://w.wallhaven.cc/full/28/wallhaven-28wzyy.jpg",  # 5.9MB
    "http://speedtest.ftp.otenet.gr/files/test100k.db",
    "http://speedtest.ftp.otenet.gr/files/test1Mb.db"
]

# Run both strategies back to back, each under its own banner, so the
# printed completion times can be compared.
for _banner, _runner in (
    ('Async with thread start', with_thread),
    ('Async with NO thread start', without_thread),
):
    print('-'*10, _banner, '-'*10)
    asyncio.run(_runner(url))
@motebaya
Copy link
Author

motebaya commented Aug 21, 2023

test

Tested: the two approaches differ by only about 3 seconds.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment