Last active
August 26, 2023 03:40
-
-
Save motebaya/810f82acdcff69b09ed46e3c418a5c2b to your computer and use it in GitHub Desktop.
Async multi-threaded downloader with a rich progress bar
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# @credit: @gist.github.com/motebaya
# @modify: 2023-08-21 10:48:13.274582300 +0700
# @gist: simple async multi-threaded Python script for batch file downloads
# @docs:
# https://rich.readthedocs.io/en/latest/progress.html
# https://github.com/Textualize/rich/discussions/1102
# https://testfiledownload.com/
# https://stackoverflow.com/questions/64282309/aiohttp-download-large-list-of-pdf-files
# https://docs.python.org/3/library/functools.html
import asyncio
import os
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from urllib.parse import urlsplit

import aiofiles
from aiohttp import ClientSession
from rich.console import Console
from rich.progress import (
    BarColumn,
    DownloadColumn,
    Progress,
    SpinnerColumn,
    TextColumn,
    TimeRemainingColumn,
    TransferSpeedColumn,
)
async def async_download(**kwargs):
    """Download a single URL into the current working directory.

    The response body is streamed to disk chunk by chunk while a transient
    rich progress bar reports the transfer.

    Keyword Args:
        url: Address of the file to download (required).
        force: When truthy, overwrite a file that already exists.

    Returns:
        The resolved path of the saved file.

    Raises:
        ValueError: If ``url`` is missing/empty, or its path has no file name.
        FileExistsError: If the target file exists and ``force`` is not set.
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (400 or above).
    """
    url = kwargs.get('url')
    if not url:
        raise ValueError(
            'none spesific url found'
        )

    # Take the last segment of the URL *path* only, so a query string or
    # fragment never leaks into the file name.
    name = urlsplit(url).path.rsplit('/', 1)[-1]
    if name in ('', '.', '..'):
        # An empty or dot name would resolve to a directory, not a file.
        raise ValueError(
            "cannot derive a file name from url: {}".format(url)
        )
    # realpath() resolves the bare name against the working directory.
    filename = os.path.realpath(name)

    # skip existing file when called without ARG force.
    if os.path.exists(filename) and not kwargs.get('force'):
        raise FileExistsError(
            " file skip exist: {}".format(
                filename
            )
        )

    async with ClientSession() as session:
        async with session.get(url) as response:
            # Fail before the file is opened so an HTTP error page is never
            # written to disk in place of the requested file.
            response.raise_for_status()
            total = int(response.headers.get('content-length', 0))
            # Both context managers close their resource on exit; no explicit
            # f.close() / progress.stop() is needed.
            async with aiofiles.open(filename, "wb") as f:
                with Progress(
                    SpinnerColumn(speed=1.5),
                    TextColumn("[green] Downloading..", justify="right"),
                    BarColumn(),
                    "[progress.percentage]{task.percentage:>3.0f}%",
                    DownloadColumn(
                        binary_units=False
                    ),
                    TransferSpeedColumn(),
                    TimeRemainingColumn(),
                    console=Console(),
                    transient=True
                ) as progress:
                    task = progress.add_task(
                        "[green] Downloading..", total=total
                    )
                    # iter_chunks() yields (data, end_of_http_chunk) tuples.
                    async for chunk, _ in response.content.iter_chunks():
                        await f.write(chunk)
                        progress.update(task, advance=len(chunk))

    Console().print(f"[green] file saved as: [blue]{filename}")
    return filename
# func: with_thread, download every URL in the list concurrently using worker threads
async def with_thread(url: "list | None" = None):
    """Download every URL in ``url`` concurrently using worker threads.

    Each URL is submitted to a pool of at most 10 threads. Every worker runs
    ``async_download(url=..., force=True)`` in its own event loop via
    ``asyncio.run``. The elapsed time is printed once all downloads finish.

    Args:
        url: URLs to download. ``None`` or an empty list downloads nothing.
    """
    def _download(item):
        # Create the coroutine in the worker thread that actually runs it,
        # rather than eagerly in the calling thread.
        return asyncio.run(async_download(url=item, force=True))

    start_time = time.time()
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = [
            loop.run_in_executor(pool, _download, item)
            for item in url or ()
        ]
        # Wait for all workers; the first exception raised propagates.
        await asyncio.gather(*pending)
    print(f"Completed With: {time.time() - start_time} Seconds.")
# func: without_thread, download the list sequentially with a plain for loop
async def without_thread(url: "list | None" = None):
    """Download every URL in ``url`` one after another.

    Each URL is awaited through ``async_download(url=..., force=True)``
    before the next one starts. The elapsed time is printed at the end.

    Args:
        url: URLs to download. ``None`` or an empty list downloads nothing.
    """
    start_time = time.time()
    # A distinct loop name keeps the ``url`` parameter from being rebound.
    for item in url or ():
        await async_download(
            url=item,
            force=True
        )
    print(f"Completed With: {time.time() - start_time} Seconds.")
# Sample files used to compare the threaded and sequential strategies.
url = [
    "https://images8.alphacoders.com/131/1313894.jpeg",  # 2.MB
    "https://w.wallhaven.cc/full/28/wallhaven-28wzyy.jpg",  # 5.9MB
    "http://speedtest.ftp.otenet.gr/files/test100k.db",
    "http://speedtest.ftp.otenet.gr/files/test1Mb.db"
]

# Run the comparison only when executed as a script, so importing this
# module does not trigger any network downloads.
if __name__ == "__main__":
    print('-'*10, 'Async with thread start', '-'*10)
    asyncio.run(with_thread(url))
    print('-'*10, 'Async with NO thread start', '-'*10)
    asyncio.run(without_thread(url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested: the difference between the two runs was just 3 seconds.