$ pip show aiohttp
Name: aiohttp
Version: 3.8.4
...
$ pip show requests
Name: requests
Version: 2.28.2
...
$ python
Python 3.7.16 (default, Feb 6 2023, 16:31:19)
[Clang 14.0.0 (clang-1400.0.29.202)] on darwin
$ neofetch
OS: macOS 13.1 22C65 arm64
Host: MacBookPro18,3
Kernel: 22.2.0
Memory: 4521MiB / 32768MiB
Last active
March 23, 2023 11:26
-
-
Save Kylmakalle/60cd5e32cfb94937f87e8f2caf7baa55 to your computer and use it in GitHub Desktop.
Comparing aiohttp vs requests parallel file download methods
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import aiohttp | |
import timeit | |
import os | |
import shutil | |
# Directory (relative to the current working directory) that receives the
# downloaded files.
DOWNLOAD_DIR = "async_downloads"

# Endpoints fetched on every run.
urls = [
    "https://speedtest.london.linode.com/garbage.php?ckSize=20",
    "https://httpbin.org/get",
    "https://httpbin.org/uuid",
    "https://httpbin.org/delay/1",
]
async def download_url(session, url):
    """Fetch ``url`` through ``session`` and write the body to ``DOWNLOAD_DIR``.

    The output file is named after the last ``/``-separated segment of the
    URL (query string included) with ``.txt`` appended. The body is written
    in pieces of at most 1024 bytes.

    Args:
        session: Object whose ``get(url)`` is used as an async context
            manager (``main`` passes an ``aiohttp.ClientSession``).
        url: Address to download.
    """
    last_segment = url.split("/")[-1]
    filename = last_segment + ".txt"
    async with session.get(url) as response:
        with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
            # iter_chunked(1024) yields the same sequence of read(1024)
            # results and stops at the first empty chunk.
            async for chunk in response.content.iter_chunked(1024):
                f.write(chunk)
async def main():
    """Download every entry of ``urls`` concurrently into an empty ``DOWNLOAD_DIR``.

    Any previous contents of ``DOWNLOAD_DIR`` are deleted first. All
    downloads share one ``aiohttp.ClientSession`` and are awaited together
    with ``asyncio.gather``.
    """
    try:
        shutil.rmtree(DOWNLOAD_DIR)
    except FileNotFoundError:
        # First run: the directory has not been created yet, so there is
        # nothing to clean up. Other errors (e.g. permissions) still raise.
        pass
    os.makedirs(DOWNLOAD_DIR)
    async with aiohttp.ClientSession() as session:
        tasks = [download_url(session, url) for url in urls]
        await asyncio.gather(*tasks)
if __name__ == "__main__":
    # Script entry point: time a single complete run of main().
    print("Asyncio downloader")
    elapsed = timeit.timeit(lambda: asyncio.run(main()), number=1)
    print(f"Execution time: {elapsed} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Running 15 test(s) for each downloader
Asyncio downloader median execution time: 3.0570455420000044 seconds
ThreadPoolExecutor downloader median execution time: 3.3250206250000005 seconds
Asyncio downloader average execution time: 3.184937586066667 seconds
ThreadPoolExecutor downloader average execution time: 3.2618439584000014 seconds
Comparison based on median execution time:
Asyncio downloader is faster.
Comparison based on average execution time:
Asyncio downloader is faster.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
from asyncio_download import main as asyncio_main | |
from threading_download import main as threadpool_main | |
import asyncio | |
import statistics | |
def run_asyncio_downloader():
    """Run the asyncio downloader's ``main`` coroutine to completion."""
    coroutine = asyncio_main()
    asyncio.run(coroutine)
def run_threadpool_downloader():
    """Run the thread-pool downloader's ``main`` once.

    Zero-argument wrapper so it can be handed to ``timeit.timeit``.
    """
    threadpool_main()
    return None
if __name__ == "__main__":
    # Benchmark driver: sample each downloader ``num_tests`` times (one full
    # download run per sample), then report median and mean wall-clock times.
    num_tests = 15
    print(f"Running {num_tests} test(s) for each downloader ")

    # All asyncio samples are collected before any thread-pool sample.
    asyncio_times = [
        timeit.timeit(run_asyncio_downloader, number=1)
        for _ in range(num_tests)
    ]
    threadpool_times = [
        timeit.timeit(run_threadpool_downloader, number=1)
        for _ in range(num_tests)
    ]

    asyncio_median_time = statistics.median(asyncio_times)
    asyncio_average_time = statistics.mean(asyncio_times)
    threadpool_median_time = statistics.median(threadpool_times)
    threadpool_average_time = statistics.mean(threadpool_times)

    print(f"Asyncio downloader median execution time: {asyncio_median_time} seconds")
    print(f"ThreadPoolExecutor downloader median execution time: {threadpool_median_time} seconds")
    print(f"Asyncio downloader average execution time: {asyncio_average_time} seconds")
    print(f"ThreadPoolExecutor downloader average execution time: {threadpool_average_time} seconds")

    # A tie is reported in favour of the thread pool (strict "<" comparison).
    print("\nComparison based on median execution time:")
    print(
        "Asyncio downloader is faster."
        if asyncio_median_time < threadpool_median_time
        else "ThreadPoolExecutor downloader is faster."
    )
    # NOTE(review): indentation was lost in this copy of the script; the bare
    # print() is assumed to follow the if/else rather than sit inside the
    # else branch -- confirm against the original file.
    print()

    print("\nComparison based on average execution time:")
    print(
        "Asyncio downloader is faster."
        if asyncio_average_time < threadpool_average_time
        else "ThreadPoolExecutor downloader is faster."
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from concurrent.futures import ThreadPoolExecutor | |
import timeit | |
import os | |
import shutil | |
# Directory (relative to the current working directory) that receives the
# downloaded files.
DOWNLOAD_DIR = "threading_downloads"

# Endpoints fetched on every run; main() also sizes the thread pool from
# the length of this list.
urls = [
    "https://speedtest.london.linode.com/garbage.php?ckSize=20",
    "https://httpbin.org/get",
    "https://httpbin.org/uuid",
    "https://httpbin.org/delay/1",
]
def download_url(url):
    """Download ``url`` with ``requests`` and write the body to ``DOWNLOAD_DIR``.

    The output file is named after the last ``/``-separated segment of the
    URL (query string included) with ``.txt`` appended.

    Args:
        url: Address to download.
    """
    filename = url.split("/")[-1] + ".txt"
    # stream=True defers the body download so iter_content() really reads it
    # in 1 KiB pieces; without it requests buffers the entire response in
    # memory before the loop starts. The ``with`` block releases the
    # connection once the body has been written (or on error).
    with requests.get(url, stream=True) as response:
        with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
def main():
    """Download every entry of ``urls`` in parallel into an empty ``DOWNLOAD_DIR``.

    Any previous contents of ``DOWNLOAD_DIR`` are deleted first. One worker
    thread is created per URL.

    Raises:
        Exception: the first exception raised by a ``download_url`` worker
            is re-raised here instead of being discarded.
    """
    try:
        shutil.rmtree(DOWNLOAD_DIR)
    except FileNotFoundError:
        # First run: the directory has not been created yet, so there is
        # nothing to clean up. Other errors (e.g. permissions) still raise.
        pass
    os.makedirs(DOWNLOAD_DIR)
    with ThreadPoolExecutor(max_workers=len(urls)) as executor:
        # Executor.map() only re-raises a worker's exception when its result
        # is retrieved; drain the iterator so a failed download is not
        # silently counted as a successful run.
        list(executor.map(download_url, urls))
if __name__ == "__main__":
    # Script entry point: time a single complete run of main().
    print("ThreadPoolExecutor downloader")
    elapsed = timeit.timeit(main, number=1)
    print(f"Execution time: {elapsed} seconds")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment