@Kylmakalle
Last active March 23, 2023 11:26
Comparing aiohttp vs requests parallel file download methods
# asyncio_download.py
import asyncio
import os
import shutil
import timeit

import aiohttp

urls = [
    "https://speedtest.london.linode.com/garbage.php?ckSize=20",
    "https://httpbin.org/get",
    "https://httpbin.org/uuid",
    "https://httpbin.org/delay/1",
]

DOWNLOAD_DIR = "async_downloads"


async def download_url(session, url):
    # Name the file after the last path segment of the URL.
    filename = url.split("/")[-1] + ".txt"
    async with session.get(url) as response:
        with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
            # Stream the response body to disk in 1 KiB chunks.
            while True:
                chunk = await response.content.read(1024)
                if not chunk:
                    break
                f.write(chunk)


async def main():
    # Recreate the download directory on every run (ignore_errors covers the
    # first run, when the directory does not exist yet).
    shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
    os.makedirs(DOWNLOAD_DIR)
    async with aiohttp.ClientSession() as session:
        # Schedule all downloads concurrently on the event loop.
        tasks = [download_url(session, url) for url in urls]
        await asyncio.gather(*tasks)


if __name__ == "__main__":
    print("Asyncio downloader")
    print(f"Execution time: {timeit.timeit(lambda: asyncio.run(main()), number=1)} seconds")
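The manual read(1024) loop above can also be written with aiohttp's StreamReader.iter_chunked helper, which makes the asyncio version mirror the iter_content loop in the requests script further down. A minimal sketch of that alternative coroutine, reusing the session handling and DOWNLOAD_DIR from the script above (download_url_chunked is a hypothetical name, not part of the original gist):

async def download_url_chunked(session, url):
    # Same behaviour as download_url, but iterates the body with iter_chunked
    # instead of calling response.content.read(1024) in a manual loop.
    filename = url.split("/")[-1] + ".txt"
    async with session.get(url) as response:
        with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
            async for chunk in response.content.iter_chunked(1024):
                f.write(chunk)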
Running 15 test(s) for each downloader
Asyncio downloader median execution time: 3.0570455420000044 seconds
ThreadPoolExecutor downloader median execution time: 3.3250206250000005 seconds
Asyncio downloader average execution time: 3.184937586066667 seconds
ThreadPoolExecutor downloader average execution time: 3.2618439584000014 seconds

Comparison based on median execution time:
Asyncio downloader is faster.

Comparison based on average execution time:
Asyncio downloader is faster.
$ pip show aiohttp
Name: aiohttp
Version: 3.8.4
...

$ pip show requests
Name: requests
Version: 2.28.2
...

$ python
Python 3.7.16 (default, Feb  6 2023, 16:31:19) 
[Clang 14.0.0 (clang-1400.0.29.202)] on darwin

$ neofetch
OS: macOS 13.1 22C65 arm64
Host: MacBookPro18,3
Kernel: 22.2.0
Memory: 4521MiB / 32768MiB
# Benchmark harness: runs both downloaders several times and compares the timings.
import asyncio
import statistics
import timeit

from asyncio_download import main as asyncio_main
from threading_download import main as threadpool_main


def run_asyncio_downloader():
    asyncio.run(asyncio_main())


def run_threadpool_downloader():
    threadpool_main()


if __name__ == "__main__":
    num_tests = 15
    print(f"Running {num_tests} test(s) for each downloader")

    # One timed run per timeit call, repeated num_tests times per downloader.
    asyncio_times = [timeit.timeit(run_asyncio_downloader, number=1) for _ in range(num_tests)]
    threadpool_times = [timeit.timeit(run_threadpool_downloader, number=1) for _ in range(num_tests)]

    asyncio_median_time = statistics.median(asyncio_times)
    threadpool_median_time = statistics.median(threadpool_times)
    asyncio_average_time = statistics.mean(asyncio_times)
    threadpool_average_time = statistics.mean(threadpool_times)

    print(f"Asyncio downloader median execution time: {asyncio_median_time} seconds")
    print(f"ThreadPoolExecutor downloader median execution time: {threadpool_median_time} seconds")
    print(f"Asyncio downloader average execution time: {asyncio_average_time} seconds")
    print(f"ThreadPoolExecutor downloader average execution time: {threadpool_average_time} seconds")

    print("\nComparison based on median execution time:")
    if asyncio_median_time < threadpool_median_time:
        print("Asyncio downloader is faster.")
    else:
        print("ThreadPoolExecutor downloader is faster.")

    print("\nComparison based on average execution time:")
    if asyncio_average_time < threadpool_average_time:
        print("Asyncio downloader is faster.")
    else:
        print("ThreadPoolExecutor downloader is faster.")
# threading_download.py
import os
import shutil
import timeit
from concurrent.futures import ThreadPoolExecutor

import requests

urls = [
    "https://speedtest.london.linode.com/garbage.php?ckSize=20",
    "https://httpbin.org/get",
    "https://httpbin.org/uuid",
    "https://httpbin.org/delay/1",
]

DOWNLOAD_DIR = "threading_downloads"


def download_url(url):
    # Name the file after the last path segment of the URL.
    filename = url.split("/")[-1] + ".txt"
    response = requests.get(url)
    with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
        # Write the body in 1 KiB chunks, skipping empty keep-alive chunks.
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def main():
    # Recreate the download directory on every run (ignore_errors covers the
    # first run, when the directory does not exist yet).
    shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
    os.makedirs(DOWNLOAD_DIR)
    # One worker thread per URL so every download runs in parallel.
    with ThreadPoolExecutor(max_workers=len(urls)) as executor:
        executor.map(download_url, urls)


if __name__ == "__main__":
    print("ThreadPoolExecutor downloader")
    print(f"Execution time: {timeit.timeit(main, number=1)} seconds")
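A note on the design above: without stream=True, requests downloads the whole response body into memory before iter_content starts yielding, so the chunked write does not actually cap per-file memory use. A minimal sketch of a streaming variant, keeping the filename and DOWNLOAD_DIR conventions of the script above (download_url_streaming is a hypothetical name, not part of the original gist):

def download_url_streaming(url):
    filename = url.split("/")[-1] + ".txt"
    # stream=True defers the body download; iter_content then pulls it in 1 KiB chunks.
    with requests.get(url, stream=True) as response:
        with open(f"{DOWNLOAD_DIR}/{filename}", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)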