Skip to content

Instantly share code, notes, and snippets.

@haampie
Last active July 25, 2023 22:52
Show Gist options
  • Save haampie/82707307efa06687cf788e95d9189c13 to your computer and use it in GitHub Desktop.
Save haampie/82707307efa06687cf788e95d9189c13 to your computer and use it in GitHub Desktop.
test.py
$ python test.py 20 https://mirror.spack.io/_source-cache/archive/00/0081ee4c4242e635a8113b32f655910ada057c59043f29af4b613508a762f3ac.tar.gz
Fetching mirror.spack.io/_source-cache/archive/00/0081ee4c4242e635a8113b32f655910ada057c59043f29af4b613508a762f3ac.tar.gz 20 times
http client 13.58 ms/request 1.48 MB/s
urllib 48.35 ms/request 0.42 MB/s
import sys
import time
import hashlib
import http.client
import urllib.request
import urllib.parse
def with_http_client(domain, path, n=10):
    """Fetch ``https://{domain}{path}`` *n* times over a single connection.

    One ``http.client.HTTPSConnection`` is created up front and reused for
    every request. Each response body is read in full, fed into a running
    MD5 digest, and counted towards the total size.

    Args:
        domain: Host name passed to ``HTTPSConnection``.
        path: Request target sent with each ``GET`` (e.g. ``"/file.tar.gz"``).
        n: Number of requests to issue.

    Returns:
        A ``(hexdigest, size)`` tuple: the MD5 hex digest over all response
        bodies concatenated in request order, and the total byte count.
    """
    connection = http.client.HTTPSConnection(domain)
    checksum = hashlib.md5()
    size = 0
    try:
        for _ in range(n):
            connection.request("GET", path)
            # The body has to be read completely before the same connection
            # can be used for the next request.
            data = connection.getresponse().read()
            checksum.update(data)
            size += len(data)
    finally:
        # Close the socket even when a request or read raises.
        connection.close()
    return checksum.hexdigest(), size
def with_urllib(domain, path, n=10):
    """Fetch ``https://{domain}{path}`` *n* times with ``urllib.request.urlopen``.

    Every iteration issues an independent ``urlopen`` call. Each response
    body is read in full, fed into a running MD5 digest, and counted towards
    the total size.

    Args:
        domain: Host name of the URL.
        path: Path component of the URL, including the leading slash.
        n: Number of requests to issue.

    Returns:
        A ``(hexdigest, size)`` tuple: the MD5 hex digest over all response
        bodies concatenated in request order, and the total byte count.
    """
    checksum = hashlib.md5()
    size = 0
    url = f"https://{domain}{path}"
    for _ in range(n):
        # Use the response as a context manager so it is closed promptly
        # instead of lingering until garbage collection.
        with urllib.request.urlopen(url) as response:
            data = response.read()
        checksum.update(data)
        size += len(data)
    return checksum.hexdigest(), size
# Usage: python test.py [N [URL]]
#   N    number of requests per client (default 100)
#   URL  https:// URL to fetch (default: a small archive on mirror.spack.io)
n = 100 if len(sys.argv) < 2 else int(sys.argv[1])
if n < 1:
    # The per-request averages printed below divide by n.
    raise SystemExit("number of requests must be a positive integer")

if len(sys.argv) < 3:
    url = "https://mirror.spack.io/_source-cache/archive/00/00040cad9b6d6bb817ebd5853ff6dda23f9957153d8c4eedf85def0c9e787c42.tar.gz"
else:
    url = sys.argv[2]

# Validate explicitly; an `assert` would be stripped under `python -O`.
if not url.startswith("https://"):
    raise SystemExit("only https:// URLs are supported")

# Drop the scheme, then split "host/path..." at the first slash.
url = url[len("https://"):]
domain, _, rest = url.partition("/")
if not domain:
    raise SystemExit("URL is missing a host name")
# A URL without any path component is requested as "/".
path = "/" + rest

print(f"Fetching {url} {n} times")

# warmup (may populate cache on server)
with_http_client(domain, path, 1)
with_urllib(domain, path, 1)

# perf_counter() is monotonic and high-resolution, so the measured interval
# is not affected by system clock adjustments.
start = time.perf_counter()
checksum_a, size_a = with_http_client(domain, path, n)
total = time.perf_counter() - start
print("http client", round(1000 * total / n, 2), "ms/request", round(size_a / 1024 / 1024 / total, 2), "MB/s")

start = time.perf_counter()
checksum_b, size_b = with_urllib(domain, path, n)
total = time.perf_counter() - start
print("urllib", round(1000 * total / n, 2), "ms/request", round(size_b / 1024 / 1024 / total, 2), "MB/s")

# Both clients fetched the same resource the same number of times, so the
# digests over the concatenated bodies must agree.
if checksum_a != checksum_b:
    raise Exception("mismatch in checksum")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment