Last active
July 10, 2020 16:41
-
-
Save djinn/041a3dee40cfbfed58ce881b9584b013 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import hashlib
import os
import os.path
import time
from os import sendfile
from shutil import copyfileobj
from tempfile import NamedTemporaryFile as TemporaryFile

from requests import get
WIKIDUMPURL = 'https://dumps.wikimedia.org/enwikinews/20200701/enwikinews-20200701-pages-articles-multistream.xml.bz2' | |
def downloadFileRequests(url):
    """Stream *url* into a persistent temporary file using 8192-byte chunks.

    Returns a ``(total, name)`` tuple: ``total`` is the size in bytes taken
    from the ``Content-Length`` response header (or, when the server sends
    no such header, the number of bytes actually written), and ``name`` is
    the path of the temporary file.  The file is created with
    ``delete=False`` so it outlives this call; the caller owns its cleanup.

    Raises whatever ``raise_for_status()`` raises for a 4xx/5xx response,
    so an error page is never measured as if it were the real payload.
    """
    with get(url, stream=True) as r:
        # Fail fast on HTTP errors instead of timing the error body.
        r.raise_for_status()
        # The header is optional (e.g. chunked transfer); int(None) would
        # raise TypeError, so keep the raw value and decide afterwards.
        length = r.headers.get('Content-Length')
        written = 0
        with TemporaryFile(delete=False) as tf:
            for chunk in r.iter_content(chunk_size=8192):
                tf.write(chunk)
                written += len(chunk)
    # The with-block has already closed tf; its name remains readable.
    total = int(length) if length is not None else written
    return total, tf.name
def downloadFileRequestsBigBlock(url):
    """Stream *url* into a persistent temporary file using 100000-byte chunks.

    Returns a ``(total, name)`` tuple: ``total`` is the size in bytes taken
    from the ``Content-Length`` response header (or, when the server sends
    no such header, the number of bytes actually written), and ``name`` is
    the path of the temporary file.  The file is created with
    ``delete=False`` so it outlives this call; the caller owns its cleanup.

    Raises whatever ``raise_for_status()`` raises for a 4xx/5xx response,
    so an error page is never measured as if it were the real payload.
    """
    with get(url, stream=True) as r:
        # Fail fast on HTTP errors instead of timing the error body.
        r.raise_for_status()
        # The header is optional (e.g. chunked transfer); int(None) would
        # raise TypeError, so keep the raw value and decide afterwards.
        length = r.headers.get('Content-Length')
        written = 0
        with TemporaryFile(delete=False) as tf:
            for chunk in r.iter_content(chunk_size=100000):
                tf.write(chunk)
                written += len(chunk)
    # The with-block has already closed tf; its name remains readable.
    total = int(length) if length is not None else written
    return total, tf.name
def downloadFileRequestsFDDirect(url):
    """Copy the raw response stream of *url* straight into a temporary file.

    Uses ``shutil.copyfileobj`` on ``r.raw`` with a 100 MiB buffer instead
    of iterating over chunks.

    Returns a ``(total, name)`` tuple: ``total`` is the size in bytes taken
    from the ``Content-Length`` response header (or, when the server sends
    no such header, the file position after the copy), and ``name`` is the
    path of the temporary file.  The file is created with ``delete=False``
    so it outlives this call; the caller owns its cleanup.

    Raises whatever ``raise_for_status()`` raises for a 4xx/5xx response,
    so an error page is never measured as if it were the real payload.
    """
    with get(url, stream=True) as r:
        # Fail fast on HTTP errors instead of timing the error body.
        r.raise_for_status()
        # The header is optional (e.g. chunked transfer); int(None) would
        # raise TypeError, so keep the raw value and decide afterwards.
        length = r.headers.get('Content-Length')
        with TemporaryFile(delete=False) as tf:
            copyfileobj(r.raw, tf, 100 * 1024 * 1024)
            # Position after the copy == number of bytes written.
            written = tf.tell()
    # The with-block has already closed tf; its name remains readable.
    total = int(length) if length is not None else written
    return total, tf.name
def hash(name):
    """Print the MD5 hex digest of the file at path *name*.

    The file is read in 1 MiB pieces so that large downloads are never
    held in memory all at once, and the handle is closed deterministically
    by the ``with`` block.  Returns ``None``.

    NOTE: the name shadows the ``hash`` builtin within this module; it is
    kept because other code in the file calls it by this name.
    """
    digest = hashlib.md5()
    with open(name, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read (EOF).
        for piece in iter(lambda: fh.read(1024 * 1024), b''):
            digest.update(piece)
    print("File digest is {}".format(digest.hexdigest()))
def guage(url, df):
    """Time one download of *url* performed by *df* and report the result.

    *df* is called as ``df(url)`` and must return ``(total, name)`` where
    ``total`` is the payload size in bytes and ``name`` is the path of the
    file it wrote.  The throughput is printed in megabits per second, the
    file's digest is printed via ``hash(name)``, and the downloaded file
    is removed afterwards so repeated runs do not pile up temp files.
    """
    start = time.perf_counter()
    total, name = df(url)
    lap = time.perf_counter() - start
    try:
        # total is in bytes: x8 converts to bits so the figure matches the
        # "Mbps" label; 1000000 scales to mega.  Guard against a zero
        # interval on coarse clocks rather than dividing by zero.
        if lap > 0:
            mbps = (total * 8) / (lap * 1000000)
        else:
            mbps = float('inf')
        print("Speed is {} Mbps".format(mbps))
        hash(name)
    finally:
        # The downloaders create the file with delete=False, so nothing
        # else cleans it up.  A file that is already gone is not an error.
        try:
            os.remove(name)
        except FileNotFoundError:
            pass
if __name__ == '__main__':
    # Benchmark plan: (banner printed before the run, downloader to time).
    # Runs happen in the listed order, each against the same dump URL.
    benchmarks = (
        ("Download Chunked 8192 blocksize", downloadFileRequests),
        ("Download Chunked 100000 blocksize", downloadFileRequestsBigBlock),
        ("Download Chunked FD to FD Copy", downloadFileRequestsFDDirect),
    )
    for banner, downloader in benchmarks:
        print(banner)
        guage(WIKIDUMPURL, downloader)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment