Skip to content

Instantly share code, notes, and snippets.

@djinn
Last active July 10, 2020 16:41
Show Gist options
  • Save djinn/041a3dee40cfbfed58ce881b9584b013 to your computer and use it in GitHub Desktop.
Save djinn/041a3dee40cfbfed58ce881b9584b013 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from requests import get
from shutil import copyfileobj
from tempfile import NamedTemporaryFile as TemporaryFile
import time
from os import sendfile
import hashlib
import os.path
# URL downloaded by every benchmark run in the __main__ section: the
# 2020-07-01 English Wikinews "pages-articles-multistream" bz2 dump.
WIKIDUMPURL = 'https://dumps.wikimedia.org/enwikinews/20200701/enwikinews-20200701-pages-articles-multistream.xml.bz2'
def downloadFileRequests(url):
    """Stream *url* into a named temporary file using 8192-byte chunks.

    The temporary file is created with ``delete=False`` so it outlives this
    call; the caller is responsible for removing it.

    Args:
        url: Address handed to ``requests.get``.

    Returns:
        A ``(total, name)`` tuple. ``total`` is the size in bytes taken from
        the ``Content-Length`` response header, or the number of bytes
        actually written when that header is absent. ``name`` is the path of
        the temporary file holding the download.

    Raises:
        requests.exceptions.HTTPError: If the response status is 4xx/5xx.
    """
    with get(url, stream=True) as r:
        # Fail fast rather than timing and hashing an HTTP error page.
        r.raise_for_status()
        declared = r.headers.get('Content-Length')
        written = 0
        # The with-block closes (and flushes) the file; no explicit close needed.
        with TemporaryFile(delete=False) as tf:
            for chunk in r.iter_content(chunk_size=8192):
                tf.write(chunk)
                written += len(chunk)
    # Responses without Content-Length (e.g. chunked transfer) would make
    # int(None) raise TypeError, so fall back to what was really received.
    total = int(declared) if declared is not None else written
    return total, tf.name
def downloadFileRequestsBigBlock(url):
    """Stream *url* into a named temporary file using 100000-byte chunks.

    Identical in contract to the 8192-byte variant; only the chunk size
    passed to ``iter_content`` differs. The temporary file is created with
    ``delete=False`` so it outlives this call; the caller must remove it.

    Args:
        url: Address handed to ``requests.get``.

    Returns:
        A ``(total, name)`` tuple. ``total`` is the size in bytes taken from
        the ``Content-Length`` response header, or the number of bytes
        actually written when that header is absent. ``name`` is the path of
        the temporary file holding the download.

    Raises:
        requests.exceptions.HTTPError: If the response status is 4xx/5xx.
    """
    with get(url, stream=True) as r:
        # Fail fast rather than timing and hashing an HTTP error page.
        r.raise_for_status()
        declared = r.headers.get('Content-Length')
        written = 0
        # The with-block closes (and flushes) the file; no explicit close needed.
        with TemporaryFile(delete=False) as tf:
            for chunk in r.iter_content(chunk_size=100000):
                tf.write(chunk)
                written += len(chunk)
    # Responses without Content-Length (e.g. chunked transfer) would make
    # int(None) raise TypeError, so fall back to what was really received.
    total = int(declared) if declared is not None else written
    return total, tf.name
def downloadFileRequestsFDDirect(url):
    """Copy the response body of *url* straight into a named temporary file.

    Uses ``shutil.copyfileobj`` from ``r.raw`` to the temporary file with a
    100 MiB buffer instead of iterating over chunks in Python. The temporary
    file is created with ``delete=False`` so it outlives this call; the
    caller must remove it.

    NOTE(review): ``r.raw`` is the underlying raw stream, so bytes are
    presumably written without requests' content decoding -- confirm the
    server does not apply a Content-Encoding to this resource.

    Args:
        url: Address handed to ``requests.get``.

    Returns:
        A ``(total, name)`` tuple. ``total`` is the size in bytes taken from
        the ``Content-Length`` response header, or the number of bytes
        actually written when that header is absent. ``name`` is the path of
        the temporary file holding the download.

    Raises:
        requests.exceptions.HTTPError: If the response status is 4xx/5xx.
    """
    with get(url, stream=True) as r:
        # Fail fast rather than timing and hashing an HTTP error page.
        r.raise_for_status()
        declared = r.headers.get('Content-Length')
        # The with-block closes (and flushes) the file; no explicit close needed.
        with TemporaryFile(delete=False) as tf:
            copyfileobj(r.raw, tf, 100 * 1024 * 1024)
            # File position after the copy == number of bytes written.
            written = tf.tell()
    # Responses without Content-Length (e.g. chunked transfer) would make
    # int(None) raise TypeError, so fall back to what was really received.
    total = int(declared) if declared is not None else written
    return total, tf.name
def hash(name):
    """Print the MD5 hex digest of the file at path *name*.

    The file is read in 1 MiB blocks so large downloads are not loaded into
    memory all at once, and the handle is closed deterministically.

    NOTE: this function shadows the builtin ``hash``; the name is kept
    because existing callers use it.

    Args:
        name: Path of the file to digest.

    Returns:
        None. The digest is written to stdout as ``File digest is <hex>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(name, 'rb') as fh:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for block in iter(lambda: fh.read(1024 * 1024), b''):
            digest.update(block)
    print("File digest is {}".format(digest.hexdigest()))
def guage(url, df):
    """Time one download strategy, print its throughput and the file digest.

    Args:
        url: Address to download; passed unchanged to *df*.
        df: Callable taking the URL and returning a ``(total, name)`` tuple,
            where ``total`` is the size in bytes and ``name`` is the path of
            the file that was written.

    Returns:
        None. Prints ``Speed is <value> Mbps`` and then calls ``hash(name)``.
    """
    start = time.perf_counter()
    total, name = df(url)
    lap = time.perf_counter() - start
    # "Mbps" means megabits per second, so convert bytes to bits (x8); the
    # previous formula printed megabytes per second under an Mbps label.
    # Guard against a zero interval on very coarse clocks.
    speed = (total * 8) / (lap * 1000000) if lap > 0 else float('inf')
    print("Speed is {} Mbps".format(speed))
    try:
        hash(name)
    finally:
        # The downloaders create the file with delete=False, so without this
        # every run would leave a full copy of the download on disk.
        try:
            os.remove(name)
        except OSError:
            pass  # best-effort cleanup; the file may already be gone
if __name__ == '__main__':
    # Each entry pairs the banner to print with the download strategy to
    # benchmark; they run in this order against the same URL.
    benchmarks = (
        ("Download Chunked 8192 blocksize", downloadFileRequests),
        ("Download Chunked 100000 blocksize", downloadFileRequestsBigBlock),
        ("Download Chunked FD to FD Copy", downloadFileRequestsFDDirect),
    )
    for banner, downloader in benchmarks:
        print(banner)
        guage(WIKIDUMPURL, downloader)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment