Skip to content

Instantly share code, notes, and snippets.

@jayzeng
Created August 1, 2013 00:58
Show Gist options
  • Save jayzeng/6127627 to your computer and use it in GitHub Desktop.
Save jayzeng/6127627 to your computer and use it in GitHub Desktop.
Various ways to download a file, with their running times compared.
import urllib2
import httplib
import os
import time
from urllib2 import Request, urlopen, URLError
def save_file(file):
    """Download the URL ``file`` into the current working directory.

    The local file name is the last ``/``-separated component of the URL.
    If a file with that name already exists, nothing is downloaded.

    The response body is read completely into memory before the output
    file is opened, so a failed request never leaves an empty file behind
    (which the existence check above would otherwise treat as "already
    downloaded" on the next run).

    Args:
        file: URL of the resource to fetch.

    Returns:
        None. A ``URLError`` raised while fetching is reported on stdout
        and the function returns without creating the output file.
    """
    output_filename = file.split("/")[-1]
    if os.path.exists(output_filename):
        return

    print("downloading %s" % file)
    request = urllib2.Request(file)
    # NOTE(review): urllib2 does not decode a gzip Content-Encoding itself,
    # so the bytes are stored exactly as the server sends them -- confirm
    # that this header is really wanted for already-compressed .gz files.
    request.add_header('Accept-encoding', 'gzip')
    request.add_header('User-agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36')
    opener = urllib2.build_opener()

    try:
        response = opener.open(request)
        try:
            payload = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except URLError as err:
        # Best effort: report the failure and carry on with other files.
        print("failed to download %s: %s" % (file, err))
        return

    # Only touch the filesystem once the whole payload is available.
    with open(output_filename, "wb") as output:
        output.write(payload)
def download():
    """Fetch the four sample archives one after another and print the time.

    Calls ``save_file`` sequentially for hours 1 through 4 of the
    2012-04-11 archive, then prints the elapsed wall-clock time in seconds.
    """
    start = time.time()
    url_template = "http://data.githubarchive.org/2012-04-11-%d.json.gz"
    # A plain loop: the calls are made only for their side effects, so
    # there is no result list worth building.
    for hour in range(1, 5):
        save_file(url_template % hour)
    print("%s seconds" % (time.time() - start))
import multiprocessing
def download2():
    """Fetch the four sample archives with a process pool and print the time.

    Submits one ``save_file`` task per archive (hours 1 through 4 of
    2012-04-11) to a pool of four worker processes, waits for all of them,
    then prints the elapsed wall-clock time in seconds.

    Any exception raised inside a worker is re-raised here by ``get()``.
    """
    start = time.time()
    url_template = "http://data.githubarchive.org/2012-04-11-%d.json.gz"
    pool = multiprocessing.Pool(4)
    try:
        # Submit everything up front. Using a lazy generator here would
        # interleave each submission with the previous task's get(), which
        # makes the downloads run one at a time.
        pending = [
            pool.apply_async(save_file, (url_template % hour,))
            for hour in range(1, 5)
        ]
        for task in pending:
            task.get()
    finally:
        # Shut the workers down instead of leaving them to linger.
        pool.close()
        pool.join()
    print("%s seconds" % (time.time() - start))
from multiprocessing import Process, Queue
def download3():
    """Fetch the four sample archives, one process each, and print the time.

    Starts a dedicated ``Process`` running ``save_file`` for each archive
    (hours 1 through 4 of 2012-04-11), waits for every process to finish,
    then prints the elapsed wall-clock time in seconds.
    """
    start = time.time()
    url_template = "http://data.githubarchive.org/2012-04-11-%d.json.gz"

    processes = []
    for hour in range(1, 5):
        process = Process(target=save_file, args=(url_template % hour,))
        process.start()
        processes.append(process)

    # Start all processes before joining any, so the downloads overlap.
    for process in processes:
        process.join()

    print("%s seconds" % (time.time() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment