Skip to content

Instantly share code, notes, and snippets.

@josephok
Last active August 29, 2015 14:14
Show Gist options
  • Save josephok/d7167a4a8650c3a6fe80 to your computer and use it in GitHub Desktop.
Save josephok/d7167a4a8650c3a6fe80 to your computer and use it in GitHub Desktop.
requests test
fs = require "fs"
request = require "request"
# Fetch listing pages 1..50 and stream each response body into
# output/<page>.html.
URLS = ("http://www.fanjian.net/page/#{i}" for i in [1..50])

for url in URLS
  # The page number is whatever follows the final "/" in the URL.
  n = url.slice(url.lastIndexOf("/") + 1)
  console.log n
  response = request(url)
  response.pipe fs.createWriteStream("output/#{n}.html")
# Apply gevent's socket patch *before* importing requests: gevent recommends
# monkey-patching as early as possible so that libraries imported afterwards
# only ever see the cooperative socket implementation.
import gevent
import gevent.monkey

gevent.monkey.patch_socket()

import requests  # noqa: E402  (deliberately imported after patching)

# The 50 listing pages to download, numbered 1..50.
URLS = ["http://www.fanjian.net/page/{}".format(i) for i in range(1, 51)]
def crawl(url, i):
    """Fetch *url* and store the raw response bytes in requests_gevent/<i>.txt.

    Args:
        url: Address of the page to download.
        i: Number used to name the output file.
    """
    body = requests.get(url).content
    target = "requests_gevent/{}.txt".format(i)
    with open(target, "wb") as out:
        out.write(body)
# Spawn one greenlet per URL (numbered from 1), then block until all of them
# have finished.
threads = [
    gevent.spawn(crawl, page_url, page_no)
    for page_no, page_url in enumerate(URLS, start=1)
]
gevent.joinall(threads)
import requests
from threading import Thread
# The 50 listing pages to download, numbered 1..50.
URLS = list(map("http://www.fanjian.net/page/{}".format, range(1, 51)))
def crawl(url, i):
    """Download *url* and save its decoded text to requests_multithread/<i>.txt.

    Args:
        url: Address of the page to download.
        i: Number used to name the output file.
    """
    r = requests.get(url)
    # Pin the output encoding: without it, open() falls back to the locale's
    # preferred encoding, which may be unable to represent the page text and
    # would make the written bytes differ from machine to machine.
    with open("requests_multithread/{}.txt".format(i), "wt",
              encoding="utf-8") as f:
        f.write(r.text)
# Start one thread per URL, numbering the pages from 1. The threads are not
# joined here.
for page_no, page_url in enumerate(URLS, start=1):
    Thread(target=crawl, args=(page_url, page_no)).start()
import requests
from multiprocessing import Pool
# (url, page_number) pairs for pages 1..50; each pair is one work item.
URLS = [
    ("http://www.fanjian.net/page/{}".format(page), page)
    for page in range(1, 51)
]
def crawl(url_i):
    """Download one page and save its decoded text to requests_pool/<i>.txt.

    Args:
        url_i: A ``(url, i)`` pair: the address to download and the number
            used to name the output file.
    """
    url, i = url_i
    r = requests.get(url)
    # Pin the output encoding: without it, open() falls back to the locale's
    # preferred encoding, which may be unable to represent the page text and
    # would make the written bytes differ from machine to machine.
    with open("requests_pool/{}.txt".format(i), "wt",
              encoding="utf-8") as f:
        f.write(r.text)
# Guard the entry point: with the "spawn" start method each worker re-imports
# this module, and an unguarded Pool() at import time would try to start
# workers recursively.
if __name__ == "__main__":
    # One worker per URL; the context manager releases the pool's processes
    # once map() has returned.
    with Pool(50) as p:
        p.map(crawl, URLS)
import requests
# Sequential baseline: download pages 1..50 one after another and save each
# page's decoded text to requests/<i>.txt.
URLS = ["http://www.fanjian.net/page/{}".format(i) for i in range(1, 51)]

for i, url in enumerate(URLS, start=1):
    r = requests.get(url)
    # Pin the output encoding: without it, open() falls back to the locale's
    # preferred encoding, which may be unable to represent the page text and
    # would make the written bytes differ from machine to machine.
    with open("requests/{}.txt".format(i), "wt", encoding="utf-8") as f:
        f.write(r.text)
joseph@joe:~/git/crawl$ time python3 requests_single_thread.py
real 13m31.470s
user 0m0.380s
sys 0m0.084s
=================================================================
joseph@joe:~/git/crawl$ time python3 requests_multithread.py
real 0m21.942s
user 0m0.304s
sys 0m0.104s
=================================================================
joseph@joe:~/git/crawl$ time python3 requests_pool.py
real 3m42.518s
user 0m0.608s
sys 0m0.160s
=================================================================
joseph@joe:~/git/crawl$ time python3 requests_pool.py
real 0m26.195s
user 0m0.724s
sys 0m0.392s
=================================================================
joseph@joe:~/git/crawl$ time python3 requests_gevent.py
real 1m17.492s
user 0m0.328s
sys 0m0.060s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment