Last active
August 29, 2015 14:14
-
-
Save josephok/d7167a4a8650c3a6fe80 to your computer and use it in GitHub Desktop.
requests test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fs = require "fs" | |
request = require "request" | |
# One URL per listing page, pages 1 through 50.
URLS = ("http://www.fanjian.net/page/#{i}" for i in [1..50])

# Stream a single page to output/<page number>.html.
# The page number is whatever follows the last "/" of the URL.
# The output/ directory is not created here and must already exist.
# This is a function rather than an inline loop body so that the
# asynchronous error callbacks see their own `url` and `n`.
download = (url) ->
  pos = url.lastIndexOf "/"
  n = url.slice(pos + 1)
  console.log n
  target = fs.createWriteStream("output/#{n}.html")
  # An 'error' event with no listener is thrown by Node, which would
  # abort every other download still in flight; report it instead.
  target.on "error", (err) ->
    console.error "write failed for page #{n}: #{err.message}"
  req = request(url)
  req.on "error", (err) ->
    console.error "request failed for #{url}: #{err.message}"
  req.pipe target

for url in URLS
  download url
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gevent | |
import requests | |
import gevent.monkey | |
# Replace the standard socket module with gevent's version before any
# request is issued.
gevent.monkey.patch_socket()

# Listing pages 1 through 50 of the target site.
URLS = list(map("http://www.fanjian.net/page/{}".format, range(1, 51)))
def crawl(url, i, timeout=60):
    """Download *url* and save the response body to ``requests_gevent/<i>.txt``.

    Args:
        url: Address of the page to fetch.
        i: Number used as the base name of the output file.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block this call indefinitely,
            and the caller's join would never return.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        OSError: If the output file cannot be opened; the
            ``requests_gevent`` directory is not created here.
    """
    r = requests.get(url, timeout=timeout)
    # Written in binary mode, so no text encoding is involved.
    with open("requests_gevent/{}.txt".format(i), "wb") as f:
        f.write(r.content)
# Spawn one greenlet per page (numbered from 1), then block until all
# of them have finished.
threads = [
    gevent.spawn(crawl, page_url, page_no)
    for page_no, page_url in enumerate(URLS, start=1)
]
gevent.joinall(threads)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from threading import Thread | |
# Listing pages 1 through 50 of the target site.
URLS = list(map("http://www.fanjian.net/page/{}".format, range(1, 51)))
def crawl(url, i, timeout=60):
    """Download *url* and save its decoded text to ``requests_multithread/<i>.txt``.

    Args:
        url: Address of the page to fetch.
        i: Number used as the base name of the output file.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block this thread
            indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        OSError: If the output file cannot be opened; the
            ``requests_multithread`` directory is not created here.
    """
    r = requests.get(url, timeout=timeout)
    # Pin the file encoding: the platform default may be unable to represent
    # the page's characters, which would raise UnicodeEncodeError on write.
    with open(
        "requests_multithread/{}.txt".format(i), "wt", encoding="utf-8"
    ) as f:
        f.write(r.text)
# Start one thread per page; the counter begins at 1 and is passed to
# crawl as the output file number.
for page_no, page_url in enumerate(URLS, start=1):
    Thread(target=crawl, args=(page_url, page_no)).start()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from multiprocessing import Pool | |
# (url, page number) pairs for listing pages 1 through 50; each pair is
# handed to crawl as a single argument.
URLS = list(
    zip(map("http://www.fanjian.net/page/{}".format, range(1, 51)), range(1, 51))
)
def crawl(url_i, timeout=60):
    """Download one page and save its decoded text to ``requests_pool/<i>.txt``.

    Args:
        url_i: Sequence whose first item is the URL to fetch and whose
            second item is the number used as the output file's base name.
            The two values are packed together because ``Pool.map`` passes
            exactly one argument per task.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block this worker
            indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        OSError: If the output file cannot be opened; the ``requests_pool``
            directory is not created here.
    """
    url, i = url_i[0], url_i[1]
    r = requests.get(url, timeout=timeout)
    # Pin the file encoding: the platform default may be unable to represent
    # the page's characters, which would raise UnicodeEncodeError on write.
    with open("requests_pool/{}.txt".format(i), "wt", encoding="utf-8") as f:
        f.write(r.text)
# The guard keeps worker processes from re-running this section when they
# import the main module (required under the "spawn" start method).
if __name__ == "__main__":
    # The context manager releases the 50 worker processes once map returns.
    with Pool(50) as p:
        p.map(crawl, URLS)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
# Listing pages 1 through 50 of the target site.
URLS = ["http://www.fanjian.net/page/{}".format(i) for i in range(1, 51)]

# Fetch the pages one after another and save each as requests/<i>.txt.
for i, url in enumerate(URLS, start=1):
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(url, timeout=60)
    # Pin the file encoding: the platform default may be unable to represent
    # the page's characters, which would raise UnicodeEncodeError on write.
    with open("requests/{}.txt".format(i), "wt", encoding="utf-8") as f:
        f.write(r.text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
joseph@joe:~/git/crawl$ time python3 requests_single_thread.py | |
real 13m31.470s | |
user 0m0.380s | |
sys 0m0.084s | |
================================================================= | |
joseph@joe:~/git/crawl$ time python3 requests_multithread.py | |
real 0m21.942s | |
user 0m0.304s | |
sys 0m0.104s | |
================================================================= | |
joseph@joe:~/git/crawl$ time python3 requests_pool.py | |
real 3m42.518s | |
user 0m0.608s | |
sys 0m0.160s | |
================================================================= | |
joseph@joe:~/git/crawl$ time python3 requests_pool.py | |
real 0m26.195s | |
user 0m0.724s | |
sys 0m0.392s | |
================================================================= | |
joseph@joe:~/git/crawl$ time python3 requests_gevent.py | |
real 1m17.492s | |
user 0m0.328s | |
sys 0m0.060s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment