Skip to content

Instantly share code, notes, and snippets.

@borgle
Created March 17, 2017 18:29
Show Gist options
  • Save borgle/9a248dc83f147264f267dcfb0514930b to your computer and use it in GitHub Desktop.
Save borgle/9a248dc83f147264f267dcfb0514930b to your computer and use it in GitHub Desktop.
一个最基本的多线程抓取页面代码
#coding: utf-8
# gevent recommends monkey-patching before other modules are imported, so
# the patch is applied first and the remaining imports follow it.
from gevent import monkey, spawn
monkey.patch_all()

import json
import random
import struct
import threading

import requests
# Proxy mapping handed to every request: all traffic goes through the
# HTTP proxy listening on 127.0.0.1:8087.
proxies = dict(all='http://127.0.0.1:8087')

# Base request headers; crawle() additionally sets 'X-Forwarded-For'.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/168 Safari/537.36'
    ),
    'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Cache-Control': 'no-cache',
}

# Form fields of the POST body; crawle() sets 'pageIndex' for each page.
data = dict(pageIndex='2', pageSize='20')

# Failure records appended by crawle(), and the lock taken around each append.
err = []
lock = threading.Lock()
def crawle(i, timeout=30):
    """Fetch page ``i`` from the fund handler and print its result code.

    Sends the paging form as a POST through the configured proxy, with a
    randomly generated dotted-quad address in the ``X-Forwarded-For``
    header. On success prints ``i``, the ``Code`` field of the JSON reply
    and the generated address. Any failure (network error, timeout,
    non-JSON body, missing ``Code`` key) is recorded in the module-level
    ``err`` list as ``"<address>,<repr of exception>"`` instead of being
    raised, so one bad page does not abort the whole run.

    Args:
        i: Page index sent as the ``pageIndex`` form field.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.post``. Use ``None`` to wait forever.
    """
    ipaddr = '{}.{}.{}.{}'.format(
        random.randint(11, 192),
        random.randint(1, 254),
        random.randint(1, 254),
        random.randint(1, 254),
    )
    # Build per-call copies rather than mutating the shared module-level
    # dicts: many crawle() calls run concurrently and must not see each
    # other's page index or forwarded address.
    request_headers = dict(headers)
    request_headers['X-Forwarded-For'] = ipaddr
    form = dict(data, pageIndex=i)
    try:
        r = requests.post(
            "http://www.abcdefg.com/FundHandler.ashx",
            data=form,
            headers=request_headers,
            proxies=proxies,
            timeout=timeout,
        )
        try:
            j = r.json()
        finally:
            # Release the connection even when the body is not valid JSON.
            r.close()
        # %-formatting prints the same text under Python 2 and Python 3.
        print('%s %s %s' % (i, j['Code'], ipaddr))
    except Exception as what:
        # Deliberately broad: this is a best-effort crawl that collects
        # failures for the final report rather than stopping.
        with lock:
            err.append(ipaddr + ',' + repr(what))
# Launch one greenlet per page index. spawn() already schedules each
# greenlet to run, so no separate start() call is needed.
ts = [spawn(crawle, i) for i in range(200)]

# Wait for every page fetch to finish before reporting.
for t in ts:
    t.join()

# %-formatting prints the same text under Python 2 and Python 3.
print('error %s' % (err,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment