Created
March 13, 2016 09:13
-
-
Save borgle/72e8971e1f83768f6824 to your computer and use it in GitHub Desktop.
杭州市机动车驾驶人考试互联网预约平台预约考试人员统计脚本
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import re
import time

import requests
# Scrapes the paged exam-booking roster served by queryKsmd.do and prints how
# many rows share each (third cell, fourth cell) combination.
# NOTE(review): what those two cells mean is not visible in this file --
# confirm against the live page.

# HTTP headers sent with every roster request.
headers = {'User-Agent': 'Mozilla/4.0',
           'Accept-Language': 'en-US,en;q=0.8',
           'Accept-Encoding': 'gzip, deflate, sdch',
           'Cache-Control': 'max-age=0',
           'Accept': 'text/html;q=0.9,*/*;q=0.8'
           }

QUERY_URL = "http://www.hzti.com:9004/drv_web/queryKsmd.do"
EXAM_DATE = '2016-03-20'   # sent as form field 'ykrq' and shown in the report
EXAM_SUBJECT = 2           # sent as form field 'kskm'
PAGE_SIZE = 20
REQUEST_TIMEOUT = 15       # seconds; without one a stalled server hangs forever
RETRY_DELAY = 2            # seconds to wait before re-requesting an empty page
PAGE_DELAY = 0.8           # seconds to wait between consecutive pages
MAX_RETRIES = 10           # consecutive empty responses tolerated per page

# One roster row: a <tr> with the site's hover handlers and exactly seven cells.
ROW_RE = re.compile(
    r'<tr onMouseOver="mouseover\(this\);" onMouseOut="mouseout\(this\);">'
    r'<td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td>'
    r'<td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr>')
# Pager text giving the total number of pages; whitespace around the number
# is tolerated.
TOTAL_PAGES_RE = re.compile(u'\u5171\\s*(\\d+)\\s*\u9875')


def normalize_html(text):
    """Return *text* with the whitespace noise removed so ROW_RE can match."""
    text = re.sub(r'\r|\n', '', text)
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'>\s+<', '><', text)
    text = re.sub(r'\s*>', '>', text)
    # NOTE(review): the pattern here used to be a single space, which also
    # removed the spaces ROW_RE relies on, so no row could ever match.
    # Presumably it was meant to drop non-breaking-space padding; only the
    # entity and the U+00A0 character are stripped now -- confirm.
    text = re.sub(u'&nbsp;|\u00a0', '', text)
    return text


def fetch_page(page):
    """POST the query for *page* and return the response text.

    Returns an empty string when the request fails, so the caller's
    empty-page retry logic handles network errors as well.
    """
    form = {'currentPage': page, 'kskm': EXAM_SUBJECT,
            'pageSize': PAGE_SIZE, 'ykrq': EXAM_DATE}
    try:
        response = requests.post(QUERY_URL, data=form, headers=headers,
                                 timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return u''
    try:
        return response.text
    finally:
        response.close()


def parse_total_pages(html):
    """Return the page count announced in *html*, or 0 if it is absent."""
    found = TOTAL_PAGES_RE.search(html)
    return int(found.group(1)) if found else 0


def tally_rows(rows, counts):
    """Add one to counts[third cell][fourth cell] for every row in *rows*."""
    for cells in rows:
        outer = cells[2].strip()
        inner = cells[3].strip()
        bucket = counts.setdefault(outer, {})
        bucket[inner] = bucket.get(inner, 0) + 1
    return counts


def collect_counts(fetch=None, sleep=None, max_retries=MAX_RETRIES):
    """Walk every result page and return the nested occurrence counts.

    fetch       -- callable(page_number) -> HTML text; defaults to fetch_page.
    sleep       -- callable(seconds); defaults to time.sleep.
    max_retries -- consecutive empty responses accepted for a single page.

    Raises RuntimeError when a page stays empty after *max_retries* retries
    (the loop previously retried forever).
    """
    if fetch is None:
        fetch = fetch_page
    if sleep is None:
        sleep = time.sleep

    counts = {}
    page, total_pages, failures = 1, 0, 0
    while True:
        html = normalize_html(fetch(page))
        rows = ROW_RE.findall(html)
        if not rows:
            failures += 1
            if failures > max_retries:
                raise RuntimeError(
                    'no rows on page {} after {} retries'.format(
                        page, max_retries))
            print('redo {}'.format(page))
            sleep(RETRY_DELAY)
            continue
        failures = 0
        tally_rows(rows, counts)

        if total_pages < 1:
            total_pages = parse_total_pages(html)
            if total_pages < 1:
                # Previously an IndexError; keep what was fetched instead.
                print('page count not found; assuming a single page')
                total_pages = 1
        if page + 1 > total_pages:
            break
        page += 1
        sleep(PAGE_DELAY)
    return counts


def _pad(text, width):
    """Left-justify *text* to *width* columns, measured in GBK bytes.

    The report used to pad GBK-encoded byte strings; measuring the encoded
    length keeps that column layout while the text itself stays unicode.
    """
    used = len(text.encode('gbk', 'replace'))
    return text + u' ' * max(0, width - used)


def format_report(counts):
    """Return the report lines: header, one line per pair, grand total."""
    lines = [u'科目二 {} 统计结果:'.format(EXAM_DATE)]
    total = 0
    for outer in counts:
        for inner in counts[outer]:
            number = counts[outer][inner]
            total += number
            lines.append(u'{}{}{} 人'.format(
                _pad(outer, 20), _pad(inner, 16), number))
    lines.append(u'总共{}人'.format(total))
    return lines


def main():
    """Fetch all pages and print the statistics."""
    for line in format_report(collect_counts()):
        print(line)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment