Last active
January 21, 2021 07:27
-
-
Save luzihang123/eb06a42e3fa144fa80f6903e4e7125f5 to your computer and use it in GitHub Desktop.
国家企业信用公示系统-经营异常名录(北京) demo http://www.gsxt.gov.cn/corp-query-entprise-info-xxgg-100000.html 参考https://blog.csdn.net/weixin_43242352/article/details/111562174
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import json | |
import re | |
import execjs | |
import requests | |
def get_hash256(data: str):
    """Return the hexadecimal SHA-256 digest of ``data`` encoded as UTF-8."""
    return hashlib.sha256(data.encode('utf-8')).hexdigest()
def get_hashsha1(data: str):
    """Return the hexadecimal SHA-1 digest of ``data`` encoded as UTF-8."""
    return hashlib.sha1(data.encode('utf-8')).hexdigest()
def get_hashmd5(data: str):
    """Return the hexadecimal MD5 digest of ``data`` encoded as UTF-8."""
    return hashlib.md5(data.encode('utf-8')).hexdigest()
def _extract_cookie(raw: str, name: str, default=None):
    """Return the bare value of cookie ``name`` found in ``raw``, else ``default``.

    ``raw`` may be a ``Set-Cookie`` header or a ``document.cookie`` assignment
    string; attributes such as ``; path=/; HttpOnly`` are not part of the value.
    """
    found = re.search(
        r'(?:^|[;,\s])' + re.escape(name) + r'=([^;,\s]*)', raw)
    return found.group(1) if found else default


def _parse_challenge(html: str) -> dict:
    """Extract the JSON argument of the ``go(...)`` call embedded in ``html``."""
    found = re.search(';go(.*)</script>', html)
    if found is None:
        raise RuntimeError('anti-bot challenge (go(...) call) not found in response')
    payload = found.group(1).strip()
    # Strip only the call's own wrapping parentheses; removing every
    # parenthesis could corrupt string values inside the JSON payload.
    if payload.startswith('(') and payload.endswith(')'):
        payload = payload[1:-1]
    return json.loads(payload)


def _solve_clearance(challenge: dict) -> str:
    """Brute-force the two missing characters of the ``__jsl_clearance`` value.

    The challenge supplies a prefix and suffix (``bts``), a candidate alphabet
    (``chars``), a hash algorithm name (``ha``) and the target digest (``ct``).
    The answer is ``bts[0] + c1 + c2 + bts[1]`` whose digest equals ``ct``.
    """
    hashers = {
        'sha256': get_hash256,
        'sha1': get_hashsha1,
        'md5': get_hashmd5,
    }
    algorithm = challenge['ha']
    if algorithm not in hashers:
        raise ValueError('unsupported challenge hash algorithm: %r' % (algorithm,))
    digest = hashers[algorithm]
    print('加密算法-' + algorithm)

    bts = challenge['bts']
    chars = challenge['chars']
    target = challenge['ct']
    # Both missing characters are drawn from ``chars``, so both loops must
    # range over ``chars`` (not over the length of the prefix string).
    for first in chars:
        for second in chars:
            candidate = bts[0] + first + second + bts[1]
            if digest(candidate) == target:
                return candidate
    raise RuntimeError('no character pair satisfies the clearance challenge')


def get_cookies():
    """Pass the gsxt.gov.cn anti-bot challenge and query the abnormal-operation list.

    The flow issues three GETs against the index page:

    1. The first response sets ``__jsluid_h`` and contains a script that
       computes an initial ``__jsl_clearance`` cookie.
    2. With those two cookies the server returns a second script holding a
       hash puzzle (``go({...})``) whose solution is the final clearance value.
    3. With the solved clearance the server issues the session cookies
       (``JSESSIONID``, ``SECTOKEN``, ``tlb_cookie``).

    The collected cookies are printed, used for one POST to the notice-list
    endpoint (whose response text is printed), and returned.

    Returns:
        dict: cookie name -> value mapping needed for data requests.

    Raises:
        RuntimeError: if an expected cookie or challenge is missing, or the
            hash puzzle has no solution.
        ValueError: if the challenge names an unsupported hash algorithm.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Host': 'www.gsxt.gov.cn',
        'Proxy-Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }
    index_url = 'http://www.gsxt.gov.cn/index.html'
    # Without a timeout a stalled connection would block forever.
    timeout = 30

    # Step 1: the first response carries __jsluid_h and a cookie-setting script.
    r1 = requests.get(index_url, headers=headers, verify=False, timeout=timeout)
    js = r1.text.replace('<script>document.cookie=', '').replace(
        ';location.href=location.pathname+location.search</script>', '')
    # SECURITY NOTE: this evaluates JavaScript received from the remote server.
    first_cookie_string = execjs.eval(js)

    jsluid = _extract_cookie(r1.headers.get('Set-Cookie', ''), '__jsluid_h')
    if jsluid is None:
        raise RuntimeError('__jsluid_h cookie missing from first response')
    first_clearance = _extract_cookie(first_cookie_string, '__jsl_clearance')
    if first_clearance is None:
        raise RuntimeError('__jsl_clearance missing from evaluated script')

    # Step 2: the preliminary clearance unlocks the hash puzzle.
    r2 = requests.get(
        index_url, headers=headers, verify=False, timeout=timeout,
        cookies={'__jsluid_h': jsluid, '__jsl_clearance': first_clearance})
    # Only the cookie value is sent back; attributes such as max-age belong to
    # the browser's cookie store, not to the Cookie request header.
    clearance = _solve_clearance(_parse_challenge(r2.text))

    # Step 3: the solved clearance makes the server issue session cookies.
    r3 = requests.get(
        index_url, headers=headers, verify=False, timeout=timeout,
        cookies={'__jsluid_h': jsluid, '__jsl_clearance': clearance})
    issued = r3.headers.get('Set-Cookie', '')

    # Cookies required to fetch data.
    cookies2 = {
        '__jsluid_h': jsluid,
        '__jsl_clearance': clearance,
        'JSESSIONID': _extract_cookie(issued, 'JSESSIONID', ''),
        'SECTOKEN': _extract_cookie(issued, 'SECTOKEN', ''),
        'tlb_cookie': _extract_cookie(issued, 'tlb_cookie', ''),
    }
    print(cookies2)

    # The query string separators are literal '&' characters; '&not' / '&reg'
    # must not be collapsed into the HTML entities for those names.
    url7 = ('http://www.gsxt.gov.cn/affiche-query-area-info-paperall.html'
            '?noticeType=11&areaid=100000&noticeTitle=&regOrg=110000')
    form_data = {
        "draw": 1,
        "start": 0,
        "length": 10,
    }
    r7 = requests.post(url7, headers=headers, data=form_data,
                       verify=False, timeout=timeout, cookies=cookies2)
    print(r7.text)
    return cookies2
# Script entry point: run the full cookie/challenge flow when executed directly.
if __name__ == "__main__":
    get_cookies()
Author
luzihang123
commented
Jan 21, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment