Created
September 4, 2012 14:40
-
-
Save mrluanma/3621775 to your computer and use it in GitHub Desktop.
Python 登录新浪微博(requests 真的比 urllib2 强了 2^^32 倍 pip install requests)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
requests==2.4.3
rsa==3.1.4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import absolute_import, division, print_function, unicode_literals | |
import re | |
import json | |
import base64 | |
import binascii | |
import rsa | |
import requests | |
import logging | |
# Emit DEBUG-level log records from every logger in the process.
logging.basicConfig(level=logging.DEBUG)

# Value sent as the ``client`` query parameter of the SSO endpoints.
WBCLIENT = 'ssologin.js(v1.4.5)'

# Browser-like User-Agent attached to every request made through ``session``.
user_agent = ' '.join([
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.11 (KHTML, like Gecko)',
    'Chrome/20.0.1132.57 Safari/536.11',
])

# Shared HTTP session used by the login flow and by later page fetches.
session = requests.session()
session.headers.update({'User-Agent': user_agent})
def encrypt_passwd(passwd, pubkey, servertime, nonce):
    """Return the hex-encoded RSA encryption of the login password.

    The plaintext is ``servertime``, a tab, ``nonce``, a newline and
    ``passwd`` (each converted with ``str``), encoded as UTF-8.

    :param passwd: password to encrypt.
    :param pubkey: RSA modulus as a hexadecimal string.
    :param servertime: value combined into the plaintext before encryption.
    :param nonce: value combined into the plaintext before encryption.
    :returns: the ciphertext as produced by ``binascii.hexlify``.
    """
    modulus = int(pubkey, 16)
    exponent = int('10001', 16)
    public_key = rsa.PublicKey(modulus, exponent)

    plaintext = ''.join([
        str(servertime), '\t', str(nonce), '\n', str(passwd),
    ])
    ciphertext = rsa.encrypt(plaintext.encode('utf-8'), public_key)
    # hexlify is the same function as b2a_hex.
    return binascii.hexlify(ciphertext)
def _parse_jsonp(text):
    """Return the first JSON object embedded in *text*.

    Decoding starts at the first ``{`` and stops at the end of that JSON
    value, so any wrapper before or after it is ignored.

    :raises ValueError: if *text* has no ``{`` or the data starting there is
        not valid JSON.
    """
    start = text.find('{')
    if start == -1:
        raise ValueError('no JSON object found in response: %r' % text[:200])
    payload, _ = json.JSONDecoder().raw_decode(text, start)
    return payload


def wblogin(username, password):
    """Log in through the Sina SSO endpoints using the module-level session.

    Three requests are made on ``session``: a prelogin GET whose response
    supplies ``servertime``, ``nonce``, ``pubkey`` and ``rsakv``; a login POST
    carrying the RSA-encrypted password; and a GET of the redirect URL found
    in the login response.

    :param username: account name; it is URL-quoted and base64-encoded
        before being sent as ``su``.
    :param password: plain-text password, encrypted via ``encrypt_passwd``.
    :returns: the JSON object parsed from the final response.
    :raises ValueError: if a response does not contain the expected JSON
        object or redirect URL (previously an ``AttributeError`` from calling
        ``.group`` on a failed regex match).
    """
    # b64encode returns bytes; decode it so '%s' formatting does not embed
    # "b'...'" in the URL on Python 3. The same quoted value is used for
    # both the prelogin request and the login form.
    su = base64.b64encode(
        requests.utils.quote(username).encode('utf-8')
    ).decode('ascii')

    resp = session.get(
        'http://login.sina.com.cn/sso/prelogin.php?'
        'entry=sso&callback=sinaSSOController.preloginCallBack&'
        'su=%s&rsakt=mod&client=%s' % (su, WBCLIENT)
    )
    pre_login = _parse_jsonp(resp.text)

    data = {
        'entry': 'weibo',
        'gateway': 1,
        'from': '',
        'savestate': 7,
        'userticket': 1,
        'ssosimplelogin': 1,
        'su': su,
        'service': 'miniblog',
        'servertime': pre_login['servertime'],
        'nonce': pre_login['nonce'],
        'vsnf': 1,
        'vsnval': '',
        'pwencode': 'rsa2',
        'sp': encrypt_passwd(password, pre_login['pubkey'],
                             pre_login['servertime'], pre_login['nonce']),
        'rsakv': pre_login['rsakv'],
        'encoding': 'UTF-8',
        'prelt': '115',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.si'
               'naSSOController.feedBackUrlCallBack',
        'returntype': 'META',
    }
    resp = session.post(
        'http://login.sina.com.cn/sso/login.php?client=%s' % WBCLIENT,
        data=data
    )

    # The login response contains a script call of the form replace("<url>").
    redirect = re.search(r'replace\([\"\']([^\'\"]+)[\"\']', resp.text)
    if redirect is None:
        raise ValueError(
            'login response contains no redirect URL: %r' % resp.text[:200]
        )
    resp = session.get(redirect.group(1))

    # Parse whatever JSON object the callback carries; the old regex needed a
    # literal '}}' and failed on responses without a nested object.
    return _parse_jsonp(resp.text)
if __name__ == '__main__':
    # Log in and print the returned JSON without escaping non-ASCII text.
    login_result = wblogin('[email protected]', 'XXXXXXXX')
    print(json.dumps(login_result, ensure_ascii=False))

    # timeline
    home_page = session.get('http://weibo.com/')
    print(home_page.text)
thanks, 很有帮助.
另外,似乎第75行的正则需要改一下,
现在没法匹配到'}}'
不过问题不大
报错 2092 是什么意思?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@wgchen requests.session 会自动使用 cookie 维护 session,wblogin 之后直接使用 session.get(URL) 访问需要抓取的新浪微博页面即可。