Skip to content

Instantly share code, notes, and snippets.

@mrluanma
Created May 10, 2011 16:15
Show Gist options
  • Save mrluanma/964785 to your computer and use it in GitHub Desktop.
Save mrluanma/964785 to your computer and use it in GitHub Desktop.
Python 用 httplib2 登录并抓取人人网好友页面的例子, 需要自己管理重定向和 Cookie.
# -*- Encoding: utf-8 -*-
import urllib
from httplib2 import Http
# Example: log in to renren.com with httplib2 and fetch the friend-list page,
# handling the redirect and the cookies by hand.

# Login form target and the page fetched after logging in.
LOGIN_URL = 'http://www.renren.com/PLogin.do'
FRIENDS_URL = 'http://friend.renren.com/myfriendlistx.do'

h = Http()
# Redirects are followed manually below so that the Set-Cookie header of each
# hop can be captured and sent back on the next request.
h.follow_redirects = False

# Form fields posted to LOGIN_URL (credentials are placeholders).
login_data = {
    'email': '[email protected]',
    'password': 'ZZZZZZ',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
}

# Browser-like request headers shared by every request; each request works on
# its own copy so per-request additions never leak into the template.
headers_template = {
    'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'www.renren.com',
    'Referer': 'http://www.renren.com/Home.do',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.65 Safari/534.24',
}

# Step 1: submit the login form.
headers = headers_template.copy()
headers['Content-type'] = 'application/x-www-form-urlencoded'
resp, content = h.request(LOGIN_URL, 'POST', headers=headers,
                          body=urllib.urlencode(login_data))

# Cookie value to send on the next request.  The response object is a dict,
# so .get() avoids a KeyError when a response carries no Set-Cookie header.
# NOTE: the Set-Cookie value is forwarded verbatim, as in the original script.
cookie = resp.get('set-cookie')

# Step 2: follow the post-login redirect by hand, carrying the cookie along.
if resp['status'] == '302':
    headers = headers_template.copy()
    if cookie:
        headers['Cookie'] = cookie
    resp, content = h.request(resp['location'], headers=headers)
    # Prefer cookies set by the redirect target; otherwise keep the login ones.
    cookie = resp.get('set-cookie', cookie)

# Step 3: fetch the friend list from the friend.renren.com host.
headers = headers_template.copy()
headers['Host'] = 'friend.renren.com'
if cookie:
    headers['Cookie'] = cookie
resp, content = h.request(FRIENDS_URL, headers=headers)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(content)
@LaoLiulaoliu
Copy link

用 httplib2 写的自动灌水程序,但 cookie 总是有问题:程序自动设置的 cookie 是无效的,每次 POST 帖子时返回的 cookie 都不一样。手动填入一个从正常会话中抓取到的有效 cookie 可以正常工作,但没有找到自动设置有效 cookie 的方法。请问楼主对 httplib2 的 cookie 处理是否熟悉,可以帮忙想想自动设置 cookie 的方法吗?

def _cookie_pairs(set_cookie):
    """Extract ``name=value`` pairs from a ``Set-Cookie`` response value.

    httplib2 folds repeated ``Set-Cookie`` headers into one comma-separated
    string, and every cookie carries attributes (``Path``, ``Expires``, ...)
    that belong to the response only; a ``Cookie`` request header must
    contain just the ``name=value`` pairs.

    Args:
        set_cookie: The value of ``resp.get('set-cookie')``; may be ``None``
            or empty when the response set no cookie.

    Returns:
        A dict mapping each cookie name to its ``name=value`` string.
    """
    pairs = {}
    if not set_cookie:
        return pairs
    # Split only on commas that introduce a new ``name=`` token, so the comma
    # inside a date such as ``Expires=Wed, 09 Jun 2021 ...`` is left alone.
    for chunk in re.split(r',\s*(?=[^;,=\s]+=)', set_cookie):
        pair = chunk.split(';', 1)[0].strip()
        name, sep, _ = pair.partition('=')
        if sep and name.strip():
            pairs[name.strip()] = pair
    return pairs


def login():
    """Log in to VeryCD and post a random reply to every topic found.

    Reads the module-level names ``username``, ``password``, ``encoding`` and
    ``msg`` (defined outside this snippet).  Cookies returned by each response
    are collected and sent back automatically on the following requests.
    """
    login_url = 'http://secure.verycd.com/signin/*/http://www.verycd.com/'
    header_template = {
        'Host': 'secure.verycd.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:12.0) Gecko/20100101 Firefox/12.0',
        # The pasted snippet showed '/;q=0.8'; the asterisks of '*/*' had
        # been swallowed by markdown rendering.
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Referer': 'http://secure.verycd.com/signin/*/http://www.verycd.com/',
    }

    login_data = {
        'username': username,
        'password': password,
        'continue': 'http://www.verycd.com/',
        'fk': '',
        'save_cookie': '1',
        'login_submit': '登录',
    }

    h = httplib2.Http("cache")
    headers = header_template.copy()
    headers['Content-Type'] = 'application/x-www-form-urlencoded'
    resp, content = h.request(login_url, 'POST',
                              body=urllib.parse.urlencode(login_data),
                              headers=headers)

    # Cookie jar (name -> "name=value"), refreshed from every response.
    cookies = _cookie_pairs(resp.get('set-cookie'))

    if resp['status'] == '302' and 'location' in resp:
        # Resolve a possibly relative Location against the login URL and
        # address the Host header to the server actually being contacted.
        location = urllib.parse.urljoin(login_url, resp['location'])
        headers = header_template.copy()
        headers['Host'] = urllib.parse.urlsplit(location).netloc
        if cookies:
            headers['Cookie'] = '; '.join(cookies.values())
        resp, content = h.request(location, headers=headers)
        cookies.update(_cookie_pairs(resp.get('set-cookie')))

    content = content.decode(encoding)
    # Unique topic ids linked from the page reached after logging in.
    topics = set(re.findall(r'/topics/(\d+)', content))

    headers = header_template.copy()
    headers['Host'] = 'www.verycd.com'
    headers['Content-Type'] = 'application/x-www-form-urlencoded'
    for topic in topics:
        referer = 'http://www.verycd.com/topics/' + str(topic) + '/reply'
        url = referer + '#fast-replay'
        print('\n\n--', url)
        post_data = {
            'contents': random.choice(msg),
            'use_bbcode': '1',
            'tid': str(topic),
            'qcid': '',
            'showAll': '',
            'Action': 'FolderCommentOperate:doReplyFolder',
        }
        headers['Referer'] = referer
        if cookies:
            headers['Cookie'] = '; '.join(cookies.values())
        resp, content = h.request(url, 'POST',
                                  body=urllib.parse.urlencode(post_data),
                                  headers=headers)
        # Diagnostic from the original author: with a valid cookie only the
        # time part of the returned value grows between requests; with an
        # invalid cookie a different id comes back every time.
        print(resp.get('set-cookie'))
        cookies.update(_cookie_pairs(resp.get('set-cookie')))
        # Random pause between replies.
        time.sleep(random.randint(1, 10))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment