Last active
December 17, 2019 11:35
-
-
Save greyli/6abedbe97e99afc2e2726030e06256c1 to your computer and use it in GitHub Desktop.
知乎文章抽奖脚本(某次抽奖某位读者提供的)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import requests | |
import json | |
import re | |
import time | |
import random | |
import webbrowser | |
# Fill in your own cookie string; it must carry the `_xsrf` value.
COOKIE = '_xsrf=XXX'
# URL of the article whose commenters take part in the draw.
ARTICLE_URL = 'https://zhuanlan.zhihu.com/p/97139851'
# Number of winners to draw.
CHOICE_TOTAL = 3
def get_comments(zhuanlan_url, cookie):
    """Download every comment of an article through the comments API.

    The numeric article id is taken from the first run of four or more
    digits in ``zhuanlan_url``. Pages are requested ``limit`` comments at
    a time until the response reports ``paging.is_end``.

    Args:
        zhuanlan_url: URL of the article; also sent as the ``referer``.
        cookie: Raw cookie header value sent with every request.

    Returns:
        A list with the ``data`` entries of all pages, in request order.

    Raises:
        ValueError: If no article id can be found in ``zhuanlan_url``.
        requests.HTTPError: If the API answers with an error status.
    """
    # Validate the URL before any network traffic happens.
    id_match = re.search(r'\d{4,}', zhuanlan_url)
    if id_match is None:
        raise ValueError('no article id found in URL: %r' % (zhuanlan_url,))
    api_url = 'https://www.zhihu.com/api/v4/articles/{}/comments'.format(
        id_match.group(0))

    print('正在获取所有评论……')
    headers = {
        'accept': '*/*',
        # The original dict listed this header twice with different casing;
        # header names are case-insensitive, so only the later value
        # ('gzip') was effectively sent. Keep just that one.
        'accept-encoding': 'gzip',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'origin': 'https://zhuanlan.zhihu.com',
        'referer': zhuanlan_url,
        'cookie': cookie,
    }
    params = {
        'include': 'data[*].author,collapsed,reply_to_author,disliked,'
                   'content,voting,vote_count,is_parent_author,is_author,'
                   'algorithm_right',
        'order': 'normal',
        'limit': 20,
        'offset': 0,
        'status': 'open',
    }

    comments = []
    while True:
        resp = requests.get(api_url, params=params, headers=headers,
                            timeout=10)
        resp.raise_for_status()
        resp_data = resp.json()
        comment_page = resp_data['data']
        # Store the page BEFORE looking at is_end: the page that reports
        # the end may still carry comments, which used to be dropped.
        comments.extend(comment_page)
        # An empty page also ends the loop so a response that never
        # reports is_end cannot keep us requesting forever.
        if resp_data['paging']['is_end'] or not comment_page:
            break
        params['offset'] += params['limit']
        # Pause between pages to avoid hammering the API.
        time.sleep(1)

    print('一共%s条评论' % len(comments))
    return comments
def parse_authors(comments, page_size=20):
    """Turn raw API comments into a de-duplicated list of commenters.

    Featured comments are skipped, and only the first remaining comment
    of each user is kept.

    Args:
        comments: Comment dicts; each must provide
            ``['author']['member']['url_token']`` and may provide a
            boolean ``'featured'`` flag.
        page_size: Number of comments per page, used to compute the
            ``page`` and ``order`` fields. Defaults to 20.

    Returns:
        A list of dicts with the keys ``user_url`` (profile URL),
        ``page`` (1-based page of the comment) and ``order`` (1-based
        position within that page; featured comments count towards it).
    """
    print('正在去除重复评论,获取所有参与评论的用户……')
    authors = []
    seen_tokens = set()
    for idx, cm in enumerate(comments):
        # Skip featured comments; a missing flag means "not featured".
        if cm.get('featured') is True:
            continue
        url_token = cm['author']['member']['url_token']
        # NOTE(review): an empty token presumably marks an anonymous
        # commenter - confirm against the API. Such an entry has no usable
        # profile URL, so it cannot take part in the draw.
        if not url_token:
            continue
        # Skip users that were already collected.
        if url_token in seen_tokens:
            continue
        seen_tokens.add(url_token)
        authors.append({
            'user_url': 'https://www.zhihu.com/people/' + url_token,
            'page': idx // page_size + 1,
            'order': idx % page_size + 1,
        })
    print('一共%s名用户' % len(authors))
    return authors
def choice(chosen, num):
    """Randomly draw up to ``num`` distinct entries from ``chosen``.

    Args:
        chosen: Sequence of candidates to draw from.
        num: Number of winners wanted.

    Returns:
        A new list of winners without repetition. It holds fewer than
        ``num`` entries when there are not enough candidates, and is
        empty when ``chosen`` is empty or ``num`` is not positive.
    """
    print('正在随机抽取%d名幸运用户……' % num)
    # Clamp the request so sampling never fails on a small or empty pool.
    draw_count = max(0, min(num, len(chosen)))
    # Sample without replacement: nobody can win more than once.
    return random.sample(list(chosen), draw_count)
def run():
    """Fetch the article's comments, draw the winners and print them.

    Works on the module-level ARTICLE_URL, COOKIE and CHOICE_TOTAL
    settings. One line is printed per winner, showing the profile URL
    together with the page and position of the winning comment.
    """
    candidates = parse_authors(get_comments(ARTICLE_URL, COOKIE))
    winners = choice(candidates, CHOICE_TOTAL)
    print('获赠名单如下:\n')
    for winner in winners:
        # To open a winner's profile in a browser, pass
        # winner['user_url'] to webbrowser.open().
        pieces = [
            '* 用户URL:', winner['user_url'],
            ' | 页数:', str(winner['page']),
            ' | 序号:', str(winner['order']),
            '\n',
        ]
        print(''.join(pieces))
if __name__ == '__main__':
    # Without an explicit time tuple, strftime formats the current local time.
    now_text = time.strftime('%Y-%m-%d %A %X')
    print('当前时间为:' + now_text)
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment