Last active
January 9, 2017 17:01
-
-
Save metaphox/ae369deb869061d49ab7536f0d24a83b to your computer and use it in GitHub Desktop.
grab all my zhihu.com answers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarize a dump of zhihu answers stored as one JSON page per line.
#
# Usage: python <this script> [dump_file]
# When no dump file is given, 'myzhihuanswers.json' is read.
import datetime
import io
import json
import sys

DEFAULT_FILENAME = 'myzhihuanswers.json'


def _iter_answers(lines):
    """Yield every answer dict found in an iterable of JSON page lines.

    Blank lines are skipped instead of crashing ``json.loads``. A page
    contributes answers only when it is a JSON object with a ``data`` entry,
    so pages without one (for example an error body) are ignored.
    """
    for pageline in lines:
        if not pageline.strip():
            continue
        page = json.loads(pageline)
        if isinstance(page, dict) and 'data' in page:  # paging & data
            for answer in page['data']:
                yield answer


def print_answer_summary(argv=None):
    """Print a five-line summary for each answer in the dump file.

    For every answer this prints, in order: its 1-based running index, its
    keys, its ``created_time`` formatted as local time, the question title,
    and the length of its content.

    Args:
        argv: Argument list shaped like ``sys.argv``; ``argv[1]``, when
            present, is the dump file path. Defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    filename = argv[1] if len(argv) >= 2 else DEFAULT_FILENAME
    # Decode explicitly as UTF-8 so the result does not depend on the locale,
    # and iterate the file lazily rather than loading every page at once.
    with io.open(filename, encoding='utf-8') as jsonfile:
        for i, d in enumerate(_iter_answers(jsonfile), 1):
            print(i)
            print(list(d.keys()))
            print(datetime.datetime.fromtimestamp(d['created_time']).strftime('%Y-%m-%d %H:%M:%S'))
            print(d['question']['title'])
            print(len(d['content']))


if __name__ == '__main__':
    print_answer_summary()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download every page of a zhihu member's answers and save them to
# SAVETOFILE, one raw JSON response per line.
#
# Fill in ZHIHUID, ZHIHU_X_UDID and ZHIHU_AUTH before running.
import io
import sys

import requests

ZHIHUID = ''
ZHIHU_X_UDID = '""'
ZHIHU_AUTH = ''
ANSWER_URL = 'https://www.zhihu.com/api/v4/members/{}/answers'.format(ZHIHUID)
PAYLOAD = {'include': 'data[*].is_normal,content,created_time;'}
HEADERS = {
    'x-udid': ZHIHU_X_UDID,
    'authorization': ZHIHU_AUTH,
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
}
SAVETOFILE = 'myzhihuanswers.json'


def download_answers():
    """Fetch all answer pages and write each response body as one line.

    Pagination follows ``paging.next`` in each response until ``paging.is_end``
    is true. If the first request fails, the URL is reported and the process
    exits with status 1 before the output file is created. If a later request
    fails, its URL is reported and the download stops; the pages saved so far
    are kept and the failed response body is not written to the file.
    """
    # The first request has no "next" URL yet, so report ANSWER_URL on failure.
    r = requests.get(ANSWER_URL, params=PAYLOAD, headers=HEADERS)
    if r.status_code != 200:
        print('failed: {}'.format(ANSWER_URL))
        sys.exit(1)

    # io.open with an explicit encoding accepts the unicode response text on
    # both Python 2 and 3; the with-block closes the file on every exit path.
    with io.open(SAVETOFILE, 'w', encoding='utf-8') as f:
        while True:
            f.write(r.text + u"\n")
            paging = r.json()['paging']
            if paging['is_end']:
                break
            next_url = paging['next']
            r = requests.get(next_url, headers=HEADERS)
            if r.status_code != 200:
                print('failed: {}'.format(next_url))
                break


if __name__ == '__main__':
    download_answers()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment