Skip to content

Instantly share code, notes, and snippets.

@metaphox
Last active January 9, 2017 17:01
Show Gist options
  • Save metaphox/ae369deb869061d49ab7536f0d24a83b to your computer and use it in GitHub Desktop.
Save metaphox/ae369deb869061d49ab7536f0d24a83b to your computer and use it in GitHub Desktop.
Grab all of my zhihu.com answers.
import sys, json, datetime
# Summarize a dump of zhihu.com answers stored as one JSON API page per line.
#
# Usage: python <this script> [dumpfile]
# The dump file defaults to 'myzhihuanswers.json'. For every answer found,
# prints a running index, the answer's keys, its creation time (local time),
# the question title and the length of the answer content.
import io  # io.open offers the same text-mode API on Python 2 and Python 3

filename = sys.argv[1] if len(sys.argv) >= 2 else 'myzhihuanswers.json'

i = 0  # running count of answers across all pages

# Decode explicitly as UTF-8 instead of relying on the platform default.
with io.open(filename, encoding='utf-8') as jsonfile:
    # Stream the file line by line; no need to hold every page in memory.
    for lineno, pageline in enumerate(jsonfile, 1):
        if not pageline.strip():
            continue  # tolerate blank lines
        try:
            page = json.loads(pageline)
        except ValueError:
            # A dump may contain a non-JSON body from a failed request;
            # report it and keep going rather than aborting the summary.
            sys.stderr.write('skipping unparsable line {}\n'.format(lineno))
            continue
        # Only result pages carry a 'data' list (next to 'paging'); anything
        # else, such as an error object, has nothing to summarize.
        if not isinstance(page, dict) or 'data' not in page:
            continue
        for d in page['data']:
            i += 1
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(i)
            print(list(d.keys()))
            print(datetime.datetime.fromtimestamp(d['created_time']).strftime('%Y-%m-%d %H:%M:%S'))
            print(d['question']['title'])
            print(len(d['content']))
import requests
# Download every answer of one zhihu.com member through the v4 API and save
# the raw JSON responses to SAVETOFILE, one API page per line.
import io  # io.open offers the same text-mode API on Python 2 and Python 3

# Fill these in before running: the member id whose answers are fetched and
# the values sent as the 'x-udid' and 'authorization' request headers.
ZHIHUID = ''
ZHIHU_X_UDID = '""'
ZHIHU_AUTH = ''

ANSWER_URL = 'https://www.zhihu.com/api/v4/members/{}/answers'.format(ZHIHUID)
PAYLOAD = {'include': 'data[*].is_normal,content,created_time;'}
HEADERS = {
    'x-udid': ZHIHU_X_UDID,
    'authorization': ZHIHU_AUTH,
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
}
SAVETOFILE = 'myzhihuanswers.json'

r = requests.get(ANSWER_URL, params=PAYLOAD, headers=HEADERS)
if r.status_code != 200:
    # Report the URL that was actually requested; no 'next' URL exists yet.
    print('failed: {}'.format(ANSWER_URL))
    raise SystemExit(1)

# The context manager closes the file even if a request or JSON decode
# raises; UTF-8 is explicit so the dump does not depend on the platform.
with io.open(SAVETOFILE, 'w', encoding='utf-8') as f:
    while True:
        # Only successful responses reach this point, so every line written
        # is one complete page of results.
        f.write(r.text + "\n")
        c = r.json()
        if c['paging']['is_end']:
            break
        next_url = c['paging']['next']
        # The 'next' URL is requested without PAYLOAD, as the API-provided
        # link is followed verbatim.
        r = requests.get(next_url, headers=HEADERS)
        if r.status_code != 200:
            # Stop without writing the failed response body to the dump.
            print('failed: {}'.format(next_url))
            break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment