Created
January 26, 2017 07:31
-
-
Save wrfly/e499cab9976be92c2824d8c5a2d0dde7 to your computer and use it in GitHub Desktop.
获取知乎上 你关注的人,关注你的人,和,你关注他但他却没关注你的人
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding:utf-8 -*-
# A script to get Zhihu followers and followees.
import json
import urllib2
from contextlib import closing
# Request headers attached to every API call. The User-Agent and Referer make
# the request look like desktop Chrome browsing the "wrfly" followers page.
# NOTE(review): "authorization" is a personal bearer token committed in source.
# It is account-specific and presumably short-lived; consider loading it from
# the environment or a config file instead of hard-coding it here.
HEADERS = {
    "Pragma": "no-cache",
    "DNT": "1",
    "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6,en-US;q=0.4",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    "Accept": "*/*",
    "Cache-Control": "no-cache",
    "authorization": "Bearer Mi4wQUlDQzZlaDhOZ3NBVU1MbnNudzJDeGNBQUFCaEFsVk5FREN4V0FBc01PeDUzb3pMRVV6V0JsUmpWV3dLRU5DRU13|1485415204|ceaa883bc63bc9bf156e2f13053c4ace932ff38a",
    "Connection": "keep-alive",
    "Referer": "https://www.zhihu.com/people/wrfly/followers",
}
def get_url(user, follow_type, offset=0, limit=20):
    """Build the Zhihu v4 API URL for one page of a member's follow list.

    Args:
        user: Member identifier inserted into the URL path (this file uses
            "wrfly").
        follow_type: Path segment naming the list; this file uses
            "followers" and "followees".
        offset: Value of the ``offset`` query parameter (default 0).
        limit: Value of the ``limit`` query parameter (default 20).

    Returns:
        The request URL as a string. With the default ``offset`` and
        ``limit`` it is identical to the URL the original code produced.
    """
    # Already percent-encoded field selector, i.e.
    # data[*].answer_count,articles_count,follower_count,is_followed,
    # is_following,badge[?(type=best_answerer)].topics
    include = (
        "data%5B*%5D.answer_count%2Carticles_count%2Cfollower_count"
        "%2Cis_followed%2Cis_following"
        "%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics"
    )
    # str.format is safe here: the template has no braces, and unlike
    # %-formatting it is not confused by the "%5B"-style escapes.
    return (
        "https://www.zhihu.com/api/v4/members/{0}/{1}"
        "?include={2}&offset={3}&limit={4}"
    ).format(user, follow_type, include, offset, limit)
def decode_json(data):
    """Pretty-print *data* as JSON on standard output.

    Despite the name this serializes rather than decodes: the object is
    dumped with sorted keys and a four-space indent, then printed.

    Args:
        data: Any JSON-serializable object.

    Returns:
        None.
    """
    pretty = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
    # A single parenthesized argument prints the same text under Python 2's
    # print statement and Python 3's print function.
    print(pretty)
def get_followers(url, timeout=30):
    """Fetch one page of a follow list and unpack its paging information.

    Args:
        url: Full API URL of the page to request.
        timeout: Timeout in seconds passed to ``urllib2.urlopen``
            (default 30), so a stalled connection cannot hang the script.

    Returns:
        A tuple ``(is_end, next_url, data)`` read from the JSON response:
        ``paging["is_end"]``, ``paging["next"]`` (``None`` when the key is
        absent) and the top-level ``"data"`` entry.

    Raises:
        urllib2.URLError: The request failed (includes ``HTTPError``).
        ValueError: The response body is not valid JSON.
        KeyError: The response lacks ``"paging"``, ``"is_end"`` or ``"data"``.
    """
    req = urllib2.Request(url, headers=HEADERS)
    # urllib2 responses are not context managers on Python 2; closing()
    # guarantees the connection is released even if read() raises.
    with closing(urllib2.urlopen(req, timeout=timeout)) as resp:
        content = resp.read()
    page = json.loads(content)
    paging = page["paging"]
    return paging["is_end"], paging.get("next"), page["data"]
def calculte_followers(url, fetch_page=None):
    """Walk every page of a follow list, starting at *url*.

    Args:
        url: URL of the first page.
        fetch_page: Optional callable that takes a URL and returns
            ``(is_end, next_url, data)``. Defaults to ``get_followers``.

    Returns:
        A tuple ``(followers, simple_followers, url_followers)``: the raw
        entries, their ``"name"`` values and their ``"url_token"`` values,
        all in the order the pages returned them.

    Raises:
        KeyError: An entry lacks ``"name"`` or ``"url_token"``.
    """
    if fetch_page is None:
        # Resolved at call time so the module-level fetcher is the default.
        fetch_page = get_followers

    followers = []
    is_end = False
    while not is_end:
        is_end, next_url, data = fetch_page(url)
        followers.extend(data)
        if not next_url or next_url == url:
            # No usable next page: stop instead of requesting None or
            # re-requesting the same URL forever.
            break
        url = next_url

    simple_followers = [person["name"] for person in followers]
    url_followers = [person["url_token"] for person in followers]
    return followers, simple_followers, url_followers
def main(user='wrfly'):
    """Print follow statistics for *user*.

    Downloads the complete "followers" and "followees" lists, prints the
    size of each, then prints how many followees are absent from the
    followers list, followed by their url tokens on one line.

    Args:
        user: Member identifier passed to ``get_url`` (default "wrfly").
    """
    _, follower_names, follower_tokens = calculte_followers(
        get_url(user, "followers"))
    print("Followers Counts: %d" % len(follower_names))

    _, followee_names, followee_tokens = calculte_followers(
        get_url(user, "followees"))
    print("Followees Counts: %d" % len(followee_names))

    # Followees that are not followers; sorted so the output is stable
    # between runs (set difference alone has no defined order).
    not_following_back = sorted(set(followee_tokens) - set(follower_tokens))
    print("People who you are followed but not following you: %d"
          % len(not_following_back))
    if not_following_back:
        # One line in the original "a, b, c," format.
        print(" ".join(token + "," for token in not_following_back))
# All keys available on each returned user entry:
# ["answer_count","articles_count","avatar_url","avatar_url_template","badge","follower_count","headline","id","is_advertiser","is_followed","is_following","is_org","name","type","url","url_token","user_type"]
if __name__ == '__main__':
    import sys

    # An optional first command-line argument selects the member to inspect;
    # without it the script keeps its original default of "wrfly".
    username = sys.argv[1] if len(sys.argv) > 1 else "wrfly"
    main(username)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment