Created
January 5, 2017 20:19
-
-
Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
Followers Extraction Instagram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import logging as log

import requests

# Configure the root logger at DEBUG level so the extractor's log calls
# (and any DEBUG output from libraries using the logging module) are emitted.
log.basicConfig(level=log.DEBUG)
class FollowerExtractor:
    """
    Extracts the followers ("followed_by") and followings ("follows") of an
    Instagram profile through the website's ``/query/`` endpoint.

    Constructing an instance logs in immediately; the resulting CSRF token and
    cookie header are stored on the instance and reused for every query.
    """

    # Field selection shared by the "followed_by" and "follows" queries. It is
    # appended verbatim to the "ig_user(...) { <edge>.<paging>(...) {" prefix.
    _QUERY_FIELDS = (
        " count,"
        " nodes {"
        " id,"
        " is_verified,"
        " followed_by_viewer,"
        " requested_by_viewer,"
        " full_name,"
        " profile_pic_url,"
        " username"
        " },"
        " page_info {"
        " end_cursor,"
        " has_next_page"
        " }"
        "}"
        " }"
    )

    def __init__(self, username, password):
        """
        Logs in to Instagram and stores the session credentials.

        :param username: Instagram account name used to log in
        :param password: Password of that account
        """
        self.csrf_token, self.cookie_string = FollowerExtractor.login_instagram(username, password)
        # NOTE(review): both values are session secrets; consider lowering the
        # level or masking them before sharing logs.
        log.info("CSRF Token set to %s", self.csrf_token)
        log.info("Cookie String set to %s", self.cookie_string)

    @staticmethod
    def _format_cookie_header(cookies):
        """
        Builds the value of a request ``Cookie`` header from a cookie mapping.

        A raw ``Set-Cookie`` response header cannot be replayed as a ``Cookie``
        request header: it carries attributes (path, expiry, ...) and several
        cookies are folded into one comma separated value.

        :param cookies: Mapping-like object whose ``items()`` yields
            ``(name, value)`` pairs, e.g. ``response.cookies``
        :return: A ``"name=value; name=value"`` string
        """
        return "; ".join("%s=%s" % (name, value) for name, value in cookies.items())

    @staticmethod
    def get_csrf_and_cookie_string():
        """
        Fetches an anonymous CSRF token and cookies from the Instagram home page.

        :return: A ``(csrf_token, cookie_string)`` tuple
        :raises KeyError: If the response did not set a ``csrftoken`` cookie
        """
        resp = requests.head("https://www.instagram.com")
        return resp.cookies['csrftoken'], FollowerExtractor._format_cookie_header(resp.cookies)

    @staticmethod
    def login_instagram(username, password):
        """
        Posts the credentials to Instagram's AJAX login endpoint.

        :param username: Instagram account name used to log in
        :param password: Password of that account
        :return: A ``(csrf_token, cookie_string)`` tuple taken from the cookies
            set by the login response
        :raises KeyError: If the login response did not set a ``csrftoken`` cookie
        """
        csrf_token, cookie_string = FollowerExtractor.get_csrf_and_cookie_string()
        data = {"username": username, "password": password}
        # No Content-Length header is given here: Requests derives it from the
        # encoded form body, and a fixed value would not match the credentials.
        resp = requests.post(
            "https://www.instagram.com/accounts/login/ajax/",
            data=data,
            headers={
                "referer": "https://www.instagram.com/",
                "accept": "*/*",
                "Accept-Language": "en-GB,en;q=0.8",
                "cache-control": "no-cache",
                "Content-Type": "application/x-www-form-urlencoded",
                "cookie": cookie_string,
                "origin": "https://www.instagram.com",
                "pragma": "no-cache",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
                "x-csrftoken": csrf_token,
                "x-instagram-ajax": "1",
                "X-Requested-With": "XMLHttpRequest",
            },
        )
        return resp.cookies['csrftoken'], FollowerExtractor._format_cookie_header(resp.cookies)

    @staticmethod
    def _lookup_user_id(username):
        """
        Resolves a profile name to its user id via the profile's ``?__a=1`` JSON.

        :param username: Profile name to look up
        :return: The value of ``['user']['id']`` in the profile JSON
        """
        resp = requests.get("https://www.instagram.com/%s?__a=1" % username)
        return json.loads(resp.text)['user']['id']

    def _extract_relationship(self, edge, query, save, username, user_id):
        """
        Walks every page of one relationship list and collects all its users.

        :param edge: Key of the list in the query response
            (``'followed_by'`` or ``'follows'``)
        :param query: Callable ``(username, user_id, end_cursor)`` returning one
            decoded response page
        :param save: Callable invoked with each page's list of users
        :param username: Profile name being queried
        :param user_id: Id of that profile, or None to look it up first
        :return: A list with the users of all pages, in page order
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        collected = []
        end_cursor = None  # None requests the first page
        while True:
            page = query(username, user_id, end_cursor)[edge]
            batch = page['nodes']
            save(batch)
            # Copy into our own list so the response data is never mutated.
            collected.extend(batch)
            if not page['page_info']['has_next_page']:
                return collected
            end_cursor = page['page_info']['end_cursor']

    def extract_followed_by(self, username, user_id=None):
        """
        Extracts every user that follows the given profile.

        Each page is passed to :meth:`save_followed_by` as soon as it arrives.

        :param username: Profile name whose followers are extracted
        :param user_id: Id of that profile; looked up from the name when None
        :return: A list of all follower nodes across all pages
        """
        return self._extract_relationship(
            'followed_by', self.query_followed_by, self.save_followed_by, username, user_id)

    def extract_following(self, username, user_id=None):
        """
        Extracts every user that the given profile follows.

        Each page is passed to :meth:`save_following` as soon as it arrives.

        :param username: Profile name whose followings are extracted
        :param user_id: Id of that profile; looked up from the name when None
        :return: A list of all following nodes across all pages
        """
        return self._extract_relationship(
            'follows', self.query_following, self.save_following, username, user_id)

    def _post_query(self, username, post_data):
        """
        Posts one query to Instagram and decodes the JSON response.

        :param username: Profile name, used to build the referer header
        :param post_data: Form parameters of the query
        :return: The decoded JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        req = requests.post("https://www.instagram.com/query/", data=post_data, headers=headers)
        return json.loads(req.text)

    def query_following(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users the profile follows.

        :param username: Profile name, used to build the referer header
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_following_params(user_id, end_cursor))

    def query_followed_by(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users that follow the profile.

        :param username: Profile name, used to build the referer header
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_followed_by_params(user_id, end_cursor))

    def get_headers(self, referrer):
        """
        Returns a bunch of headers we need to use when querying Instagram

        :param referrer: The page referrer URL
        :return: A dict of headers
        """
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest",
        }

    @staticmethod
    def _relationship_params(edge, user_id, end_cursor=None):
        """
        Builds the form parameters for one page of a relationship list.

        :param edge: Relationship to query (``'followed_by'`` or ``'follows'``)
        :param user_id: Id of the profile being queried
        :param end_cursor: The end cursor Instagram specifies, or None for the
            first page
        :return: A dict of request parameters
        """
        if end_cursor is None:
            start_query = "ig_user(%s) { %s.first(20) {" % (user_id, edge)
        else:
            start_query = "ig_user(%s) { %s.after(%s, 20) {" % (user_id, edge, end_cursor)
        return {
            'q': start_query + FollowerExtractor._QUERY_FIELDS,
            "ref": "relationships::follow_list",
        }

    @staticmethod
    def get_followed_by_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followers.
        The field selection can be modified to return less information.

        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('followed_by', user_id, end_cursor)

    @staticmethod
    def get_following_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followings.
        The field selection can be modified to return less information.

        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('follows', user_id, end_cursor)

    def save_following(self, following):
        """
        Called when a new batch of following users has been extracted from Instagram

        :param following: Batch of users from the profile's "follows" list
        """
        for user in following:
            print("Following: %s" % user['username'])

    def save_followed_by(self, followed_by):
        """
        Called when a new batch of followed_by users has been extracted from Instagram

        :param followed_by: Batch of users from the profile's "followed_by" list
        """
        for user in followed_by:
            print("Followed By: %s" % user['username'])
if __name__ == '__main__':
    # Placeholder credentials: replace with a real account before running.
    credentials = ("your_username", "your_password")
    # Profile whose relationship lists are printed.
    target_profile = "justintimberlake"

    extractor = FollowerExtractor(*credentials)
    # Followings first, then followers; each page is printed as it arrives.
    extractor.extract_following(target_profile)
    extractor.extract_followed_by(target_profile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It seems I can't even log in.
What am I doing wrong?