Skip to content

Instantly share code, notes, and snippets.

@tomkdickinson
Created January 5, 2017 20:19
Show Gist options
  • Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
Followers Extraction Instagram
import json
import requests
import logging as log
# Configure the root logger at import time so DEBUG-and-above records are emitted.
log.basicConfig(level=log.DEBUG)
class FollowerExtractor():
    """
    Extracts the followers and followings of a given Instagram profile.

    An instance logs in on construction and keeps the resulting CSRF token
    and cookie string, which are then sent with every request to the
    ``/query/`` endpoint.
    """

    # Endpoint every relationship query is POSTed to.
    _QUERY_URL = "https://www.instagram.com/query/"

    # Field selection shared by the ``followed_by`` and ``follows`` queries;
    # it is appended to the relationship-specific opening of the query.
    _QUERY_BODY = (
        " count,"
        " nodes {"
        " id,"
        " is_verified,"
        " followed_by_viewer,"
        " requested_by_viewer,"
        " full_name,"
        " profile_pic_url,"
        " username"
        " },"
        " page_info {"
        " end_cursor,"
        " has_next_page"
        " }"
        "}"
        " }"
    )

    def __init__(self, username, password):
        """
        Logs in and stores the session credentials on the instance.

        :param username: Instagram username to log in with
        :param password: Password for that account
        """
        self.csrf_token, self.cookie_string = FollowerExtractor.login_instagram(username, password)
        log.info("CSRF Token set to %s", self.csrf_token)
        # NOTE(review): this writes the session cookie to the log output.
        log.info("Cookie String set to %s", self.cookie_string)

    @staticmethod
    def get_csrf_and_cookie_string():
        """
        Fetches an initial CSRF token and cookie string from the home page.

        :return: A ``(csrf_token, cookie_string)`` tuple
        """
        resp = requests.head("https://www.instagram.com")
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def login_instagram(username, password):
        """
        Posts the credentials to the AJAX login endpoint.

        :param username: Instagram username to log in with
        :param password: Password for that account
        :return: A ``(csrf_token, cookie_string)`` tuple taken from the login response
        """
        csrf_token, cookie_string = FollowerExtractor.get_csrf_and_cookie_string()
        data = {"username": username, "password": password}
        # No explicit content-length header: its correct value depends on the
        # credentials, and requests derives it from the encoded form body.
        resp = requests.post(
            "https://www.instagram.com/accounts/login/ajax/",
            data=data,
            headers={
                "referer": "https://www.instagram.com/",
                "accept": "*/*",
                "Accept-Language": "en-GB,en;q=0.8",
                "cache-control": "no-cache",
                "Content-Type": "application/x-www-form-urlencoded",
                "cookie": cookie_string,
                "origin": "https://www.instagram.com",
                "pragma": "no-cache",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
                "x-csrftoken": csrf_token,
                "x-instagram-ajax": "1",
                "X-Requested-With": "XMLHttpRequest",
            },
        )
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def _lookup_user_id(username):
        """Resolves a username to its user id via the profile's ``?__a=1`` JSON."""
        resp = requests.get("https://www.instagram.com/%s?__a=1" % username)
        return json.loads(resp.text)['user']['id']

    def _extract_relationship(self, username, user_id, edge, query, save):
        """
        Walks every page of one relationship list and collects its nodes.

        :param username: Profile whose relationship list is read
        :param user_id: Id of that profile, or None to look it up first
        :param edge: Key of the relationship in the response ('followed_by' or 'follows')
        :param query: Callable ``(username, user_id, end_cursor)`` returning one decoded page
        :param save: Callable invoked with the nodes of each page as it arrives
        :return: A list with the nodes of all pages, in the order received
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        collected = []
        end_cursor = None  # None requests the first page
        while True:
            page = query(username, user_id, end_cursor)[edge]
            nodes = page['nodes']
            save(nodes)
            # Accumulate into a separate list so earlier pages are kept and
            # the response's own list is never extended with itself.
            collected.extend(nodes)
            page_info = page['page_info']
            if not page_info['has_next_page']:
                return collected
            end_cursor = page_info['end_cursor']

    def extract_followed_by(self, username, user_id=None):
        """
        Extracts all users following the given profile.

        :param username: Profile whose followers are extracted
        :param user_id: Id of the profile; looked up from the username when None
        :return: A list of follower nodes across all pages
        """
        return self._extract_relationship(
            username, user_id, 'followed_by', self.query_followed_by, self.save_followed_by)

    def extract_following(self, username, user_id=None):
        """
        Extracts all users the given profile follows.

        :param username: Profile whose followings are extracted
        :param user_id: Id of the profile; looked up from the username when None
        :return: A list of followed-user nodes across all pages
        """
        return self._extract_relationship(
            username, user_id, 'follows', self.query_following, self.save_following)

    def _post_query(self, username, post_data):
        """POSTs a query using the profile page as referrer and returns the decoded JSON."""
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        req = requests.post(FollowerExtractor._QUERY_URL, data=post_data, headers=headers)
        return json.loads(req.text)

    def query_following(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users the profile follows.

        :param username: Profile name, used to build the referrer URL
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_following_params(user_id, end_cursor))

    def query_followed_by(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users following the profile.

        :param username: Profile name, used to build the referrer URL
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_followed_by_params(user_id, end_cursor))

    def get_headers(self, referrer):
        """
        Returns a bunch of headers we need to use when querying Instagram
        :param referrer: The page referrer URL
        :return: A dict of headers
        """
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest"
        }

    @staticmethod
    def _build_relationship_params(user_id, edge, end_cursor=None):
        """
        Builds the POST parameters for one page of a relationship query.

        :param user_id: Id of the profile being queried
        :param edge: Relationship name used in the query ('followed_by' or 'follows')
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: A dict of request parameters
        """
        if end_cursor is None:
            start_query = "ig_user(%s) { %s.first(20) {" % (user_id, edge)
        else:
            start_query = "ig_user(%s) { %s.after(%s, 20) {" % (user_id, edge, end_cursor)
        return {
            'q': start_query + FollowerExtractor._QUERY_BODY,
            "ref": "relationships::follow_list"
        }

    @staticmethod
    def get_followed_by_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followers.
        This can be modified to return less information.
        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._build_relationship_params(user_id, 'followed_by', end_cursor)

    @staticmethod
    def get_following_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followed users.
        This can be modified to return less information.
        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._build_relationship_params(user_id, 'follows', end_cursor)

    def save_following(self, following):
        """
        Called when a new batch of following users has been extracted from Instagram
        :param following: Batch of users the profile follows
        """
        for user in following:
            print("Following: %s" % user['username'])

    def save_followed_by(self, followed_by):
        """
        Called when a new batch of followed_by users has been extracted from Instagram
        :param followed_by: Batch of users who follow the profile
        """
        for user in followed_by:
            print("Followed By: %s" % user['username'])
if __name__ == '__main__':
    # Placeholder credentials: replace with a real login before running.
    credentials = ("your_username", "your_password")
    target_profile = "justintimberlake"

    extractor = FollowerExtractor(*credentials)
    # First the accounts the target follows, then the accounts following it.
    extractor.extract_following(target_profile)
    extractor.extract_followed_by(target_profile)
@OwlGreenApple
Copy link

Hi, does this code still work? Instagram made some changes recently, and it now needs a variable like query_id.

@HaiGenkiDes
Copy link

@samequefarias - geolocation instagram
This is one workaround, but I'm sure there is a better way:

  1. make a call to facebook graph API with lat lon of city -> get IDs of locations
    https://graph.facebook.com/search?q=&type=place&center=51.5074,-0.1278&distance=5000&access_token=ACCESS-TOKEN
  2. make a call to the Instagram API with the Facebook location IDs -> get Instagram location IDs
    https://api.instagram.com/v1/locations/search?facebook_places_id=273471170716&access_token=ACCESS-TOKEN
  3. make a call to instagram api with location ID -> get recent media
    https://api.instagram.com/v1/locations/30824484/media/recent?access_token=ACCESS-TOKEN

@RomanKlimov
Copy link

How did you fix it? I have the same error now.

@fnbrs
Copy link

fnbrs commented May 5, 2018

It seems I can't even log in.

send: b'password=string&username=string'
reply: 'HTTP/1.1 403 Forbidden\r\n'

What am I doing wrong?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment