-
-
Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
import json | |
import requests | |
import logging as log | |
# Configure the root logger at import time so DEBUG-and-above records
# (including the token/cookie messages logged by FollowerExtractor) are emitted.
log.basicConfig(level=log.DEBUG)
class FollowerExtractor:
    """
    Extracts followers / followings for a given Instagram profile.

    Constructing an instance logs in with the supplied credentials and keeps
    the resulting CSRF token and cookie string; every later query re-sends
    them as request headers.
    """

    # Part of the ``q`` query that is identical for both relationship edges:
    # the per-user fields requested plus the paging information.
    _QUERY_TAIL = (
        " count,"
        " nodes {"
        " id,"
        " is_verified,"
        " followed_by_viewer,"
        " requested_by_viewer,"
        " full_name,"
        " profile_pic_url,"
        " username"
        " },"
        " page_info {"
        " end_cursor,"
        " has_next_page"
        " }"
        "}"
        " }"
    )

    def __init__(self, username, password):
        """
        Log in to Instagram and remember the session credentials.

        :param username: Instagram account name used to log in
        :param password: Password for that account
        """
        self.csrf_token, self.cookie_string = FollowerExtractor.login_instagram(username, password)
        # NOTE(review): the cookie string carries the session cookie; consider
        # not logging it at INFO level outside of debugging.
        log.info("CSRF Token set to %s", self.csrf_token)
        log.info("Cookie String set to %s", self.cookie_string)

    @staticmethod
    def get_csrf_and_cookie_string():
        """
        Fetch an anonymous CSRF token and cookie string from the home page.

        :return: A ``(csrf_token, cookie_string)`` tuple taken from the
                 ``csrftoken`` cookie and the raw ``set-cookie`` header
        """
        resp = requests.head("https://www.instagram.com")
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def login_instagram(username, password):
        """
        Post the credentials to Instagram's AJAX login endpoint.

        :param username: Instagram account name
        :param password: Password for that account
        :return: A ``(csrf_token, cookie_string)`` tuple read from the login response
        """
        csrf_token, cookie_string = FollowerExtractor.get_csrf_and_cookie_string()
        data = {"username": username, "password": password}
        # No explicit content-length header: requests derives it from the
        # encoded form body, whose size depends on the credentials.
        resp = requests.post("https://www.instagram.com/accounts/login/ajax/",
                             data=data,
                             headers={
                                 "referer": "https://www.instagram.com/",
                                 "accept": "*/*",
                                 "Accept-Language": "en-GB,en;q=0.8",
                                 "cache-control": "no-cache",
                                 "Content-Type": "application/x-www-form-urlencoded",
                                 "cookie": cookie_string,
                                 "origin": "https://www.instagram.com",
                                 "pragma": "no-cache",
                                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
                                 "x-csrftoken": csrf_token,
                                 "x-instagram-ajax": "1",
                                 "X-Requested-With": "XMLHttpRequest"
                             })
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def _lookup_user_id(username):
        """
        Resolve a username to its user id via the profile JSON endpoint.

        :param username: Profile name to look up
        :return: The ``['user']['id']`` value of the profile JSON
        """
        # Trailing slash before the query string avoids an extra redirect hop.
        resp = requests.get("https://www.instagram.com/%s/?__a=1" % username)
        return json.loads(resp.text)['user']['id']

    @staticmethod
    def _collect_pages(fetch_edge, save_batch):
        """
        Walk every page of a relationship edge and gather all user nodes.

        :param fetch_edge: Callable taking an end cursor (``None`` for the first
                           page) and returning the edge dict with ``nodes`` and
                           ``page_info``
        :param save_batch: Callable invoked once per page with that page's nodes
        :return: A new list holding the nodes of every page, in order
        """
        collected = []
        cursor = None
        while True:
            edge = fetch_edge(cursor)
            batch = edge['nodes']
            save_batch(batch)
            # Copy into our own list so the response data is never mutated.
            collected.extend(batch)
            page_info = edge['page_info']
            if not page_info['has_next_page']:
                return collected
            cursor = page_info['end_cursor']

    def extract_followed_by(self, username, user_id=None):
        """
        Extract every user that follows the given profile.

        :param username: Profile whose followers are extracted
        :param user_id: User id of the profile; looked up when omitted
        :return: A list of all follower nodes across every page
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        return self._collect_pages(
            lambda cursor: self.query_followed_by(username, user_id, cursor)['followed_by'],
            self.save_followed_by)

    def extract_following(self, username, user_id=None):
        """
        Extract every user that the given profile follows.

        :param username: Profile whose followings are extracted
        :param user_id: User id of the profile; looked up when omitted
        :return: A list of all followed-user nodes across every page
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        return self._collect_pages(
            lambda cursor: self.query_following(username, user_id, cursor)['follows'],
            self.save_following)

    def query_following(self, username, user_id, end_cursor=None):
        """
        Request one page of the users that ``user_id`` follows.

        :param username: Profile name, used to build the referrer header
        :param user_id: User id of the profile
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        post_data = self.get_following_params(user_id, end_cursor)
        req = requests.post("https://www.instagram.com/query/", data=post_data, headers=headers)
        return json.loads(req.text)

    def query_followed_by(self, username, user_id, end_cursor=None):
        """
        Request one page of the users that follow ``user_id``.

        :param username: Profile name, used to build the referrer header
        :param user_id: User id of the profile
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        post_data = self.get_followed_by_params(user_id, end_cursor)
        req = requests.post("https://www.instagram.com/query/", data=post_data, headers=headers)
        return json.loads(req.text)

    def get_headers(self, referrer):
        """
        Returns a bunch of headers we need to use when querying Instagram
        :param referrer: The page referrer URL
        :return: A dict of headers
        """
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest"
        }

    @staticmethod
    def _relationship_params(edge, user_id, end_cursor=None):
        """
        Build the POST parameters for one page of a relationship edge.

        :param edge: Edge name used in the query (``followed_by`` or ``follows``)
        :param user_id: User id of the profile being queried
        :param end_cursor: The end cursor Instagram specifies, or None for the first page
        :return: A dict of request parameters
        """
        if end_cursor is None:
            start_query = "ig_user(%s) { %s.first(20) {" % (user_id, edge)
        else:
            start_query = "ig_user(%s) { %s.after(%s, 20) {" % (user_id, edge, end_cursor)
        return {
            'q': start_query + FollowerExtractor._QUERY_TAIL,
            "ref": "relationships::follow_list"
        }

    @staticmethod
    def get_followed_by_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followers.
        This can be modified to return less information.
        :param user_id: User id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params("followed_by", user_id, end_cursor)

    @staticmethod
    def get_following_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followed users.
        This can be modified to return less information.
        :param user_id: User id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params("follows", user_id, end_cursor)

    def save_following(self, following):
        """
        Called when a new batch of following users has been extracted from Instagram
        :param following: Users the profile is following
        """
        for user in following:
            print("Following: %s" % user['username'])

    def save_followed_by(self, followed_by):
        """
        Called when a new batch of followed_by users has been extracted from Instagram
        :param followed_by: Users the profile is followed by
        """
        for user in followed_by:
            print("Followed By: %s" % user['username'])
if __name__ == '__main__':
    # Example run: replace the placeholder credentials with a real account,
    # then dump who the target profile follows and who follows it.
    credentials = ("your_username", "your_password")
    target_profile = "justintimberlake"

    extractor = FollowerExtractor(*credentials)
    extractor.extract_following(target_profile)
    extractor.extract_followed_by(target_profile)
Not sure if it's the redirect that's causing it. I get that as well, and my response is fine.
I'll see if I can replicate your issue over the weekend. It could be a geo location thing, so I'll try it out on a VPN as well.
The 301 was due to a couple of missing slashes. I tried to submit a fix showing where, but I am not sure it was pushed properly (I have never actually collaborated on anything on GitHub).
I tried using a VPN to UK and got same results. I also tried on linux. Same. Maybe a user geotag? I might try creating a usr through VPN and testing..
Has anyone got this working? When I run it, it manages to get the token, but I have a 403 response from the authenticated requests. Any advice?
I cannot work it out either; I also got a 403 response. I assume Instagram is forbidding some personal applications.
Good Morning,
I'm trying to find some algorithm in python that can collect geolocation data from instagram here in my city. Do you know any way? Thank you very much in advance.
Hi, does this code still work? Instagram made some changes recently, and it now needs a variable like query_id.
@samequefarias - geolocation instagram
This is one workaround but i'm sure there is a better way,
- make a call to facebook graph API with lat lon of city -> get IDs of locations
https://graph.facebook.com/search?q=&type=place&center=51.5074,-0.1278&distance=5000&access_token=ACCESS-TOKEN - make a call to the Instagram API with the Facebook location IDs -> get Instagram location IDs
https://api.instagram.com/v1/locations/search?facebook_places_id=273471170716&access_token=ACCESS-TOKEN - make a call to instagram api with location ID -> get recent media
https://api.instagram.com/v1/locations/30824484/media/recent?access_token=ACCESS-TOKEN
How did you fix it? I have the same error now.
Seems i can't even log in.
send: b'password=string&username=string'
reply: 'HTTP/1.1 403 Forbidden\r\n'
What am I doing wrong?
Until the login is fine:
Then this appears:
## reply: 'HTTP/1.1 301 Moved Permanently\r\n'
But apparently it sent the POST fine:
header: Strict-Transport-Security header: Content-Type header: Vary header: Content-Encoding header: Cache-Control header: Expires header: Content-Language header: Date header: Pragma header: Set-Cookie header: Set-Cookie header: Set-Cookie header: Connection header: Content-Length send: b'POST /query/ HTTP/1.1\r\nHost: www.instagram.com\r\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36\r\nAccept-Encoding: gzip, deflate\r\naccept: application/json, text/javascript, */*; q=0.01\r\nConnection: keep-alive\r\nreferer: https://www.instagram.com/justintimberlake\r\nAccept-Language: en-GB,en;q=0.8,en-US;q=0.6\r\nContent-Type: application/x-www-form-urlencoded; charset=UTF-8\r\ncookie: ds_user_id=3667668898; expires=Wed, 05-Apr-2017 22:14:58 GMT; Max-Age=7776000; Path=/, csrftoken=**XXXX**; expires=Thu, 04-Jan-2018 22:14:58 GMT; Max-Age=31449600; Path=/; Secure, target=""; expires=Thu, 01-Jan-1970 00:00:00 GMT; Max-Age=0; Path=/, sessionid=**ZZZ**; expires=Wed, 05-Apr-2017 22:14:58 GMT; HttpOnly; Max-Age=7776000; Path=/; Secure\r\norigin: https://www.instagram.com\r\nx-csrftoken: **XXXX**\r\nx-instagram-ajax: 1\r\nX-Requested-With: XMLHttpRequest\r\nContent-Length: 307\r\n\r\n'