Last active
December 28, 2021 08:09
-
-
Save foresmac/8059013 to your computer and use it in GitHub Desktop.
Download a particular hashtag from Instagram. This script will download all the images with a particular hashtag from Instagram. It's not very clever, and will try to suck down every image it can find; if you use a particularly popular tag, it will take a **long time** to run. Simply run it from the command line like so: `./get_instagram_tag.py <ta…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
## get_instagram_tag | |
## by Chris Foresman | |
## @foresmac | |
## | |
## Instagram API: http://instagram.com/developer/ | |
## Requires the requests Python library | |
## (sudo) pip install requests | |
from __future__ import print_function | |
import requests | |
import json | |
import sys | |
import os | |
ACCESS_TOKEN = os.getenv('INSTAGRAM_ACCESS_TOKEN', '') | |
def get_image_urls(start_url):
    """Collect image and video URLs from a paginated Instagram API listing.

    Fetches ``start_url``, then keeps following
    ``response['pagination']['next_url']`` until a page has no next URL.
    For each entry in a page's ``data`` list, the ``standard_resolution``
    URL is taken from ``images`` (entries of type ``'image'``) or from
    ``videos`` (entries of type ``'video'``); other types are ignored.

    Args:
        start_url: URL of the first page to request.

    Returns:
        A ``(img_urls, vid_urls)`` tuple of lists. Each list is in the
        reverse of the order in which the API returned the entries
        (presumably oldest first, if the endpoint lists newest first --
        TODO confirm).

    Raises:
        KeyError: if a page has no ``data`` key (e.g. an API error payload).
        ValueError: if a response body is not valid JSON.
        requests.RequestException: if a request itself fails.
    """
    img_urls = []
    vid_urls = []

    url = start_url
    while True:
        # response.json() decodes the body itself, so this also works on
        # Python 3 versions where json.loads() rejects bytes.
        api_response = requests.get(url).json()

        for item in api_response['data']:
            if item['type'] == 'image':
                img_urls.append(item['images']['standard_resolution']['url'])
            elif item['type'] == 'video':
                vid_urls.append(item['videos']['standard_resolution']['url'])

        # A missing/empty pagination section or next_url marks the last page.
        url = (api_response.get('pagination') or {}).get('next_url') or ''
        if not url:
            break

    print('\n\nGot {0} URLs for images from Instagram API.'.format(
        len(img_urls)))
    print('Got {0} URLs for videos from Instagram API.\n'.format(
        len(vid_urls)))

    img_urls.reverse()
    vid_urls.reverse()
    return img_urls, vid_urls
def download_file_from_url(url, filename):
    """Download ``url`` and write the response body to ``filename``.

    The request is made (and its HTTP status checked) before the file is
    opened, so a failed download does not leave an empty or bogus file
    behind.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the file to create or overwrite (binary mode).

    Raises:
        requests.RequestException: if the request fails or the server
            answers with a 4xx/5xx status.
        IOError / OSError: if the file cannot be written.
    """
    response = requests.get(url)
    # Don't save an error page as if it were the requested media.
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
def _download_numbered(urls, folder, hashtag, extension):
    """Download every URL into ``folder`` as ``<hashtag>-NNN.<extension>``.

    Prints ``.`` for each successful download and ``X`` for each failure.
    Files are numbered by successful downloads, so a failure does not
    leave a gap in the numbering.

    Args:
        urls: Iterable of URLs to fetch.
        folder: Existing directory to save the files in.
        hashtag: Tag used as the filename prefix.
        extension: Filename extension without the dot (e.g. ``'jpg'``).

    Returns:
        The number of files downloaded successfully.
    """
    saved = 0
    for url in urls:
        filename = '{0}-{1:03d}.{2}'.format(hashtag, saved + 1, extension)
        full_filename = os.path.join(folder, filename)
        try:
            download_file_from_url(url, full_filename)
        except Exception:
            # Best effort: note the failure and carry on with the next URL.
            # Catching Exception (not a bare except) lets Ctrl-C abort.
            print('X', end='')
        else:
            saved += 1
            print('.', end='')
        sys.stdout.flush()
    return saved


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: get_instagram_tag.py <tag> <path_to_folder>')
        sys.exit(2)

    if not ACCESS_TOKEN:
        print(
            'You need to store your Instagram API access token in an '
            'environment variable named INSTAGRAM_ACCESS_TOKEN.')
        sys.exit(1)

    hashtag = sys.argv[1]
    # The folder argument is optional; fall back to the current directory
    # instead of raising IndexError when only the tag is given.
    if len(sys.argv) > 2 and sys.argv[2]:
        base_filepath = os.path.expanduser(sys.argv[2])
    else:
        base_filepath = os.getcwd()

    start_url_string = (
        'https://api.instagram.com/v1/tags/{0}/media/recent?access_token={1}')
    start_url = start_url_string.format(hashtag, ACCESS_TOKEN)

    img_urls, vid_urls = get_image_urls(start_url)

    if not os.path.exists(base_filepath):
        os.makedirs(base_filepath)

    if img_urls:
        img_number = _download_numbered(
            img_urls, base_filepath, hashtag, 'jpg')
        print('\n\nDownloaded {0} images tagged #{1}\n'.format(
            img_number, hashtag))

    if vid_urls:
        # Videos go into their own sub-folder of the target directory.
        video_filepath = os.path.join(base_filepath, 'videos')
        if not os.path.exists(video_filepath):
            os.makedirs(video_filepath)
        vid_number = _download_numbered(
            vid_urls, video_filepath, hashtag, 'mp4')
        print('\n\nDownloaded {0} videos tagged #{1}'.format(
            vid_number, hashtag))

    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment