Skip to content

Instantly share code, notes, and snippets.

@j-mcc1993
Last active July 16, 2020 17:12
Show Gist options
  • Save j-mcc1993/0880415bd92d52eddaac to your computer and use it in GitHub Desktop.
Save j-mcc1993/0880415bd92d52eddaac to your computer and use it in GitHub Desktop.
Reddit Scraper 2.0 with Imgur API
#!/usr/local/bin/python3.5
import datetime
import os
import pprint
from contextlib import closing
from sys import argv

try:
    import urllib2
except ImportError:  # Python 3: urllib2's urlopen lives in urllib.request
    import urllib.request as urllib2

import praw
from imgurpython import ImgurClient
# Set Imgur authentication fields
# NOTE(review): REDACTED is a placeholder, not a defined name -- replace it
# with the real credential strings before running. These two values are
# passed to ImgurClient() further down.
client_id = REDACTED
client_secret = REDACTED
# Reddit authentication
# Passed to praw.Reddit() as client_id / client_secret further down.
# NOTE(review): consider loading all four secrets from the environment rather
# than committing them to the source file.
r_client_id = REDACTED
r_client_secret = REDACTED
# Get arguments from command line: subreddit name, number of posts to scan,
# and a debug flag ('1' makes download_album print the image IDs it fetched).
# Check the count first so a wrong invocation prints a usage line instead of
# an opaque tuple-unpacking ValueError.
if len(argv) != 4:
    raise SystemExit(
        'usage: %s <subreddit> <limit> <debug: 0|1>'
        % (argv[0] if argv else 'scraper')
    )
script, sub, lim, debug = argv
# Downloads image to directory given imgur ID
def download_image(img_id, directory, author):
    """Fetch one imgur image and save it as ``<author>.png`` in *directory*.

    Args:
        img_id: Imgur image ID; the image is requested from
            ``https://i.imgur.com/<img_id>.png``.
        directory: Destination directory, with or without a trailing
            path separator.
        author: File name stem used for the saved image.

    Errors raised by ``urlopen`` or by opening/writing the file propagate
    to the caller. The ``.png`` suffix is applied unconditionally; the
    downloaded bytes are written unchanged.
    """
    print(img_id + ': downloading...')
    url = 'https://i.imgur.com/%s.png' % (img_id)
    # closing() releases the connection even if read() raises, and works on
    # Python 2 as well, where the response object is not a context manager.
    with closing(urllib2.urlopen(url)) as response:
        data = response.read()
    # os.path.join tolerates a directory given without a trailing slash.
    path = os.path.join(directory, author + '.png')
    with open(path, 'wb') as out_file:
        out_file.write(data)
# Downloads an album of images given an imgur album ID
def download_album(album_id, directory, author):
    """Download every image of an imgur album into *directory*.

    Images are saved as ``<author> 0.png``, ``<author> 1.png``, ... in the
    order returned by the module-level Imgur ``client``.

    Args:
        album_id: Imgur album ID passed to ``client.get_album_images``.
        directory: Destination directory for the image files.
        author: File name stem; the image's index within the album is
            appended after a space.

    When the module-level ``debug`` flag equals ``'1'`` the list of
    downloaded image IDs is printed after the album is complete.
    """
    # create list of image objects
    image_obj_list = client.get_album_images(album_id)
    # IDs of the images downloaded so far (only used for the debug print)
    id_list = []
    for i, img in enumerate(image_obj_list):
        # Reuse the single-image path; the index keeps file names distinct.
        download_image(img.id, directory, author + ' ' + str(i))
        # The original appended `image.id`, an undefined name, which raised
        # NameError right after the first image was written.
        id_list.append(img.id)
    # Optionally print list of ID's
    if debug == '1':
        print(id_list)
# Build the API clients: imgur (album lookups) and Reddit via PRAW
client = ImgurClient(client_id, client_secret)
r = praw.Reddit(
    client_id=r_client_id,
    client_secret=r_client_secret,
    user_agent='RedditScraper2.0 by /u/I_Am_Treebeard',
)
# Request the "hot" listing of the chosen subreddit, capped at `lim` posts
subreddit = r.subreddit(sub)
link_list = subreddit.hot(limit=int(lim))
# Output folder on the Desktop, named "<subreddit> (<today's date>)"
directory = '/Users/j.mcc3093/Desktop/%s (%s)/' % (sub, datetime.date.today())
if not os.path.exists(directory):
    os.makedirs(directory)
# Main loop to scrape URLs
def _imgur_target(url):
    """Classify *url* as an imgur album or a single imgur image.

    Returns ``('album', album_id)``, ``('image', image_id)``, or ``None``
    when the URL is not hosted on imgur.com or carries no ID.
    """
    # A query string or fragment ('?1', '#0') is not part of the ID.
    for sep in ('?', '#'):
        url = url.split(sep, 1)[0]
    host, _, path = url.split('://', 1)[-1].partition('/')
    host = host.lower()
    # Only treat real imgur hosts (imgur.com, i.imgur.com, m.imgur.com, ...)
    # as imgur links, so an '/a/' path on another site is not mistaken
    # for an album.
    if host != 'imgur.com' and not host.endswith('.imgur.com'):
        return None
    segments = [part for part in path.split('/') if part]
    if not segments:
        return None
    if segments[0] == 'a':
        return ('album', segments[1]) if len(segments) > 1 else None
    # The ID is the last path segment without its extension, whatever the
    # extension is (.jpg, .png, .jpeg, ...), e.g. '/r/pics/abc.png' -> 'abc'.
    image_id = os.path.splitext(segments[-1])[0]
    return ('image', image_id) if image_id else None


for link in link_list:
    url = link.url
    # Don't download gifs
    if '.gif' in url:
        continue
    target = _imgur_target(url)
    if target is None:
        continue
    # link.author can be None (e.g. the posting account was deleted);
    # fall back to a fixed name instead of crashing on `.name`.
    author = link.author.name if link.author is not None else '[deleted]'
    kind, imgur_id = target
    if kind == 'album':
        download_album(imgur_id, directory, author)
    else:
        download_image(imgur_id, directory, author)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment