Skip to content

Instantly share code, notes, and snippets.

@peteyreplies
Created September 27, 2013 22:49
Show Gist options
  • Save peteyreplies/6736315 to your computer and use it in GitHub Desktop.
Save peteyreplies/6736315 to your computer and use it in GitHub Desktop.
A Python script, using PRAW, to scrape the 100 newest submissions in r/pics
# pull a list of the 100 newest submissions in r/pics and store them in the database
import praw
import json
from pprint import pprint
import sqlite3
import new_redditdb
def parse_imgur_id(url):
    """Return the imgur id embedded in *url*, or None if there is none.

    Recognised forms (``http`` or ``https``):

    * ``i.imgur.com/<id>.<ext>``   -> ``<id>`` (direct image link)
    * ``imgur.com/a/<id>``         -> ``<id>`` (album)
    * ``imgur.com/gallery/<id>``   -> ``<id>`` (gallery)
    * ``imgur.com/<id>``           -> ``<id>`` (image page)

    For the ``imgur.com`` forms everything after the prefix is returned
    unchanged (any trailing query string or fragment is kept).
    """
    scheme, sep, rest = url.partition("://")
    if not sep or scheme.lower() not in ("http", "https"):
        return None
    host, _, path = rest.partition("/")
    host = host.lower()
    if host == "i.imgur.com":
        # Cut at the first dot so extensions of any length (.jpg, .jpeg,
        # .gifv) and trailing "?query" parts are dropped with the extension.
        return path.partition(".")[0] or None
    if host == "imgur.com":
        # Measure the prefix with len() rather than hard-coding offsets; the
        # old fixed slice for gallery links left a leading "/" on the id.
        for prefix in ("a/", "gallery/"):
            if path.startswith(prefix):
                return path[len(prefix):] or None
        return path or None
    return None


# set user agent to identify to reddit
my_user_agent = ("tracking new submissions to /r/pics by /u/peteyMIT - email petey [at] mit [dot] edu")
# login to reddit
# NOTE(review): 'username'/'password' are placeholders; do not commit real
# credentials here.
r = praw.Reddit(user_agent=my_user_agent)
r.login('username','password')
# request the new-submissions listing for r/pics, capped at 100 entries
new_submissions_generator = r.get_subreddit('pics').get_new(limit=100)
# connect to & prep database
new_redditdb.connect()
try:
    for submission in new_submissions_generator:
        # author is None when the submitting account has been deleted;
        # store None instead of crashing on `.name`.
        # NOTE(review): confirm new_redditdb.insert accepts a None author
        # (it already receives None for 'imgur').
        author = submission.author
        # store attributes of each submission in a dictionary
        info = {
            'author_name': author.name if author is not None else None,
            'created_time': submission.created_utc,
            'reddit_id': submission.id,
            #'downvotes': submission.downs,
            #'upvotes': submission.ups,
            #'comment_count': submission.num_comments,
            #'score': submission.score,
            'title': submission.title,
            'submitted_url': submission.url,
            'domain': submission.domain,
            'reddit_shortlink': submission.short_link,
            # imgur id for direct, album, gallery and page links; else None
            'imgur': parse_imgur_id(submission.url),
        }
        # write to database
        new_redditdb.insert(info)
finally:
    # always release the database, even if a submission fails mid-run
    new_redditdb.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment