Created
September 27, 2013 22:49
-
-
Save peteyreplies/6736315 to your computer and use it in GitHub Desktop.
A Python script, using PRAW, to scrape the 100 newest submissions in r/pics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pull a list of the 100 newest submissions in r/pics
import json
import re
import sqlite3
from pprint import pprint

import praw

import new_redditdb
# User agent string that identifies this script to reddit.
my_user_agent = ("tracking new submissions to /r/pics by /u/peteyMIT - email petey [at] mit [dot] edu")

# Matches http(s) links whose host is exactly imgur.com or i.imgur.com.
# Group 1 is "i." for direct image links (None otherwise); group 2 is the
# URL path with any query string or fragment left out.
IMGUR_URL_RE = re.compile(r"^https?://(i\.)?imgur\.com/([^?#]*)")

# Path prefixes on imgur.com that come before an album or gallery id.
IMGUR_PAGE_PREFIXES = ("a/", "gallery/")


def extract_imgur_id(url):
    """Return the imgur id embedded in *url*, or None if there is none.

    Recognised shapes (http or https):

    * ``http://i.imgur.com/<id>.<ext>``   -> ``<id>``
    * ``http://imgur.com/a/<id>``         -> ``<id>``
    * ``http://imgur.com/gallery/<id>``   -> ``<id>``
    * ``http://imgur.com/<id>``           -> ``<id>``

    The URL must *start* with the imgur host; a link that merely contains an
    imgur URL somewhere inside it is not treated as an imgur link.
    """
    match = IMGUR_URL_RE.match(url)
    if match is None:
        return None

    is_direct_link, path = match.groups()
    if is_direct_link:
        # Drop the file extension whatever its length (.jpg, .jpeg, .gifv...)
        # instead of assuming it is always four characters long.
        imgur_id = path.split(".", 1)[0]
    else:
        for prefix in IMGUR_PAGE_PREFIXES:
            if path.startswith(prefix):
                # Slice by the prefix's real length so no "/" leaks into the id.
                path = path[len(prefix):]
                break
        imgur_id = path.rstrip("/")

    # An empty remainder (e.g. the bare imgur front page) is not an id.
    return imgur_id or None


def submission_info(submission):
    """Build the dictionary of attributes stored for one submission.

    *submission* is an item yielded by PRAW's ``get_new``; only the
    attributes read below are used. The returned dict is what gets passed
    to ``new_redditdb.insert``.
    """
    # submission.author can be None (e.g. the account was deleted); fall back
    # to reddit's own placeholder instead of crashing on ``None.name``.
    author = submission.author
    return {
        'author_name': author.name if author is not None else '[deleted]',
        'created_time': submission.created_utc,
        'reddit_id': submission.id,
        # score/vote/comment counters are currently disabled:
        #'downvotes': submission.downs,
        #'upvotes': submission.ups,
        #'comment_count': submission.num_comments,
        #'score': submission.score,
        'title': submission.title,
        'submitted_url': submission.url,
        'domain': submission.domain,
        'reddit_shortlink': submission.short_link,
        # None when the submission does not link to imgur.
        'imgur': extract_imgur_id(submission.url),
    }


def main(subreddit_name='pics', limit=100):
    """Fetch the newest submissions of a subreddit and store each of them.

    Logs in to reddit, requests up to *limit* of the newest submissions in
    *subreddit_name*, and passes one info dict per submission to
    ``new_redditdb.insert``. The defaults reproduce the script's original
    target: the 100 newest submissions of /r/pics.
    """
    # NOTE(review): the credentials below are placeholders and must be
    # replaced with a real account before running.
    reddit = praw.Reddit(user_agent=my_user_agent)
    reddit.login('username','password')

    new_submissions = reddit.get_subreddit(subreddit_name).get_new(limit=limit)

    # connect to & prep database
    new_redditdb.connect()
    try:
        for submission in new_submissions:
            new_redditdb.insert(submission_info(submission))
    finally:
        # Always call close(), even if a fetch or insert fails part-way.
        new_redditdb.close()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment