Skip to content

Instantly share code, notes, and snippets.

@wassname
Created September 15, 2018 07:55
Show Gist options
  • Select an option

  • Save wassname/422de878da51bd9e0ae321dfa5fefc5a to your computer and use it in GitHub Desktop.

Select an option

Save wassname/422de878da51bd9e0ae321dfa5fefc5a to your computer and use it in GitHub Desktop.
Scraping user flairs from TheCulture subreddit
# coding: utf-8
import praw
import json
from tqdm import tqdm
# Scrape user flairs from r/theculture and print them as a markdown list.
#
# I store my secrets here and .gitignore the file rather than risk committing
# passwords. The context manager closes the file handle once the JSON is parsed.
with open('.secrets/reddit.json') as secrets_file:
    secrets = json.load(secrets_file)
userAgent = 'python:thecultureflairs.py:v1.0 (by {})'.format(secrets['username'])
# Every key in the secrets file is forwarded to praw.Reddit as a keyword argument.
reddit = praw.Reddit(user_agent=userAgent, **secrets)

# Sanity-check the login. Look the authenticated user up once and reuse the
# result for both the check and the printout.
me = reddit.user.me()
assert me == secrets['username'], 'should return usename'
print('user:', me)

subreddit = reddit.subreddit('theculture')

# Maps flair text -> username of a commenter seen with that flair.
# When several users share a flair, whichever is processed last wins.
data = {}

# Get a sample of top submissions. The same submission can show up under
# several time filters, so collect into a set to keep each one only once.
all_submission = set()
for time_filter in ['day', 'month', 'week', 'year', 'all']:
    all_submission.update(subreddit.top(time_filter=time_filter))

# walk through comments finding flairs and usernames
for submission in tqdm(all_submission, desc='submissions'):
    # Expand "load more comments" placeholders before flattening the tree.
    submission.comments.replace_more()
    for comment in submission.comments.list():
        if isinstance(comment, praw.models.reddit.more.MoreComments):
            print("Ops got a MoreCommments object, ignoring", comment)
        elif isinstance(comment, praw.models.reddit.comment.Comment):
            # Skip comments without an author object (PRAW reports deleted
            # accounts as None), which would otherwise raise AttributeError.
            if comment.author_flair_text and comment.author is not None:
                data[comment.author_flair_text] = comment.author.name

# format for reddit post: one "[flair](username)" entry per flair, in sorted
# order, each followed by a blank line. Only non-empty flairs are ever stored
# in data, so its keys can be sorted directly.
sorted_flairs = sorted(data)
s = ''.join(
    '[{}]({})\n\n'.format(flair, data[flair]) for flair in sorted_flairs
)
print(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment