Created
September 15, 2018 07:55
-
-
Save wassname/422de878da51bd9e0ae321dfa5fefc5a to your computer and use it in GitHub Desktop.
Scraping user flairs from TheCulture subreddit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""Scrape user flairs from the TheCulture subreddit.

Logs in to Reddit with credentials read from ``.secrets/reddit.json``,
collects the top submissions of the subreddit over several time windows,
walks their comment trees, and records one username per distinct flair text.
The result is printed as ``[flair](username)`` entries separated by blank
lines, ready to paste into a Reddit post.

The secrets file must contain a ``username`` key; every key in it is passed
as a keyword argument to ``praw.Reddit``.
"""
import praw
import json
from tqdm import tqdm

# I store my secret here and .gitignore it rather than risk committing passwords
with open('.secrets/reddit.json') as secrets_file:
    secrets = json.load(secrets_file)

userAgent = 'python:thecultureflairs.py:v1.0 (by {})'.format(secrets['username'])
reddit = praw.Reddit(user_agent=userAgent, **secrets)

# Sanity check that the login worked before spending time on scraping.
assert reddit.user.me() == secrets['username'], 'should return usename'
print('user:', reddit.user.me())

subreddit = reddit.subreddit('theculture')

# Maps flair text -> username. If several users share the same flair text,
# the one seen last overwrites the earlier ones.
data = {}

# Get a sample of top submissions
all_submission = []
for time_filter in ['day', 'month', 'week', 'year', 'all']:
    all_submission += list(subreddit.top(time_filter=time_filter))
all_submission = set(all_submission)  # make unique

# Walk through comments finding flairs and usernames
for submission in tqdm(all_submission, desc='submissions'):
    submission.comments.replace_more()
    for comment in submission.comments.list():
        if isinstance(comment, praw.models.reddit.more.MoreComments):
            # Any MoreComments placeholders still present after
            # replace_more() are reported and skipped.
            print("Ops got a MoreCommments object, ignoring", comment)
        elif isinstance(comment, praw.models.reddit.comment.Comment):
            # NOTE(review): PRAW is understood to report ``author`` as None
            # for deleted accounts; skip those instead of crashing on
            # ``None.name`` part-way through the scrape.
            if comment.author_flair_text and comment.author is not None:
                data[comment.author_flair_text] = comment.author.name

# Format for reddit post: one "[flair](username)" entry per flair, sorted by
# flair text, each followed by a blank line.
sorted_flairs = sorted(k for k in data if k)
s = ''.join(
    '[{}]({})\n\n'.format(flair, data[flair]) for flair in sorted_flairs
)
print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment