Last active
December 14, 2015 23:27
-
-
Save milesrout/a88b5dff38ea03eb2f30 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import itertools
import time

try:
    # Python 3.3+: the ABCs live in collections.abc (the aliases that used to
    # be reachable through ``collections`` were removed in Python 3.10).
    from collections.abc import Iterable
except ImportError:
    # Python 2 fallback.
    from collections import Iterable

import praw
# Number of "hot" submissions requested from each subreddit; it is also used
# to scale the progress percentage printed while scraping.
# Original author's note: maximum 1000.
SUBMISSION_LIMIT = 100
def flatten(it):
    """Yield the leaves of an arbitrarily nested iterable, depth first.

    Every element of ``it`` that is itself iterable is descended into
    recursively; anything else is yielded unchanged.  ``str`` values are
    treated as leaves: a one-character string iterates to itself, so
    descending into strings would recurse forever.

    ``Iterable`` is the abstract base class imported at the top of the file
    (``collections.abc.Iterable``); the old ``collections.Iterable`` alias no
    longer exists on Python 3.10+.
    """
    for x in it:
        if isinstance(x, Iterable) and not isinstance(x, str):
            for y in flatten(x):
                yield y
        else:
            yield x
# Shared Reddit API client used by the scraping loop below.
# NOTE(review): with the PRAW 3 style API this file uses elsewhere
# (``praw.objects``, ``get_subreddit``), the positional argument is presumably
# the user-agent string -- confirm against the installed PRAW version.
reddit = praw.Reddit('submissions/comment scraper by /u/milesrout v0.1.')
# Subreddits to scrape; the script reports the comment authors they share.
subreddit_names = [
    'newzealand',
    'programming',
]

# Maps subreddit name -> set of comment author names seen in that subreddit.
# Filled in by the scraping loop.
subreddit_authors = {}
# takes iterable of comments, gives those comments and all of their replies
def all_comments_impl(comments):
    """Return an iterator over ``comments`` followed by all nested replies.

    The given items are produced first, exactly as passed in (unfiltered).
    After them come, for each item accepted by ``cfilt``, the result of
    applying this function recursively to that item's ``replies``.

    ``comments`` may be any iterable, including a one-shot iterator.
    """
    # The items are walked twice: once to emit them and once to descend into
    # their replies.  A one-shot iterator (e.g. the ``filter`` object that
    # ``cfilt`` returns on Python 3) would be exhausted by the first pass and
    # silently drop every reply, so take a reusable snapshot first.
    comments = list(comments)
    return itertools.chain(
        comments,
        itertools.chain.from_iterable(
            all_comments_impl(c.replies) for c in cfilt(comments)))
def cfilt(it):
    """Keep only the items of ``it`` accepted by ``is_comment_with_author``.

    Returns whatever the built-in ``filter`` returns: a lazy, single-pass
    iterator on Python 3 (a list on Python 2).  Callers that need to walk the
    result more than once must materialise it themselves.
    """
    return filter(is_comment_with_author, it)
def is_comment_with_author(comm):
    """Return True if ``comm`` is a real comment whose author is set.

    An item is rejected when it is a ``praw.objects.MoreComments`` instance
    or when its ``author`` attribute is ``None``.
    """
    # The isinstance test must come first: ``author`` is only read on items
    # that are not MoreComments instances.
    # NOTE(review): ``author is None`` presumably marks deleted accounts --
    # confirm against the PRAW documentation.
    return not isinstance(comm, praw.objects.MoreComments) and comm.author is not None
# takes submission, gives not just top-level comments but ALL comments
def all_comments(submission):
    """Return every authored comment of ``submission``, at any depth.

    The top-level ``submission.comments`` are filtered with ``cfilt``,
    expanded with ``all_comments_impl`` to include nested replies, and the
    combined stream is filtered with ``cfilt`` once more so that only items
    accepted by ``is_comment_with_author`` are returned.
    """
    # Materialise the filtered top-level comments: on Python 3 ``cfilt``
    # returns a one-shot iterator, and the expansion step needs to traverse
    # these items more than once.
    top_level = list(cfilt(submission.comments))
    return cfilt(all_comments_impl(top_level))
# Scrape each configured subreddit, collecting the names of everyone who
# commented on its current "hot" submissions.
for j, subname in enumerate(subreddit_names):
    subreddit = reddit.get_subreddit(subname)
    authors = set()
    for i, subm in enumerate(subreddit.get_hot(limit=SUBMISSION_LIMIT)):
        # Progress across all subreddits, assuming SUBMISSION_LIMIT
        # submissions per subreddit.  The number has to be formatted into the
        # string: adding a float to "%" raises TypeError.
        done = i + SUBMISSION_LIMIT * j
        total = SUBMISSION_LIMIT * len(subreddit_names)
        print("{0}%".format(100 * float(done) / total))
        for comm in all_comments(subm):
            if comm.author is not None:
                authors.add(comm.author.name)
    subreddit_authors[subname] = authors

# Authors who commented in every scraped subreddit.  ``set.intersection``
# needs at least one set, so an empty configuration yields an empty result
# instead of a TypeError.
if subreddit_authors:
    common_authors = set.intersection(*subreddit_authors.values())
else:
    common_authors = set()
# print() with a single argument behaves the same on Python 2 and Python 3.
print(common_authors)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment