Skip to content

Instantly share code, notes, and snippets.

@milesrout
Last active December 14, 2015 23:27
Show Gist options
  • Save milesrout/a88b5dff38ea03eb2f30 to your computer and use it in GitHub Desktop.
Save milesrout/a88b5dff38ea03eb2f30 to your computer and use it in GitHub Desktop.
import collections
import itertools
import praw
import time
# Number of "hot" submissions requested from each subreddit (passed as the
# `limit` argument of get_hot below). Maximum 1000.
SUBMISSION_LIMIT = 100
def flatten(it):
    """Lazily yield the leaves of an arbitrarily nested iterable.

    Elements are produced depth first, in their original order. Any
    element that is itself iterable is descended into, except ``str``
    instances, which are yielded whole instead of being split into
    characters.

    Args:
        it: the (possibly nested) iterable to flatten.

    Yields:
        Every non-iterable element, plus every string, found in ``it``.
    """
    # The ABC aliases were removed from the top-level ``collections``
    # module in Python 3.10; ``collections.abc`` is their real home. The
    # fallback keeps the function usable on Python 2 as well.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # An explicit stack of iterators replaces recursion, so very deeply
    # nested input cannot exhaust the interpreter's recursion limit.
    pending = [iter(it)]
    while pending:
        for x in pending[-1]:
            if isinstance(x, Iterable) and not isinstance(x, str):
                # Descend into the nested iterable; the parent iterator
                # stays on the stack and is resumed once the child is done.
                pending.append(iter(x))
                break
            yield x
        else:
            # The innermost iterator is exhausted: go back to its parent.
            pending.pop()
# Shared reddit client; the string argument identifies this script.
reddit = praw.Reddit(
    'submissions/comment scraper by /u/milesrout v0.1.'
)

# Subreddits whose comment authors are collected and then intersected.
subreddit_names = ['newzealand', 'programming']

# subreddit name -> set of comment author names, filled in by the loop below.
subreddit_authors = dict()
# takes iterable of comments, gives those comments and all of their replies
def all_comments_impl(comments):
    """Return an iterator over ``comments`` followed by all nested replies.

    The items of ``comments`` come first, unfiltered. After them, for each
    item that passes ``cfilt``, come the results of applying this function
    recursively to that item's ``replies``.

    Args:
        comments: iterable of comment-like objects. It may be a one-shot
            iterator (for example the ``filter`` object that ``cfilt``
            returns on Python 3).

    Returns:
        An ``itertools.chain`` over this level and every deeper level.
    """
    # The level is walked twice: once to emit it and once to find replies.
    # Materialise it first, otherwise a one-shot iterator is exhausted by
    # the first walk and no replies are ever visited.
    comments = list(comments)
    nested = (all_comments_impl(c.replies) for c in cfilt(comments))
    return itertools.chain(comments, itertools.chain.from_iterable(nested))
def cfilt(it):
    """Keep only the items of ``it`` accepted by ``is_comment_with_author``."""
    predicate = is_comment_with_author
    return filter(predicate, it)
def is_comment_with_author(comm):
    """Tell whether ``comm`` is a real comment that still has an author.

    Returns False for ``praw.objects.MoreComments`` placeholders and for
    any object whose ``author`` attribute is None; True otherwise.
    """
    # Placeholders are rejected first, so their ``author`` is never read.
    if isinstance(comm, praw.objects.MoreComments):
        return False
    return comm.author is not None
# takes submission, gives not just top-level comments but ALL comments
def all_comments(submission):
    """Return the authored comments of ``submission``, nested ones included.

    ``submission.comments`` is filtered with ``cfilt``, expanded through
    ``all_comments_impl``, and the expanded stream is filtered with
    ``cfilt`` once more.
    """
    top_level = cfilt(submission.comments)
    expanded = all_comments_impl(top_level)
    return cfilt(expanded)
# Collect, for every configured subreddit, the names of all users who
# commented on its current "hot" submissions, then print the users that
# appear in every subreddit.

# Denominator of the progress figure: the number of submissions requested
# across all subreddits.
total_submissions = SUBMISSION_LIMIT * len(subreddit_names)

for j, subname in enumerate(subreddit_names):
    subreddit = reddit.get_subreddit(subname)
    authors = set()
    for i, subm in enumerate(subreddit.get_hot(limit=SUBMISSION_LIMIT)):
        # Progress as a percentage of all requested submissions. It is
        # formatted rather than concatenated: float + str raises TypeError.
        percent = 100 * float(i + SUBMISSION_LIMIT * j) / total_submissions
        print("{}%".format(percent))
        authors.update(
            comm.author.name
            for comm in all_comments(subm)
            if comm.author is not None
        )
    subreddit_authors[subname] = authors

# print() as a function call is valid on both Python 2 and Python 3.
# set.intersection needs at least one set, so an empty configuration is
# reported as an empty result instead of raising TypeError.
if subreddit_authors:
    print(set.intersection(*subreddit_authors.values()))
else:
    print(set())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment