Created
October 24, 2018 19:42
-
-
Save KobaKhit/86cc56202cc3ec08c3a7c1bd7f011977 to your computer and use it in GitHub Desktop.
A class that enables a user to download posts and comments from a subreddit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Reddit:
    """Download posts and their comments from a subreddit through PRAW.

    The class builds ``RedditPost`` / ``RedditComment`` records (declared
    outside this block) and can flatten them into pandas DataFrames.
    """

    def __init__(self, client_id, client_secret, user_agent='My agent'):
        """Create the underlying ``praw.Reddit`` client.

        Args:
            client_id: Reddit API client id.
            client_secret: Reddit API client secret.
            user_agent: User agent string sent with API requests.
        """
        self.reddit = praw.Reddit(client_id=client_id,
                                  client_secret=client_secret,
                                  user_agent=user_agent)

    def get_comments(self, submission):
        """Return the comments of a post as a list of ``RedditComment``.

        Args:
            submission: The post record to expand. ``get_submissions`` passes
                a ``RedditPost`` here; the attributes read are ``postid``,
                ``author``, ``body``, ``created``, ``score`` and ``comments``.

        Returns:
            A list whose first element represents the post itself (its
            comment id and parent id are both the post id), followed by one
            ``RedditComment`` per comment in the flattened comment forest.
        """
        post_id = submission.postid

        # The post is stored as the first "comment" so that every real
        # comment has a parent present in the resulting list.
        comments = [RedditComment(author=submission.author,
                                  commentid=post_id,
                                  postid=post_id,
                                  parentid=post_id,
                                  body=submission.body,
                                  created=submission.created,
                                  score=submission.score,
                                  num_char=len(submission.body),
                                  num_words=len(submission.body.split(' ')))]

        # Per the PRAW documentation, limit=None resolves every
        # "load more comments" placeholder before the forest is listed.
        submission.comments.replace_more(limit=None)

        for com in submission.comments.list():
            body = com.body
            # A missing author object is recorded as the string 'None'.
            author = com.author.name if com.author is not None else 'None'
            comments.append(RedditComment(
                author=author,
                commentid=com.id,
                # Fixed: the original referenced an undefined name `post`.
                postid=post_id,
                parentid=com.parent().id,  # id of the parent comment or post
                body=body,
                created=datetime.utcfromtimestamp(com.created_utc),
                score=com.score,
                num_char=len(body),
                num_words=len(body.split(' '))))
        return comments

    def get_submissions(self, subreddit, time_filter='day', comments=False):
        """Download the top posts of a subreddit.

        Args:
            subreddit: Name of the subreddit, e.g. ``'lakers'``.
            time_filter: Time window forwarded to PRAW's ``top`` listing.
            comments: When true, each post's ``comments`` attribute is
                replaced by the list returned from ``get_comments``;
                otherwise it is set to ``None``.

        Returns:
            A list of ``RedditPost`` records.
        """
        posts = []
        listing = self.reddit.subreddit(subreddit).top(time_filter=time_filter)
        for submission in listing:
            author = submission.author
            post = RedditPost(
                postid=submission.id,
                # A missing author object is recorded as the string 'None'.
                author=author.name if author is not None else 'None',
                title=submission.title,
                body=submission.selftext,
                created=datetime.utcfromtimestamp(submission.created_utc),
                score=submission.score,
                num_comments=submission.num_comments,
                # Raw comment forest; get_comments() reads it from the post.
                comments=submission.comments)
            post.comments = self.get_comments(post) if comments else None
            posts.append(post)
        return posts

    @staticmethod
    def posts_to_df(posts):
        """Build a DataFrame with one row per post.

        Every ``RedditPost`` dataclass field except ``comments`` becomes a
        column.
        """
        post_keys = [k for k in RedditPost.__dataclass_fields__
                     if k != 'comments']
        return pd.DataFrame([{k: getattr(p, k) for k in post_keys}
                             for p in posts])

    @staticmethod
    def comments_to_df(posts):
        """Build a DataFrame with one row per comment across all posts.

        Posts whose ``comments`` attribute is ``None`` (as produced by
        ``get_submissions(..., comments=False)``) contribute no rows.
        """
        # Fixed: the original iterated over an undefined name `subs`.
        return pd.DataFrame([c.__dict__
                             for p in posts if p.comments is not None
                             for c in p.comments])
def main():
    """Download top r/lakers posts with comments and build two DataFrames.

    NOTE(review): ``client_id``, ``client_secret`` and ``user_agent`` are not
    defined in this block; they are assumed to be module-level names set
    elsewhere - confirm before running.

    Returns:
        A ``(df_posts, df_comments)`` tuple of pandas DataFrames.
    """
    reddit = Reddit(client_id, client_secret, user_agent)
    # Download posts together with their comments.
    posts = reddit.get_submissions('lakers', comments=True)
    # Fixed: the original passed an undefined name `subs` to both helpers.
    df_posts = reddit.posts_to_df(posts)  # create posts dataframe
    df_comments = reddit.comments_to_df(posts)  # create comments dataframe
    return df_posts, df_comments
# Run the download only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment