Last active
September 5, 2017 21:47
-
-
Save benrules2/f50dae985674c9f95c393bb052d7edba to your computer and use it in GitHub Desktop.
Reddit Sorry Counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import praw | |
def get_subreddit_comments(reddit_agent, subreddit, comments_out=None, count=100):
    """Append the bodies of a subreddit's comments to a list.

    This is a best-effort fetch: any failure while talking to Reddit is
    logged and whatever was collected so far is kept, so one bad subreddit
    does not abort a multi-subreddit run.

    Args:
        reddit_agent: Object exposing ``get_subreddit(name)``, e.g. the
            client returned by ``get_reddit_agent``.
        subreddit: Name of the subreddit to read.
        comments_out: List that comment bodies are appended to. A fresh
            list is created when omitted (never shared between calls).
        count: Forwarded as ``limit`` to the comment listing call.

    Returns:
        The list the bodies were appended to; this is ``comments_out``
        itself when one was supplied.
    """
    import logging

    logger = logging.getLogger(__name__)
    if comments_out is None:
        comments_out = []

    try:
        sub = reddit_agent.get_subreddit(subreddit)
        # NOTE(review): ``sub`` is passed explicitly even though the method
        # is already bound to it. Kept as-is to preserve behaviour; confirm
        # against the PRAW version in use that this extra positional
        # argument is intended.
        comments_raw = sub.get_comments(sub, limit=count)
        comments_flat = praw.helpers.flatten_tree(comments_raw)
        for comment in comments_flat:
            try:
                if hasattr(comment, 'comments'):
                    # Container-like entries contribute their replies.
                    for reply in comment.comments:
                        comments_out.append(reply.body)
                else:
                    comments_out.append(comment.body)
            except Exception:
                # Skip entries that cannot be read, but leave a trace.
                logger.debug("Skipping unreadable comment in %r",
                             subreddit, exc_info=True)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate while keeping the fetch best-effort.
        logger.warning("Could not fetch comments for subreddit %r",
                       subreddit, exc_info=True)
    return comments_out
def get_reddit_agent(user_agent, client_id, client_secret, redirect='http://127.0.0.1'):
    """Build a ``praw.Reddit`` client configured with OAuth app credentials.

    Args:
        user_agent: User-agent string the client identifies itself with.
        client_id: OAuth client id of the Reddit application.
        client_secret: OAuth client secret of the Reddit application.
        redirect: OAuth redirect URI registered for the application.

    Returns:
        The configured ``praw.Reddit`` instance.
    """
    # Use the caller's user agent; previously this argument was ignored and
    # a hard-coded string was sent instead.
    reddit_agent = praw.Reddit(user_agent=user_agent)
    reddit_agent.set_oauth_app_info(client_id=client_id,
                                    client_secret=client_secret,
                                    redirect_uri=redirect)
    return reddit_agent
def get_sorry_and_word_count(comment_list, apologies=('sorry', 'apologies')):
    """Count apology occurrences and total words across comments.

    Words are separated by any run of whitespace, so repeated spaces,
    newlines and empty comments do not inflate the word total. Apologies
    are matched case-insensitively as substrings of each word, so
    ``"Sorry!"`` and ``"sorrysorry"`` count once and twice respectively.

    Args:
        comment_list: Iterable of comment strings.
        apologies: Lower-case terms to look for inside each word.

    Returns:
        A ``(sorry_count, word_count)`` tuple.
    """
    sorry_count = 0
    word_count = 0
    for comment in comment_list:
        # ``split()`` without an argument drops empty strings, unlike
        # ``split(' ')`` which counts them as words.
        words = comment.split()
        word_count += len(words)
        for word in words:
            lowered = word.lower()
            sorry_count += sum(lowered.count(apology) for apology in apologies)
    return sorry_count, word_count
if __name__ == "__main__":
    # Script entry point: compare how often apologies appear in Canadian
    # subreddits versus r/all and print raw counts plus percentages.
    client_id = 'your_client_id'
    client_secret = 'your_client_secret'
    reddit_agent = get_reddit_agent('custom name for app', client_id, client_secret)

    # Canadian subreddits to sample.
    canada_reddits = ['canada', 'alberta', 'britishcolumbia', 'Manitoba', 'NewBrunswickCanada', 'newfoundland',
                      'NovaScotia', 'nunavut', 'NWT', 'ontario', 'PEI', 'saskatchewan', 'Yukon']
    # Split a 1000-comment budget evenly. Floor division keeps the limit an
    # integer on Python 3, where ``/`` would produce a float.
    per_subreddit_count = 1000 // len(canada_reddits)

    # Comments from every Canadian subreddit accumulate in one list.
    canada_comments = []
    for subreddit in canada_reddits:
        get_subreddit_comments(reddit_agent, subreddit, canada_comments, count=per_subreddit_count)
    canada_sorry, canada_total_words = get_sorry_and_word_count(canada_comments)

    # r/all stands in for the global sample, so only one subreddit is read.
    world_comments = []
    get_subreddit_comments(reddit_agent, 'all', world_comments, count=1000)
    world_sorry, world_total_words = get_sorry_and_word_count(world_comments)

    # Fetching is best-effort, so either sample may be empty; report 0.0
    # instead of dividing by zero.
    canada_percent = float(canada_sorry) / canada_total_words * 100 if canada_total_words else 0.0
    world_percent = float(world_sorry) / world_total_words * 100 if world_total_words else 0.0

    print("Canada {} / {}, All {} / {}".format(canada_sorry, canada_total_words, world_sorry, world_total_words))
    print("Canada {}% sorry, World {}%".format(canada_percent, world_percent))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment