Last active
May 7, 2019 03:50
-
-
Save timothycarambat/a64baf0f18e6d4916044a5aaaa2cec5d to your computer and use it in GitHub Desktop.
Reddit Moderator Comment Analysis for u/kenotism for thesis project! https://www.reddit.com/r/programmingrequests/comments/bkdjcf/reddit_web_scraping/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, json, os | |
from datetime import datetime | |
import praw | |
def get_reddit():
    """Build the PRAW client used for the Reddit requests in this script.

    The client id and secret are empty strings in this file.
    NOTE(review): presumably real API credentials have to be filled in
    before running -- confirm.

    Returns:
        The object produced by ``praw.Reddit`` for the settings below.
    """
    settings = {
        'client_id': '',
        'client_secret': '',
        'user_agent': 'Praw',
    }
    return praw.Reddit(**settings)
def get_mod_data(users_file='users.csv'):
    """Load the moderator/subreddit pairs to scrape from a CSV file.

    The first row is treated as a header and skipped.  Every following row
    contributes its first column as the moderator name and its second
    column as the subreddit.  Rows that are blank, have fewer than two
    columns, or have an empty first column are ignored.

    Args:
        users_file: Path of the CSV file to read.  Defaults to
            ``'users.csv'`` relative to the current working directory.

    Returns:
        A list of ``{'name': ..., 'subreddit': ...}`` dicts in file order.
    """
    moderators = []
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(users_file, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row; tolerate an empty file
        for row in csv_reader:
            # Blank lines come back as [] and incomplete lines have fewer
            # than two columns; indexing them would raise IndexError.
            if len(row) < 2 or row[0] == '':
                continue
            moderators.append({
                'name': row[0],
                'subreddit': row[1],
            })
    return moderators
def get_comments_for_mod_in_sub(reddit, mod_info, max_comments=100):
    """Collect a moderator's newest comments made in one subreddit.

    Iterates ``reddit.redditor(name).comments.new(limit=None)`` and keeps
    the comments whose subreddit display name matches
    ``mod_info['subreddit']`` case-insensitively.  The configured subreddit
    may be written as ``name``, ``r/name`` or ``/r/name/``.

    Args:
        reddit: Client object exposing ``redditor(name).comments.new(...)``
            (the script passes the result of ``get_reddit()``).
        mod_info: Dict with the keys ``'name'`` and ``'subreddit'``.
        max_comments: Stop once this many matching comments were collected.
            ``None`` disables the cap.  Defaults to 100.

    Returns:
        A list of dicts with the keys ``'user'``, ``'subreddit'``,
        ``'date'`` (the comment's ``created_utc``) and ``'comment'`` (the
        comment's ``body``), in the order the listing yielded them.
    """
    # Normalize the configured subreddit once, outside the loop.  Only a
    # leading "r/" is removed: str.replace('r/', '') would also eat an
    # "r/" inside the value (e.g. 'r/soccer/' -> 'socce').
    target = mod_info['subreddit'].strip().lower().strip('/')
    if target.startswith('r/'):
        target = target[2:]

    moderator_comments = []
    for comment in reddit.redditor(mod_info['name']).comments.new(limit=None):
        subreddit_name = comment.subreddit.display_name.lower()
        print(subreddit_name)  # progress output, one line per scanned comment
        if subreddit_name == target:
            moderator_comments.append({
                'user': mod_info['name'],
                'subreddit': mod_info['subreddit'],
                'date': comment.created_utc,
                'comment': comment.body
            })
        if max_comments is not None and len(moderator_comments) >= max_comments:
            break
    return moderator_comments
def get_data():
    """Scrape comments for every listed moderator and cache them as JSON.

    Builds the Reddit client with ``get_reddit()``, reads the moderator
    list with ``get_mod_data()``, fetches each moderator's comments with
    ``get_comments_for_mod_in_sub()`` and writes the resulting list of
    per-moderator comment lists to ``data.json`` in the current working
    directory, replacing any previous content.
    """
    reddit = get_reddit()
    data = [
        get_comments_for_mod_in_sub(reddit, moderator)
        for moderator in get_mod_data()
    ]
    # The context manager closes the file even when serialization or the
    # write itself raises; plain "w" suffices because nothing is read back.
    with open("data.json", "w") as json_file:
        json.dump(data, json_file)
def process_data(data_file='data.json', output_dir='outputs'):
    """Turn the cached JSON comment data into one CSV file per user.

    Reads a list of per-moderator comment lists from ``data_file``, removes
    the files already present in ``output_dir`` and then writes
    ``<user>_mod_comments.csv`` for every non-empty list.  Each file starts
    with a header row followed by one row per comment; the last column is
    left empty.

    Args:
        data_file: Path of the JSON file to read.  Defaults to
            ``'data.json'``.
        output_dir: Directory that receives the CSV files.  It is created
            when missing.  Defaults to ``'outputs'``.
    """
    # Function-scope import: the file header only imports `datetime`.
    from datetime import timezone

    with open(data_file) as json_file:
        data = json.load(json_file)

    # Start from a clean directory.  Create it on the first run instead of
    # failing in listdir, and leave sub-directories alone (os.remove cannot
    # delete them).
    os.makedirs(output_dir, exist_ok=True)
    for entry in os.listdir(output_dir):
        stale_path = os.path.join(output_dir, entry)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    for collection in data:
        # A moderator without any matching comment yields an empty list;
        # there is no user name to read and nothing to write.
        if not collection:
            continue
        user = collection[0]['user']
        print("Making Collection for %s" % user)
        csv_path = os.path.join(output_dir, '%s_mod_comments.csv' % user)
        # Append mode keeps earlier rows when the same user appears in more
        # than one collection.  The text is written as str with an explicit
        # UTF-8 encoding: passing encoded bytes to csv.writer on Python 3
        # would store their "b'...'" repr.  newline='' lets the csv module
        # control line endings.
        with open(csv_path, 'a', encoding='utf-8', newline='') as write_file:
            writer = csv.writer(write_file)
            writer.writerow(["User", "Subreddit", "User URL", "Date", "Complete Comment", "Analysis Collumn"])
            for post in collection:
                # Same UTC wall-clock string as utcfromtimestamp(), without
                # relying on that deprecated helper.
                created = datetime.fromtimestamp(post['date'], tz=timezone.utc)
                writer.writerow([
                    post['user'],
                    post['subreddit'],
                    "https://www.reddit.com/user/%s" % post['user'],
                    created.strftime('%Y-%m-%d %H:%M:%S'),
                    post['comment'],
                    '',
                ])
# Script driver: scrape into data.json first, then build the per-user CSVs
# from that cached JSON.  The order matters because the second step reads
# the file the first one writes.
for _step in (get_data, process_data):
    _step()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment