Skip to content

Instantly share code, notes, and snippets.

@timothycarambat
Last active May 7, 2019 03:50
Show Gist options
  • Save timothycarambat/a64baf0f18e6d4916044a5aaaa2cec5d to your computer and use it in GitHub Desktop.
Save timothycarambat/a64baf0f18e6d4916044a5aaaa2cec5d to your computer and use it in GitHub Desktop.
Reddit Moderator Comment Analysis for u/kenotism for thesis project! https://www.reddit.com/r/programmingrequests/comments/bkdjcf/reddit_web_scraping/
import csv, json, os
from datetime import datetime
import praw
def get_reddit():
    """Create the PRAW client used for the Reddit requests in this script.

    The client id and secret are empty strings in this file (presumably
    placeholders -- supply the values of your own Reddit app before running).

    Returns:
        The object returned by ``praw.Reddit`` for these settings.
    """
    settings = {
        'client_id': '',
        'client_secret': '',
        'user_agent': 'Praw',
    }
    return praw.Reddit(**settings)
def get_mod_data():
    """Load the moderator/subreddit pairs listed in ``users.csv``.

    The file is opened relative to the current working directory. Its first
    line is treated as a header and discarded; in every following row the
    first column is taken as the username and the second as the subreddit.

    Rows that are blank, have an empty first column, or have no second
    column are skipped instead of raising ``IndexError``.

    Returns:
        A list of ``{'name': ..., 'subreddit': ...}`` dicts in file order.
    """
    users_file = 'users.csv'
    moderators = []
    with open(users_file) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Discard the header row; the default keeps an empty file from raising.
        next(csv_reader, None)
        for row in csv_reader:
            # csv.reader yields [] for a blank line, so check the length
            # before indexing into the row.
            if len(row) < 2 or row[0] == '':
                continue
            moderators.append({
                'name': row[0],
                'subreddit': row[1],
            })
    return moderators
def get_comments_for_mod_in_sub(reddit, mod_info, max_comments=100):
    """Collect a moderator's newest comments made in their own subreddit.

    Args:
        reddit: Client object; only ``reddit.redditor(name).comments.new``
            is used here.
        mod_info: Dict with keys ``'name'`` (username) and ``'subreddit'``.
            The subreddit may be written as ``name``, ``r/name`` or
            ``/r/name``; case and surrounding whitespace are ignored.
        max_comments: Stop once this many matching comments are collected.
            ``None`` removes the cap. Defaults to 100.

    Returns:
        A list of dicts with keys ``'user'``, ``'subreddit'``, ``'date'``
        (the comment's ``created_utc``) and ``'comment'`` (its ``body``),
        in the order the listing yields them.
    """
    if max_comments is not None and max_comments <= 0:
        return []

    # Normalise the target once instead of on every comment.
    wanted = mod_info['subreddit'].strip().lower().strip('/')
    if wanted.startswith('r/'):
        wanted = wanted[2:]

    moderator_comments = []
    # limit=None is passed to the listing; the cap is enforced below so that
    # only comments from the wanted subreddit count towards it.
    for comment in reddit.redditor(mod_info['name']).comments.new(limit=None):
        subreddit_name = comment.subreddit.display_name.lower()
        print(subreddit_name)
        if subreddit_name != wanted:
            continue
        moderator_comments.append({
            'user': mod_info['name'],
            'subreddit': mod_info['subreddit'],
            'date': comment.created_utc,
            'comment': comment.body,
        })
        if max_comments is not None and len(moderator_comments) >= max_comments:
            break
    return moderator_comments
def get_data():
    """Fetch comments for every listed moderator and save them to ``data.json``.

    Builds one list of comment dicts per moderator returned by
    ``get_mod_data`` (a list may be empty when nothing matched) and writes
    the list of lists as JSON to ``data.json`` in the current working
    directory, replacing any previous file.
    """
    reddit = get_reddit()
    moderators = get_mod_data()
    data = [
        get_comments_for_mod_in_sub(reddit, moderator)
        for moderator in moderators
    ]
    # Serialise before opening so a failure here does not truncate an
    # existing data.json.
    payload = json.dumps(data)
    # The context manager closes the handle even if the write raises.
    with open("data.json", "w") as f:
        f.write(payload)
def _csv_cell(text):
    """Return ``text`` in the string type this interpreter's csv module expects."""
    # On Python 2 ``str is bytes`` and csv.writer needs UTF-8 byte strings.
    # On Python 3 it needs text; bytes would be written out as "b'...'".
    if str is bytes:
        return text.encode('utf-8')
    return text


def process_data():
    """Turn ``data.json`` into one CSV per moderator under ``outputs/``.

    Reads the list of per-moderator comment lists from ``data.json``,
    creates ``outputs/`` if it is missing, deletes every entry already in
    it, and then appends each non-empty collection to
    ``outputs/<user>_mod_comments.csv`` (a header row followed by one row
    per comment). Collections with no comments are skipped because there
    is no first element to take the username from.
    """
    with open('data.json') as json_file:
        data = json.load(json_file)

    output_dir = 'outputs'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    # Start from a clean directory so results of earlier runs do not linger.
    for stale_name in os.listdir(output_dir):
        os.remove(os.path.join(output_dir, stale_name))

    # Python 3's csv module wants the file opened with newline='' and the
    # comments need an explicit encoding; Python 2's open() takes neither.
    open_kwargs = {} if str is bytes else {'newline': '', 'encoding': 'utf-8'}

    for collection in data:
        if not collection:
            continue
        user = collection[0]['user']
        print("Making Collection for %s" % user)
        csv_path = os.path.join(output_dir, '%s_mod_comments.csv' % user)
        # Append mode: a user listed for several subreddits ends up with all
        # of their collections in the same file.
        with open(csv_path, 'a', **open_kwargs) as writeFile:
            writer = csv.writer(writeFile)
            # Header text is emitted verbatim (spelling kept as in the original).
            writer.writerow(["User", "Subreddit", "User URL", "Date", "Complete Comment", "Analysis Collumn"])
            for post in collection:
                writer.writerow([
                    post['user'],
                    post['subreddit'],
                    "https://www.reddit.com/user/%s" % post['user'],
                    datetime.utcfromtimestamp(post['date']).strftime('%Y-%m-%d %H:%M:%S'),
                    _csv_cell(post['comment']),
                    '',  # left blank for the manual analysis column
                ])
# Run the pipeline in order: scrape into data.json first, then build the
# per-moderator CSV files from that file.
for _stage in (get_data, process_data):
    _stage()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment