Skip to content

Instantly share code, notes, and snippets.

@j-mcc1993
Last active July 16, 2020 17:19
Show Gist options
  • Save j-mcc1993/a3bd0cdd9895a0ddb833ce915bfb0bef to your computer and use it in GitHub Desktop.
Save j-mcc1993/a3bd0cdd9895a0ddb833ce915bfb0bef to your computer and use it in GitHub Desktop.
Reddit Top Post-Time Histogram
#!/usr/local/bin/python3.5
import datetime
import praw
import numpy
import matplotlib.pyplot as plt
# Script-wide settings
hours_in_day = 24        # histogram bin count and modulus for hour arithmetic
pacific_time_offset = 7  # hours subtracted from the timestamp's hour;
                         # 7 matches Pacific Daylight Time (UTC-7)
lim = 400                # cap on top posts requested from each subreddit

# Subreddits whose top posts are sampled
subreddits = [
    'announcements',
    'iama',
    'askreddit',
    'funny',
    'pics',
    'space',
    'books',
    'itookapicture',
    'television',
    'science',
    'todayilearned',
    'worldnews',
    'gaming',
    'videos',
    'movies',
    'aww',
    'blog',
    'news',
    'music',
]
# User-agent string that identifies this script to Reddit
my_user_agent = 'PostTimeRegression v1.0 by /u/I_Am_Treebeard'

# Build the PRAW client, announcing progress on stdout.
# NOTE(review): the client id/secret values look like placeholders -- supply
# real application credentials before running.
print('Getting reddit instance...')
r = praw.Reddit(
    client_id='my_client_id',
    client_secret='my_client_secret',
    user_agent=my_user_agent,
)
print('Done.')
def postTime(post):
    """Return the Pacific-time hour of day at which *post* was created.

    Args:
        post: Object exposing its creation time as a Unix timestamp in the
            ``created_utc`` attribute (for example a PRAW submission).
            ``created`` is used as a fallback when ``created_utc`` is
            missing.

    Returns:
        int: Hour in ``range(hours_in_day)``, obtained by shifting the UTC
        hour back by ``pacific_time_offset`` hours.
    """
    timestamp = getattr(post, 'created_utc', None)
    if timestamp is None:
        timestamp = post.created
    # Decode explicitly as UTC: a naive fromtimestamp() would convert to the
    # local timezone of whatever machine runs the script, so the fixed offset
    # below would only be right on a host whose clock is set to UTC.
    utc_time = datetime.datetime.fromtimestamp(
        timestamp, datetime.timezone.utc)
    return (utc_time.hour - pacific_time_offset) % hours_in_day
# Gather the posting hour of every top submission, one subreddit at a time
post_times = []
for name in subreddits:
    top_posts = r.subreddit(name).top(limit=lim)
    post_times.extend(postTime(post) for post in top_posts)
# Plot a histogram with one bin per hour of the day.  The explicit range pins
# the bin edges to whole hours (0-1, 1-2, ..., 23-24); without it the edges
# are derived from the smallest and largest hour actually observed, so the
# bars drift away from the hour ticks and hours can be merged whenever the
# data does not span 0 through 23.
plt.hist(post_times, bins=hours_in_day, range=(0, hours_in_day),
         facecolor='blue', alpha=0.5)
plt.xlabel('Hour of Day (Pacific Time)')
plt.ylabel('Number of Posts')
plt.title('Post-Times of Most Popular Reddit Posts')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment