Skip to content

Instantly share code, notes, and snippets.

@Aareon
Last active June 5, 2021 01:44
Show Gist options
  • Save Aareon/bde0049880718a98934822c5702a6a1c to your computer and use it in GitHub Desktop.
#!python3
"""Scrape the top posts of a subreddit via reddit's public .json listing API."""
import sys
import time
from pathlib import Path

import requests
from pyrate_limiter import Duration, Limiter, RequestRate

# ujson is a faster drop-in replacement for the stdlib json module;
# fall back silently when it is not installed.
try:
    import ujson as json
except ImportError:
    import json

HERE = Path(__file__).parent
CACHE_DIR = HERE / "_cache"
CACHE_MAX_AGE = 5 * 60  # seconds; cached responses are reused for 5 minutes
USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"

# Limit: 5 requests per minute.  (The original comment claimed
# "1 request per 5 minutes", which did not match RequestRate(5, Duration.MINUTE).)
limiter = Limiter(RequestRate(5, Duration.MINUTE))
n_limit = 1  # NOTE(review): unused in this file; kept for backward compatibility
@limiter.ratelimit('top', delay=True, max_delay=10)
def get_top_json(sub, limit, when, session=None):
    """Fetch the top.json listing for a subreddit.

    Args:
        sub: subreddit name (without the "r/" prefix).
        limit: number of posts to request (passed through to the query string).
        when: time window — "hour", "day", "week", "month", "year", or "all".
        session: optional requests.Session to reuse; a fresh one is created
            when omitted.

    Returns:
        The raw requests.Response — the caller checks status and parses JSON.
    """
    if session is None:
        session = requests.session()
    # reddit rejects the default python-requests UA, so always send a browser one
    session.headers['User-Agent'] = USERAGENT
    url = f"https://reddit.com/r/{sub}/top.json?t={when}&limit={limit}"
    print(f"Getting {url}")
    r = session.get(url)
    return r
def help_usage():
    """Return the command-line usage text for this script.

    The original placeholder said "{count}" while the argument list below it
    documented "limit"; the placeholder now matches the documented name.
    """
    name = Path(__file__).name
    return (f'usage: {name} {{sub}} {{limit}} {{when}}\n\n'
            ' sub the subreddit to scrape\n'
            ' limit number of posts to gather\n'
            ' when "hour", "day", "week", "month", "year", "all"\n\n'
            f'Example: {name} thenewboston 10 week')
def overwrite_file(fp, text):
    """Replace the contents of *fp* with *text* (UTF-8) and return True.

    Mode 'w' already truncates the file on open, so the original
    seek(0)/truncate() dance around a 'w+' handle was redundant, and the
    ``except Exception as e: raise e`` wrapper was a no-op — any I/O error
    still propagates to the caller unchanged.
    """
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(text)
    return True
def main():
    """CLI entry point: parse args, fetch (or reuse a cached) top listing,
    then print each post's title/author/karma/creation time.

    Exits with status 1 on missing arguments or a non-200 HTTP response.
    """
    args = sys.argv[1:]
    # Preserves the original behavior: both the no-args case and the
    # too-few-args (IndexError) case print usage and exit(1).
    if len(args) < 3:
        print('Missing arguments')
        print(help_usage())
        sys.exit(1)
    sub, limit, when = args[0], args[1], args[2]

    # make sure the cache directory exists
    if not CACHE_DIR.exists():
        CACHE_DIR.mkdir(parents=True)

    # a cache entry is reused while it is younger than CACHE_MAX_AGE
    curr_time = time.time()
    json_fp = CACHE_DIR / f'{sub}-top-{limit}-{when}.json'
    if json_fp.exists():
        ft = json_fp.stat().st_mtime
        if curr_time - ft >= CACHE_MAX_AGE:
            print("Cache is old")
            cache_valid = False
        else:
            print(f"Using cached result: {json_fp}")
            cache_valid = True
    else:
        print("No cache found")
        cache_valid = False

    if not cache_valid:
        r = get_top_json(sub, limit, when)
        if r.status_code != 200:
            # BUG FIX: the original string lacked the f-prefix, so the
            # literal text "{r.status_code}" was printed instead of the code.
            print(f"Failed to download JSON. Got status code {r.status_code}")
            sys.exit(1)
        print("Downloaded!")
        overwrite_file(json_fp, r.text)
        data = r.text
    else:
        with open(json_fp, encoding='utf-8') as f:
            data = f.read()

    data = json.loads(data)
    print()

    posts = []
    for post in data['data']['children']:
        post = post['data']
        title = post['title']
        author = post['author']
        ups = post['ups']
        # NOTE(review): this reads the 'created' field but labels it UTC;
        # reddit also exposes 'created_utc' — confirm which was intended.
        created_utc = post['created']
        posts.append({
            'title': title,
            'author': author,
            'upvotes': ups,
            'created': created_utc
        })
        print(f"Title: {title}")
        print(f"Author: {author}")
        print(f"Karma: {ups}")
        print(f"Created UTC: {created_utc}")
        print()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment