Last active
July 4, 2023 07:24
-
-
Save nyxiereal/1661573c25222392f9c7c82fee2e9fc6 to your computer and use it in GitHub Desktop.
Basic Reddit API scraper for subreddits
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the link URLs of a subreddit's top posts and append them to a text
# file (one URL per line). This script only records URLs; it does not fetch
# the media behind them.
import praw
import os
import shutil
import requests
from tqdm import tqdm

# Initialize PRAW with your Reddit API credentials
# Get your API creds at https://www.reddit.com/prefs/apps
reddit = praw.Reddit(
    client_id='YOUR_CLIENT_ID',
    client_secret='YOUR_CLIENT_SECRET',
    user_agent='web:com.YOUR-SHORT-NAME.fems2:v1 (by u/YOUR-USER-NAME)',
)

# Define the subreddit and time range
subreddit_name = 'subreddit.name'
time_range = 'year'  # Options: 'hour', 'day', 'week', 'month', 'year', 'all'
post_score_min = 100  # Posts must score strictly above this to be kept

# Get top posts in the given time range
subreddit = reddit.subreddit(subreddit_name)
top_posts = subreddit.top(time_filter=time_range, limit=None)

# Keep only posts whose score is strictly greater than post_score_min.
# Building the list consumes the whole listing before any output is produced.
filtered_posts = [post for post in top_posts if post.score > post_score_min]

# Gather the URL of every remaining link post, echoing each one as we go
file_urls = []
for post in filtered_posts:
    if post.is_self or not post.url:
        continue  # Skip self-posts and posts without URLs
    print(post.url)
    file_urls.append(post.url)

# Open the output file once (instead of once per post) and append all URLs.
# Append mode means repeated runs accumulate; the file is only created when
# there is something to write. The encoding is pinned so the output does not
# depend on the platform default.
if file_urls:
    with open(f"r{subreddit_name}.txt", "a", encoding="utf-8") as file:
        file.writelines(f"{url}\n" for url in file_urls)

print("Download complete!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
downloader for this, doesn't work for RedGifs and Reddit albums!