Skip to content

Instantly share code, notes, and snippets.

@Zulux91
Created March 26, 2018 17:03
Show Gist options
  • Save Zulux91/36abac603fe6098741375b31a21fcf62 to your computer and use it in GitHub Desktop.
Save Zulux91/36abac603fe6098741375b31a21fcf62 to your computer and use it in GitHub Desktop.
A simple python script for ripping in a .txt file every entry in a subreddit.
#Import Python Reddit API Wrapper
import praw
#Import time to get the latest time
import time
#datetime to do unix/readable translations
import datetime
#We use this for making files and folders
import os
#main code
def main():
#Connect API to my APP
reddit = praw.Reddit(client_id='', client_secret='', password='', user_agent='', username='')
#ask what sub to archive
subname = input('Input the subreddit to archive: ')
#Set subreddit to what the user input
subreddit = reddit.subreddit(subname)
#Archive an entire subreddit
archive(subreddit,subname)
def archive(subreddit,subname):
#make a folder named after the subreddit we're archiving if it doesn't currently exist
if not os.path.exists(subname):
os.makedirs(subname)
#get the start date
date1 = input('Input the start date YYYY/MM/DD: ')
#make it unix
date1 = time.mktime(datetime.datetime.strptime(date1, "%Y/%m/%d").timetuple())
#adds 1 day to the first date - "Get all links from the first day"
date2 = date1 + 86400
#enters the directory we just made using the cd class made below
#outside of the 'with' it goes back to the folder where the script is held
with cd(subname):
#loop until the current datetime
while date2 < time.time():
#for each submission between the two dates, process them
for submission in subreddit.submissions(date1,date2):
process_submission(submission)
#add a day onto it
date1+=86400
date2+=86400
#What to do with each submission
def process_submission(submission):
title = submission.title
#strip invalid characters from submissions title
title = title.translate({ord(i):None for i in '/><?:|*"'})
#translate doesn't work for backslashes lol
title = title.replace("\\","")
#strip whitespaces newlines etc
title = title.strip()
#Max length is 255 chars in Windows
title=title[:240]
#check if it's already archived
if not os.path.exists(title):
#print the title to console
print(title)
#make a file <post name>.txt if it doesn't already exist
file = open(title+".txt","w", encoding='utf-8')
#some metadata
file.write("ID: "+submission.id+"\n")
#make a readable date and write to the file
readabledate = (datetime.datetime.fromtimestamp(int(submission.created)).strftime('%Y-%m-%d %H:%M:%S'))
file.write("date: "+str(readabledate)+"\n")
file.write("author: "+submission.author.name+"\n")
file.write("url: "+submission.url+"\n")
#write the selftext
if submission.selftext != "":
file.write("\n---------------------------------------\n\n")
file.write(submission.selftext)
file.write("\n\n---------------------------------------\n\n")
submission.comments.replace_more(limit=None)
for comment in submission.comments.list():
readablecommentdate = (datetime.datetime.fromtimestamp(int(comment.created)).strftime('%Y-%m-%d %H:%M:%S'))
file.write(comment.id+" // ")
#if the author deleted their account it fails so
if comment.author != None:
file.write(comment.author.name)
file.write(" // "+readablecommentdate+"\n")
file.write(comment.body+"\n\n")
file.close()
#so we can change directories easily
class cd:
#Context manager for changing the current working directory
def __init__(self, newPath):
self.newPath = os.path.expanduser(newPath)
def __enter__(self):
self.savedPath = os.getcwd()
os.chdir(self.newPath)
def __exit__(self, etype, value, traceback):
os.chdir(self.savedPath)
#actually run it lol
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment