Created
April 12, 2013 21:33
-
-
Save nikolak/5375314 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
#
# Script Name: getRedditJSONSubmissionData.py
# Usage: ./getRedditJSONSubmissionData.py [-u USER] [-p PASS]
#            [-r SUBREDDIT] [-m MODE] filename
# ----------------------------------------------------------------------------
# This script averages one request every two seconds.
# If the servers return data faster, you might
# need to change the sleep time to avoid going over the API limits.
# Submissions are requested 100 at a time through the URL parameter
# "limit=100"; alternatively, change the settings in your Reddit account
# to get 100 objects at a time.
#
# Also, the code that handles a non-200 response should be improved to
# eventually stop requesting after X failures --
# this might happen if Reddit's servers go down
# for an extended time period.
# ---------------------------------------------------------------------------
import argparse
import json
import time

import requests
# ===== To modify ======
# Sent as the User-Agent header on every request made by the session.
user_agent = "My Awesome Reddit Python Script"
# Number of consecutive failed requests for one page before the listing
# is restarted from the newest submissions.
maximum_retries = 5
# NOTE(review): not read anywhere in the visible code -- confirm it is needed.
debug = False
# ======================
# Listing endpoint. {sub} is the subreddit name and {aft} is the "after"
# value taken from the previous response (empty for the first page).
# HTTPS is used so requests are not sent in clear text.
base_url = "https://www.reddit.com/r/{sub}/new/.json?limit=100&after={aft}"
def main(username, password, subreddit, output_file, mode):
    """Continuously page through a subreddit's newest submissions.

    Creates a ``requests`` session (logging in first when both *username*
    and *password* are given) and then loops forever: every two seconds it
    requests the next page of ``/r/<subreddit>/new`` and writes one
    ``Submission in <subreddit> by <author>`` line per submission to
    *output_file* and to STDOUT.

    Args:
        username: Reddit account name, or None to skip the login.
        password: Reddit account password, or None to skip the login.
        subreddit: Name of the subreddit to read, e.g. ``"all"``.
        output_file: Path of the file the results are written to.
        mode: Mode string passed to ``open()`` for *output_file*.

    Raises:
        IOError: If *output_file* cannot be opened in *mode*.

    The loop has no exit condition; interrupt the script to stop it.
    """
    session = requests.Session()
    session.headers.update({'User-Agent': user_agent})

    if username is not None and password is not None:
        credentials = {'user': username,
                       'passwd': password,
                       'api_type': 'json'}
        # HTTPS so the password is never sent in clear text.
        # NOTE(review): reddit may no longer accept this cookie login for
        # API clients -- confirm against the current API documentation.
        login = session.post('https://www.reddit.com/api/login',
                             data=credentials)
        if login.status_code != 200:
            print("Login failed with HTTP status {}; continuing without "
                  "authentication.".format(login.status_code))

    after = ""
    current_retries = 0  # Count of how many times previous request failed.

    # Open the file once, outside the loop.  Re-opening it on every pass
    # leaked one handle per request and, with mode "w", truncated the file
    # each time so that only the last page survived.
    with open(output_file, mode) as output:
        while True:
            time.sleep(2)  # Sleep for 2 seconds to avoid going over API limit
            url = base_url.format(sub=subreddit, aft=after)
            print("Getting data from: {} ...".format(url))
            response = session.get(url)  # Make request to Reddit API

            if response.status_code != 200:
                # Record the HTTP error status and the URL in the output file.
                output.write(str(response.status_code) + '\n' + url + '\n')
                output.flush()
                current_retries += 1
                print("Retrying {} | {}/{}".format(url, current_retries,
                                                   maximum_retries))
                if current_retries >= maximum_retries:
                    # Give up on this page and restart from the newest
                    # submissions.
                    after = ""
                    current_retries = 0
                continue

            # A 200 response ends any running streak of failures.
            current_retries = 0
            try:
                url_data = json.loads(response.content)
                # Update `after` to receive the next batch of submissions.
                # A null value would otherwise be formatted into the URL as
                # the literal string "None"; fall back to "" so the next
                # request starts again from the newest submissions.
                after = url_data['data']['after'] or ""
                for submission in url_data['data']['children']:
                    out_string = "Submission in {} by {}".format(
                        submission['data']['subreddit'],
                        submission['data']['author'])
                    output.write(out_string + "\n")
                    print(out_string)
            except (ValueError, KeyError, TypeError) as err:
                # Malformed or unexpected JSON: report it and keep polling.
                # Unlike a bare ``except``, this lets KeyboardInterrupt
                # through so the script can still be stopped.
                print("Error: could not process response "
                      "({!r})".format(err))
            # Make each page visible on disk while the script keeps running.
            output.flush()
def build_arg_parser():
    """Return the command-line argument parser for this script.

    Options: ``-u`` username, ``-p`` password, ``-r`` subreddit
    (default ``"all"``), ``-m`` file mode (default ``"a"``), plus the
    required positional ``filename`` to save data to.
    """
    parser = argparse.ArgumentParser(description='Python Reddit Script')
    parser.add_argument('-u', action="store", dest="username", default=None,
                        help='Your reddit username')
    parser.add_argument('-p', action="store", dest="password", default=None,
                        help='Your reddit password')
    parser.add_argument('-r', action="store", dest="subreddit", default="all",
                        help='Subreddit to fetch data from')
    parser.add_argument('-m', action="store", dest="mode", default="a",
                        help='File mode, "a" will append to previous file, '
                             '"w" will overwrite old data if there is any')
    parser.add_argument('filename', action="store",
                        help='Filename to save data to.')
    return parser


if __name__ == '__main__':
    arg = build_arg_parser().parse_args()
    main(arg.username, arg.password, arg.subreddit, arg.filename, arg.mode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment