Created
October 18, 2017 23:33
-
-
Save James-E-A/a8b90d324d09a8af83dcc2ff95d05fed to your computer and use it in GitHub Desktop.
Clean up Reddit's defective Atom feed. Runs locally or as CGI.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import cgi | |
#import cgitb | |
#cgitb.enable() | |
import json | |
import urllib.request | |
import urllib.parse | |
from html import unescape | |
from werkzeug.contrib.atom import AtomFeed | |
from datetime import datetime | |
from pytz import utc | |
import sys, os | |
# Resolve the request parameters. Each setting is taken from the CGI form
# first, then from the command line, then from a built-in default.
form = cgi.FieldStorage()

# getfirst() is used instead of getvalue(): getvalue() returns a *list* when
# the same field appears more than once in the query string, which would
# break the str.format() call below.
if form.getfirst("url"):
    BASE_URL = form.getfirst("url")
elif len(sys.argv) >= 3:
    BASE_URL = sys.argv[2]
else:
    BASE_URL = "http://reddit.com/r/{}/.json"

if form.getfirst("r"):
    SUBREDDIT = form.getfirst("r")
elif len(sys.argv) >= 2:
    SUBREDDIT = sys.argv[1]
else:
    SUBREDDIT = "all"

# BASE_URL is a str.format() template whose first positional field receives
# the subreddit name.
RSS_URL = BASE_URL.format(SUBREDDIT)
# Download the JSON listing at RSS_URL and keep the payload of every 't3'
# child.
headers = {'User-Agent': "RedditAtom 0.4"}
if "REMOTE_ADDR" in os.environ:
    # Running as CGI: pass the requesting client's address upstream.
    headers['X-Forwarded-For'] = os.environ["REMOTE_ADDR"]
    # In CGI mode the URL can come straight from the query string, and
    # urlopen() also understands file: and ftp: URLs. Refuse anything that
    # is not plain HTTP(S) so a remote caller cannot make the script read
    # local files.
    # NOTE(review): http(s) URLs that point at internal hosts are still
    # fetched; consider an allow-list of hosts if this is publicly exposed.
    if urllib.parse.urlsplit(RSS_URL).scheme.lower() not in ("http", "https"):
        raise ValueError("unsupported URL scheme in {!r}".format(RSS_URL))

request = urllib.request.Request(url=RSS_URL, headers=headers)
with urllib.request.urlopen(request) as response:
    response_json = json.loads(response.read().decode())

# Only children tagged 't3' are turned into feed entries.
# NOTE(review): 't3' is presumably Reddit's kind tag for link/self posts.
posts = [
    child['data']
    for child in response_json['data']['children']
    if child['kind'] == 't3'
]
# Build the Atom feed: one entry per post in `posts`.
#
# The feed title is "/r/" plus whatever follows the last "/r/" in RSS_URL.
# Only a literal trailing ".json" is removed first. str.rstrip('.json')
# strips any trailing run of the characters '.', 'j', 's', 'o', 'n', which
# would also eat the end of a subreddit name in URLs like ".../r/python.json".
feed_path = RSS_URL
if feed_path.endswith('.json'):
    feed_path = feed_path[:-len('.json')]
atom = AtomFeed(title='/r/' + feed_path.split('/r/')[-1], feed_url=RSS_URL)

for post in posts:
    # selftext_html may be empty/None; such posts get no content element.
    body_html = post['selftext_html']
    atom.add(
        # Title and body are HTML-unescaped before being handed to the feed.
        title=unescape(post['title']),
        content=unescape(body_html) if body_html else None,
        content_type="html",
        url=post['permalink'],
        author=post['author'],
        id=post['id'],
        # created_utc is interpreted as a POSIX timestamp in UTC.
        updated=datetime.fromtimestamp(post['created_utc'], tz=utc),
        links=[{
            'href': post['url'],
            # A post whose URL ends with its own permalink points back at
            # itself; any other target is attached as an enclosure.
            'rel': "self" if post['url'].endswith(post['permalink']) else "enclosure",
        }],
    )
# Write the serialized feed as encoded bytes to the binary stdout stream.
stdout_bytes = sys.stdout.buffer
if "REMOTE_ADDR" in os.environ:
    # CGI response: the content-type header followed by the empty line that
    # terminates the header section.
    for header_line in ('Content-type: application/atom+xml\n', '\n'):
        stdout_bytes.write(header_line.encode())
stdout_bytes.write(atom.to_string().encode())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment