Skip to content

Instantly share code, notes, and snippets.

@alexwlchan
Created December 10, 2015 08:33
Show Gist options
  • Save alexwlchan/01cec115a6f51d35ab26 to your computer and use it in GitHub Desktop.
Save alexwlchan/01cec115a6f51d35ab26 to your computer and use it in GitHub Desktop.
drangReader – Python scripts for a simple RSS reader

drangreader

This is a set of scripts for aggregating RSS feeds. It's based on a script originally written by Dr. Drang: http://leancrew.com/all-this/2015/11/simpler-syndication/

Installation

Download all the files from this Gist. Put them all in a directory, create a virtualenv and install requirements:

cd ~/drangreader
virtualenv env
source env/bin/activate
pip install -r requirements.txt

Put a list of feed URLs in feeds.txt. One feed per line. To create the HTML file:

python main.py

Assuming nothing goes wrong, the posts will be written to output.html.

# encoding = utf-8
"""
This file contains the logic for filtering/munging posts. It's kept in
a separate file from the main feed parsing logic so the commit history
for main.py doesn't get polluted with nitpicks and tweaks.
"""
import collections
# Keywords to filter on: normalise_post() hides any post whose body
# contains one of these (the comparison there is case-insensitive).
FILTER_WORDS = [
    'coffee',
    'yankees',
    'apple watch',
]
# A post after normalisation: the six Post fields plus 'permalink'.
# The typename matches the variable name so that reprs and __name__ are
# distinguishable from the plain six-field Post tuple.
ExtendedPost = collections.namedtuple('ExtendedPost', [
    'time',
    'blog',
    'title',
    'author',
    'link',
    'body',
    'permalink',
])
def remove_final_link(html_text):
    """
    Return html_text truncated just before its last opening <a> tag.

    Only a real anchor tag counts: '<a' must be followed by whitespace,
    '>' or the end of the string.  Other tags that merely start with
    '<a' (<abbr>, <aside>, <audio>, ...) are skipped.  If there is no
    anchor tag the text is returned unchanged.
    """
    search_end = len(html_text)
    while True:
        idx = html_text.rfind('<a', 0, search_end)
        if idx == -1:
            return html_text
        following = html_text[idx + 2:idx + 3]
        if following in ('', '>') or following.isspace():
            return html_text[:idx]
        # Not an anchor (e.g. '<abbr'); keep looking further left.
        search_end = idx
def extract_last_link(html_text):
    """
    Return the value of the last double-quoted href attribute in
    html_text, or None if there is no such attribute.

    Looking for 'href="' explicitly, rather than taking the last quoted
    string, gives the right answer when another attribute (e.g. title)
    follows the href, and avoids an IndexError on text without quotes.
    """
    marker = 'href="'
    start = html_text.rfind(marker)
    if start == -1:
        return None
    start += len(marker)
    end = html_text.find('"', start)
    if end == -1:
        # Unterminated attribute value: nothing trustworthy to return.
        return None
    return html_text[start:end]
def normalise_post(post):
    """
    Filter a post and apply per-blog clean-ups.

    post is read through its time, blog, title, author, link and body
    attributes and, in the default case, unpacked positionally into an
    ExtendedPost, so it is expected to be a six-field Post tuple.

    Returns an ExtendedPost tuple, which adds a permalink field to the
    regular Post fields, or None if the post should be hidden.
    """
    blog = post.blog

    # Hide anything mentioning a filtered keyword.  The body is
    # lower-cased once instead of once per keyword.
    body_lower = post.body.lower()
    if any(word.lower() in body_lower for word in FILTER_WORDS):
        return None

    if blog == 'Marco.org':
        if 'coffee' in post.body:
            return None
        if post.title.startswith(u'→'):
            # Link post: drop the two leading characters of the title
            # (the arrow and the character after it), cut the trailing
            # link out of the body and expose it as the permalink.
            return ExtendedPost(post.time, post.blog, post.title[2:],
                                post.author, post.link,
                                remove_final_link(post.body),
                                extract_last_link(post.body))
    elif blog == 'Daring Fireball' and u'★' in post.body:
        # Same treatment for bodies containing the star marker; the
        # title is left as it is.
        return ExtendedPost(post.time, post.blog, post.title,
                            post.author, post.link,
                            remove_final_link(post.body),
                            extract_last_link(post.body))
    elif blog == 'Erica Sadun' and post.author == 'erica':
        # Blank out the author for this blog/author combination.
        return ExtendedPost(post.time, post.blog, post.title,
                            None, post.link, post.body, None)

    # Everything else passes through unchanged, with no permalink.
    return ExtendedPost(*post, permalink=None)
http://feedpress.me/512pixels
http://www.leancrew.com/all-this/feed/
http://ihnatko.com/feed/
http://blog.ashleynh.me/feed
http://www.betalogue.com/feed/
http://bitsplitting.org/feed/
http://feedpress.me/jxpx777
http://kieranhealy.org/blog/index.xml
http://blueplaid.net/news?format=rss
http://brett.trpstra.net/brettterpstra
http://feeds.feedburner.com/NerdGap
http://www.libertypages.com/clarktech/?feed=rss2
http://feeds.feedburner.com/CommonplaceCartography
http://kk.org/cooltools/feed
http://danstan.com/blog/imHotep/files/page0.xml
http://daringfireball.net/feeds/main
http://david-smith.org/atom.xml
http://feeds.feedburner.com/drbunsenblog
http://stratechery.com/feed/
http://www.gnuplotting.org/feed/
http://feeds.feedburner.com/jblanton
http://feeds.feedburner.com/IgnoreTheCode
http://indiestack.com/feed/
http://feedpress.me/inessential
http://feeds.feedburner.com/JamesFallows
http://feeds.feedburner.com/theendeavour
http://feed.katiefloyd.me/
http://feeds.feedburner.com/KevinDrum
http://www.kungfugrippe.com/rss
http://lancemannion.typepad.com/lance_mannion/rss.xml
http://www.caseyliss.com/rss
http://www.macdrifter.com/feeds/all.atom.xml
http://mackenab.com/feed
http://hints.macworld.com/backend/osxhints.rss
http://macsparky.com/blog?format=rss
http://www.macstories.net/feed/
http://www.marco.org/rss
http://merrillmarkoe.com/feed
http://mjtsai.com/blog/feed/
http://feeds.feedburner.com/mygeekdaddy
http://nathangrigg.net/feed.rss
http://onethingwell.org/rss
http://schmeiser.typepad.com/penny_wiseacre/rss.xml
http://feeds.feedburner.com/PracticallyEfficient
http://robjwells.com/rss
http://www.red-sweater.com/blog/feed/
http://feedpress.me/sixcolors
http://feedpress.me/candlerblog
http://inversesquare.wordpress.com/feed/
http://high90.com/feed
http://joe-steel.com/feed
http://feeds.veritrope.com/
http://xkcd.com/atom.xml
http://doingthatwrong.com/?format=rss
#!/usr/bin/env python
# coding=utf8
import collections
import configparser
import time
from datetime import datetime, timedelta

import feedparser
import jinja2
import pytz

from extras import normalise_post
# Get a list of feed URLs, one per line.  Surrounding whitespace (including
# the trailing newline) is stripped and blank lines are skipped, so that
# only clean URLs are handed to feedparser.
with open('feeds.txt') as f:
    SUBSCRIPTIONS = [line.strip() for line in f if line.strip()]

# Optional settings.  ConfigParser.read() silently ignores a missing file,
# in which case the fallback timezone below is used.
# NOTE(review): the file name 'config.ini' is an assumption -- the original
# script used `config` without ever defining it; confirm the intended name.
config = configparser.ConfigParser()
config.read('config.ini')

# Date and time setup.  The cutoff is midnight (home timezone) at the start
# of the day 48 hours before now.  Before 2 AM the clock is treated as still
# belonging to the previous day, so the window reaches back a further 24
# hours.
TIMEZONE = config.get(section='default', option='timezone', fallback='GMT')

# Get the current time in the home timezone, then step back.
home_tz = pytz.timezone(TIMEZONE)
dt = datetime.now(home_tz)
if dt.hour < 2:
    dt -= timedelta(hours=72)
else:
    dt -= timedelta(hours=48)
start = dt.replace(hour=0, minute=0, second=0, microsecond=0)

# Convert this time back into UTC.
utc = pytz.utc
START = start.astimezone(utc)
# A single feed entry reduced to six fields, in the order process_entry()
# fills them in.
Post = collections.namedtuple(
    'Post',
    'time blog title author link body',
)
def process_entry(entry, blog):
    """
    Coerces an entry from feedparser into a Post tuple and normalises it.

    entry is a mapping-style feed entry (read with [] and .get()); blog
    is the title of the feed it came from.

    Returns the result of normalise_post() for the entry, or None if the
    entry has no usable date or is older than START.
    """
    # Prefer the update time, falling back to the publication time.
    # Either key may be absent or None, in which case the entry cannot
    # be placed in time and is dropped.
    when = entry.get('updated_parsed') or entry.get('published_parsed')
    if not when:
        return None

    # feedparser documents the *_parsed values as UTC time tuples, so
    # build the aware datetime directly from the fields.  Going through
    # time.mktime() would interpret them as local time instead.
    try:
        when = datetime(*when[:6], tzinfo=utc)
    except (TypeError, ValueError):
        # Malformed tuple or out-of-range field (e.g. second == 60).
        return None
    if when < START:
        return None

    title = entry.get('title', '')

    try:
        author = entry['author']
    except KeyError:
        author = ', '.join(
            a['name'] for a in entry.get('authors', []) if 'name' in a
        )

    link = entry.get('link', '')

    # Use the first content item when there is one, otherwise the
    # summary; an entry with neither gets an empty body.
    try:
        body = entry['content'][0]['value']
    except (KeyError, IndexError):
        body = entry.get('summary', '')

    return normalise_post(Post(when, blog, title, author, link, body))
# Fetch every subscribed feed and collect the entries that
# process_entry() keeps.
posts = []
for url in SUBSCRIPTIONS:
    feed = feedparser.parse(url)
    try:
        blog = feed['feed']['title']
    except KeyError:
        # No title available for this feed: skip it entirely.
        continue
    processed = (process_entry(entry, blog) for entry in feed['entries'])
    posts.extend(post for post in processed if post)
# Get the template, and drop in the posts.  Both files are opened as UTF-8
# explicitly: the template declares a UTF-8 charset for the generated page,
# and relying on the locale's default encoding could garble non-ASCII text
# or fail outright when writing it.
with open('template.html', encoding='utf-8') as f:
    template = jinja2.Template(f.read())
with open('output.html', 'w', encoding='utf-8') as f:
    f.write(template.render(posts=posts, time=datetime.now()))
feedparser==5.2.1
Jinja2==2.8
MarkupSafe==0.23
pytz==2015.7
wheel==0.24.0
/*------------------------------------*\
# Base elements
\*------------------------------------*/
/* Default text face, size and colour for the whole page. */
body {
font: 12pt Georgia, Palatino, 'Palatino Linotype', Times, 'Times New Roman', serif;
/* font: serif */
color: #222; /* body-gray */
}
/* Links: purple by default, a darker purple once visited. */
a {
color: #732c7b;
}
/* Remove any text decoration (e.g. the underline) while hovering. */
a:hover {
text-decoration: none;
}
a:visited {
color: #421c52;
}
/* Rules are drawn as a 1px light-gray bar via the background colour,
   with the border switched off. */
hr {
background-color: #eee;
height: 1px;
border: none;
}
/* Extra vertical space around the rule separating two posts. */
hr.between_posts {
margin-top: 3em;
margin-bottom: 3em;
}
/* Second body rule: zero the body's own margin and padding. */
body {
margin: 0;
padding: 0;
}
/* Page header: pale lilac text; the link inside it is not underlined. */
#header_text {
padding: 1px 20px 0px 20px;
color: #eeddf9;
}
#header_text a {
color: #eeddf9;
text-decoration: none;
}
/* Images are block-level and centred with auto side margins. */
img {
margin-left: auto;
margin-right: auto;
display: block;
}
/* A little breathing room around inline code. */
code {
margin: 2px;
padding: 2px;
}
/* Shared look for inline code and code blocks. */
code, pre {
font-family: Menlo, monospace; /* font: mono */
background-color: #eee; /* light-gray */
font-size: 0.9em; /* subfont-size */
}
/**
* The overflow-x line ensures that extra text only appears within the
* confines of the gray background, and doesn't spill out onto the page.
* Effectively the <pre> becomes a 'window' into the code beneath.
*/
pre {
padding: 10px;
line-height: 1.35em;
overflow-x: auto;
}
/*------------------------------------*\
# Footnotes
\*------------------------------------*/
/**
* Footnote text is set slightly smaller than the body text.
* NOTE(review): this selector is `.footnote` (singular); a separate
* `.footnotes` rule also exists in this stylesheet -- confirm which class
* the markup actually uses.
*/
.footnote {
font-size: 0.9em; /* subfont-size */
}
/**
* Positioning of the footnote markers: `vertical-align: 0ex` puts
* <sup>/<sub> back on the baseline, and `position: relative` lets the
* `bottom`/`top` offsets below nudge them up or down from there.
*/
sup, sub {
vertical-align: 0ex;
position: relative;
}
sup { bottom: 1ex; }
sub { top: 0.8ex; }
/*------------------------------------*\
# Article titles
\*------------------------------------*/
/* Title, permalink and continue-reading links always use the link purple,
   even after being visited (hence !important). */
.article_title a,
.permalink a,
.continue_reading {
color: #732c7b !important; /* primary colour (purple, same value as `a`) */
}
/* Titles of full posts: larger, and not underlined. */
.fullpost_title a {
text-decoration: none;
font-size: 1.5em;
line-height: 1.5em;
}
/**
* The article_meta class covers permalinks and posting dates
*/
.article_meta {
font-size: 0.9em; /* subfont-size */
color: #999; /* accent-gray */
}
/* The arrow shown after the title of a link post. */
.linkpost_arrow {
color: #999; /* accent-gray */
}
/* The permalink glyph in the meta line. */
.permalink a {
font-size: 1.2em;
text-decoration: none;
}
/**
* Adjust the spacing around titles to make them look nice
*/
.linkpost_title {
margin-bottom: -0.5em;
}
.fullpost_title {
margin-bottom: -0.3em;
}
/*------------------------------------*\
# Blockquotes
\*------------------------------------*/
/* Quoted text: gray italics, indented, with a bar down the left edge. */
blockquote {
border-left: 5px solid #ccc; /* light gray bar */
margin-left: 15px;
margin-right: 0px;
padding: 1px 15px;
color: #666; /* blockquote-gray */
font-style: italic;
}
/* Tighter paragraph spacing inside a quote. */
blockquote p {
margin-top: 10px;
margin-bottom: 10px;
}
/*------------------------------------*\
# Tweets
\*------------------------------------*/
/**
* I think Dr. Drang wrote this originally? Whatever, I have it
* inlined so that all my CSS comes down in a single file.
*/
/* Outer box of an embedded tweet: blue panel, 80% wide and centred. */
.bbpBox {
  width: 80%;
  background: #8ec2da;
  margin-left: auto;
  margin-right: auto;
  padding: 1em;
  margin-top: 1em;
  margin-bottom: 1.1em;
  /* margin: 1em 0em 1.1em 0em;*/
  font-family: Georgia !important;
}
/* The tweet itself: a white rounded card.  The !important flags undo the
   generic blockquote styling (left bar, italics, margins). */
.bbpBox blockquote {
  background-color: white;
  margin: 0em !important;
  padding: .75em .75em .5em .75em !important;
  -moz-border-radius: 5px;
  -webkit-border-radius: 5px;
  border-radius: 5px; /* standard property; the prefixed forms alone are not enough */
  border-left-style: none !important;
  font-style: normal !important;
  line-height: 1.5em;
  color: #222;
}
.bbpBox blockquote a {
  color: blue;
  text-decoration: none;
}
.bbpBox blockquote a:hover {
  text-decoration: underline;
}
/* Tweet metadata is set smaller than the tweet text. */
.bbpBox blockquote .twMeta {
  font-size: 80%;
}
/* Small gap under the tweet text.  This was 25em, which opens a gap many
   lines tall; NOTE(review): .25em is assumed to be the intended value --
   confirm against the upstream stylesheet. */
.bbpBox blockquote .twContent {
  margin-bottom: .25em;
}
/*------------------------------------*\
# Page layout
\*------------------------------------*/
/* Dark page background; the content column is capped at 750px and
   centred with auto side margins. */
body {
background-color: #140623;
max-width: 750px;
margin-top: 0;
margin-left: auto;
margin-right: auto;
padding-top: 0;
}
h1 {
font-size: 2.5em;
}
/* The list of posts: a white panel with no list bullets. */
.rss {
list-style-type: none;
margin: 0;
padding: .5em 1em 1em 1.5em;
background-color: white;
margin-bottom: 2em;
}
.rss li {
margin-left: -.5em;
line-height: 1.4;
}
/* Let code blocks inside a post scroll instead of overflowing. */
.rss li pre {
overflow: auto;
}
/* Cap embedded media at 700px wide; `height: auto` lets the height follow
   the constrained width. */
img, figure, iframe {
max-width: 700px;
height: auto !important;
}
/* On viewports up to 700px wide, limit media to the container width. */
@media screen and (max-width: 700px) {
img, figure, iframe {
max-width: 100% !important;
}
}
.footnotes {
font-size: 0.85em;
}
/* No text decoration on code that appears inside a link. */
a code {
text-decoration: none !important;
}
/* Footer: small, centred, pale lilac text to stand out on the dark page. */
footer {
color: #eeddf9;
text-align: center;
margin-bottom: 2.2em;
font-size: 0.85em;
}
footer a {
color: #eeddf9 !important;
}
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <link rel="stylesheet" type="text/css" href="style.css" />
  <title>drangReader: Today’s RSS</title>
</head>
<body>
  <aside>
    <div id="header_text"><h1><a href="javascript:window.location.reload()">drangReader</a></h1></div>
  </aside>
  <ul class="rss">
    {# Newest first. Sorting on the time attribute alone avoids comparing
       the other tuple fields (e.g. an author of None against a string). #}
    {% for post in posts|sort(attribute='time', reverse=True) %}
    <li>
      <div class="article_title">
        <h3 class="{% if post.permalink %}link{% else %}full{% endif %}post_title"><a href="{{ post.link }}">{{ post.title }}</a>{% if post.permalink %} <span class="linkpost_arrow">→</span>{% endif %}</h3>
      </div>
      {# lstrip (not strip) removes only the leading zero of the day/hour;
         strip would also eat trailing zeros of the year, e.g. "2020". #}
      <div class="article_meta"><p>
        Posted on {{ post.time.strftime('%d %B %Y').lstrip('0') }} at {{ post.time.strftime('%I:%M&thinsp;%p').lstrip('0').lower() }}
        • {{ post.blog }}
        {% if post.author and post.blog != post.author %}
        • by {{ post.author }}
        {% endif %}
        {% if post.permalink %}
        • <span class="permalink"><a href="{{ post.permalink }}">∞</a></span>
        {% endif %}
      </p></div>
      {{ post.body|safe }}
      {# Put a line between posts, but only if this isn't the last one #}
      {% if not loop.last %}<hr class="between_posts"/>{% endif %}
    </li>
    {% endfor %}
  </ul>
  <footer>
    <p>
      Made by <a href="http://alexwlchan.net">Alex Chan</a>.
      Based on a script by <a href="http://leancrew.com/all-this/2015/11/simpler-syndication/">Dr Drang</a>.
    </p>
    <p>
      Last updated on {{ time.strftime('%d %B %Y').lstrip('0') }} at {{ time.strftime('%I:%M&thinsp;%p').lstrip('0').lower() }}.
    </p>
  </footer>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment