Last active
February 4, 2020 14:17
-
-
Save mbafford/5333101 to your computer and use it in GitHub Desktop.
Quick and simple program to convert the Diecast podcast (http://www.shamusyoung.com/twentysidedtale/?cat=287) RSS feed into something my podcast reader can handle. It simply pulls the mp3 URL from the description and adds an enclosure element pointing at that URL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diecast.xml | |
run.sh | |
original-rss.xml | |
.env/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Expects the AWS_ environment variables to be set so that boto knows how to connect to AWS/S3 - they are:
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY | |
# | |
# Change the bucket name from mbafford-static for your own uses. | |
# | |
# Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software. | |
# This is just a hacky way to take the existing blog RSS feed and make it readable by podcast software | |
# validated and found mostly not broken with: | |
# http://www.feedvalidator.org/check.cgi?url=https%3A%2F%2Fmbafford-static.s3.amazonaws.com%2Fdiecast.xml | |
# https://podba.se/validate/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml | |
# http://castfeedvalidator.com/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml | |
import re | |
from urllib.request import urlopen, Request | |
from xml.dom.minidom import parseString | |
import boto | |
def findSourceURL(item, type):
    """Return the URL of the first ``<source>`` tag in *item* with the given extension.

    The item is serialized back to XML text and searched with a regular
    expression, so the ``<source>`` markup only needs to appear somewhere in
    the item's serialized form.

    Args:
        item: A DOM node providing ``toxml()``.
        type: File extension to look for, without the leading dot
            (e.g. ``'mp3'``). The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        The value of the matching ``src`` attribute, or ``None`` when no
        ``<source>`` tag with that extension is found.
    """
    itemxml = item.toxml()
    # Escape the extension so it is matched literally, never as regex syntax.
    pattern = r"<source[^>]+src=[\"']([^\"']*\." + re.escape(type) + r")[\"']"
    match = re.search(pattern, itemxml)
    return match.group(1) if match else None
def fetch_rss_feed_xml(url='http://www.shamusyoung.com/twentysidedtale/?feed=rss2&cat=287'):
    """Download an RSS feed and parse it into a minidom document.

    Args:
        url: Address of the feed to fetch. Defaults to the Diecast category
            feed of the Twenty Sided blog.

    Returns:
        The parsed feed as returned by ``xml.dom.minidom.parseString``.

    Raises:
        urllib.error.URLError: If the feed cannot be fetched.
        xml.parsers.expat.ExpatError: If the cleaned-up text is not
            well-formed XML.
    """
    # Circumvent a potential bot blocker, see http://stackoverflow.com/questions/3336549/pythons-urllib2-why-do-i-get-error-403-when-i-urlopen-a-wikipedia-page
    req = Request(url, headers={'User-Agent': "Diecast feed generator ([email protected])"})

    # The context manager closes the response even if read() raises.
    with urlopen(req) as conn:
        raw = conn.read()

    # A mix of UTF-8 and windows-1252 makes for an XML parsing error.
    # For this script, fixing the bad bytes isn't that important, so every
    # undecodable byte becomes U+FFFD and is then dropped.
    text = raw.decode('utf-8', errors='replace').replace("\uFFFD", "")
    return parseString(text)
def podcastify_xml(rssxml):
    """Turn a parsed blog RSS document into a podcast feed, in place.

    For every ``<item>`` in which ``findSourceURL`` finds an mp3 URL, any
    existing ``<enclosure>`` elements are replaced with a single one pointing
    at that URL. The channel then gets iTunes metadata (image, author,
    category, explicit flag) and a self-referencing ``atom:link``.

    Args:
        rssxml: The minidom document to modify.

    Returns:
        None. The document is modified in place.

    Raises:
        IndexError: If the document contains no ``<channel>`` element.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import urljoin

    site_base = "http://www.shamusyoung.com/twentysidedtale/"

    # For iTunes
    feedNodes = rssxml.getElementsByTagName("rss")
    if feedNodes:
        root = feedNodes[0]
        root.setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
        # An atom:link element is added below, so the prefix must be declared.
        if not root.hasAttribute("xmlns:atom"):
            root.setAttribute("xmlns:atom", "http://www.w3.org/2005/Atom")

    # Drop every existing atom:link; a fresh self link is added to the channel below.
    for link in rssxml.getElementsByTagName("atom:link"):
        link.parentNode.removeChild(link)

    for item in rssxml.getElementsByTagName("item"):
        mp3url = findSourceURL(item, 'mp3')
        if not mp3url:
            continue

        # Resolve against the blog's base URL. Absolute URLs pass through
        # unchanged; root-relative, relative and protocol-relative ("//host/...")
        # references are resolved properly.
        mp3url = urljoin(site_base, mp3url)

        # remove existing enclosures, if any
        for enclosure in item.getElementsByTagName("enclosure"):
            enclosure.parentNode.removeChild(enclosure)

        enclosure = rssxml.createElement("enclosure")
        enclosure.setAttribute("url", mp3url)
        enclosure.setAttribute("type", "audio/mpeg")
        enclosure.setAttribute("length", "75000000")  # fixed placeholder length, for iTunes
        item.appendChild(enclosure)

    channel = rssxml.getElementsByTagName("channel")[0]

    # Each element is inserted before the channel's current first child, so
    # the last one inserted (atom:link) ends up first in the channel.
    image = rssxml.createElement("itunes:image")
    image.setAttribute("href", "http://shamusyoung.com/twentysidedtale/images/diecast2018.jpg")
    channel.insertBefore(image, channel.firstChild)

    author = rssxml.createElement("itunes:author")
    author.appendChild(rssxml.createTextNode("Twenty Sided"))
    channel.insertBefore(author, channel.firstChild)

    category = rssxml.createElement("itunes:category")
    category.setAttribute("text", "Games & Hobbies")
    channel.insertBefore(category, channel.firstChild)

    explicit = rssxml.createElement("itunes:explicit")
    explicit.appendChild(rssxml.createTextNode("no"))
    channel.insertBefore(explicit, channel.firstChild)

    link = rssxml.createElement("atom:link")
    link.setAttribute("href", "https://mbafford-static.s3.amazonaws.com/diecast.xml")
    link.setAttribute("rel", "self")
    link.setAttribute("type", "application/rss+xml")
    channel.insertBefore(link, channel.firstChild)
def upload_xml( rssxml ):
    """Publish the feed document to S3 as ``diecast.xml`` with a public-read policy.

    The document is serialized with ``toprettyxml(encoding='utf-8')`` and
    stored under the key ``diecast.xml`` in the ``mbafford-static`` bucket,
    with an ``application/rss+xml`` content type.

    Args:
        rssxml: The minidom document to upload.
    """
    # NOTE(review): is_secure=False presumably makes boto talk plain HTTP
    # to S3 -- confirm this is intentional.
    connection = boto.connect_s3(is_secure=False)
    target = boto.s3.key.Key(connection.get_bucket('mbafford-static'))
    target.key = 'diecast.xml'

    payload = rssxml.toprettyxml(encoding='utf-8')
    target.set_contents_from_string(
        payload,
        headers={'Content-Type' : 'application/rss+xml'},
        policy='public-read',
    )
def write_xml_to_file(rssxml, filename):
    """Write the document to *filename* as pretty-printed UTF-8 XML.

    The file is written in binary mode so the bytes on disk always match the
    ``encoding="utf-8"`` declaration produced by ``toprettyxml``, regardless
    of the platform's default text encoding.

    Args:
        rssxml: The minidom document to serialize.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'wb') as f:
        f.write(rssxml.toprettyxml(encoding='utf-8'))
# Script pipeline: fetch the blog feed, keep a copy of the untouched feed,
# convert it to a podcast feed in place, keep a copy of the result, then
# publish the result to S3.
# NOTE(review): these statements run at module level, so they also execute
# whenever this file is imported.
rssxml = fetch_rss_feed_xml()
write_xml_to_file(rssxml, 'original-rss.xml')  # snapshot before conversion

podcastify_xml(rssxml)  # modifies rssxml in place
write_xml_to_file(rssxml, 'diecast.xml')  # snapshot after conversion

upload_xml(rssxml)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
boto |
Merged and re-run. Updated feed at http://mbafford-static.s3.amazonaws.com/diecast.xml works with iTunes 11.0.2 now. Thank you for the patch!
It's a shame that iTunes requires a length parameter that it's going to then ignore. I remember having to do this a long time ago with another similar project, but I'd forgotten all about that requirement.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this Matt, great stuff!
I updated it to work with iTunes. I didn't test the S3 part, but the generated XML works with iTunes. You can grab the changes from my fork.
https://gist.github.com/hezamu/5604249