Skip to content

Instantly share code, notes, and snippets.

@hezamu
Forked from mbafford/.gitignore
Last active December 17, 2015 11:39
Show Gist options
  • Save hezamu/5604249 to your computer and use it in GitHub Desktop.
Save hezamu/5604249 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Expects the AWS_ environment variables to be set for boto to know how to connect to AWS/S3 - they are:
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
#
# Change the bucket name from mbafford-static for your own uses.
#
# Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software.
import re
import urllib2
from xml.dom.minidom import parseString
import boto
# Download the blog's category feed and parse it into a DOM document.
# A custom User-Agent circumvents a potential bot blocker, see
# http://stackoverflow.com/questions/3336549/pythons-urllib2-why-do-i-get-error-403-when-i-urlopen-a-wikipedia-page
url = 'http://www.shamusyoung.com/twentysidedtale/?feed=rss2&cat=287'
req = urllib2.Request(url, headers={'User-Agent' : "Diecast feed generator"})
conn = urllib2.urlopen(req)
try:
    rss = conn.read()
finally:
    # Release the HTTP connection even when the read fails.
    conn.close()
rssxml = parseString(rss)

# For iTunes: declare the itunes namespace on the root <rss> element so
# that itunes:* tags can be added to the feed.
feedNodes = rssxml.getElementsByTagName("rss")
if feedNodes:
    feedNodes[0].setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
def findSourceURL(item, type):
    """Return the URL of the first embedded <source> tag of the given type.

    item -- DOM node for one feed entry (any object with a toxml() method).
    type -- file extension to look for, without the leading dot, e.g. 'mp3'.

    The search runs over the serialized XML of the item, so the returned
    value is the src attribute exactly as it appears in that text.
    Returns None when no <source> tag with a matching src is found.
    """
    itemxml = item.toxml()
    # re.escape makes sure the extension is always matched literally.
    pattern = (r"<source[^>]+src=[\"']([^\"']*\."
               + re.escape(type)
               + r")[\"']")
    m = re.search(pattern, itemxml)
    if m:
        return m.group(1)
    return None
# Give every feed item that embeds an MP3 <source> a matching <enclosure>
# element, which is what podcast clients look for.
for item in rssxml.getElementsByTagName("item"):
    mp3url = findSourceURL(item, 'mp3')
    if not mp3url:
        # No embedded audio in this entry; leave it untouched.
        continue

    # Turn site-relative links into absolute URLs.
    if not mp3url.startswith("http"):
        site_prefix = ("http://www.shamusyoung.com"
                       if mp3url.startswith("/")
                       else "http://www.shamusyoung.com/twentysidedtale/")
        mp3url = site_prefix + mp3url

    enclosure = rssxml.createElement("enclosure")
    enclosure_attrs = (
        ("url", mp3url),
        ("type", "audio/mpeg"),
        ("length", "75000000"),  # for iTunes; fixed value, not the real file size
    )
    for attr_name, attr_value in enclosure_attrs:
        enclosure.setAttribute(attr_name, attr_value)
    item.appendChild(enclosure)
# Add the show artwork tag to the channel.
image = rssxml.createElement("itunes:image")
image.setAttribute(
    "href",
    "http://www.shamusyoung.com/twentysidedtale/images/splash_diecast.jpg")
channel = rssxml.getElementsByTagName("channel")[0]
channel.appendChild(image)

# Upload the rewritten feed to S3 as a publicly readable object.
# connect_s3() is called with no arguments; see the header comment for the
# AWS_* environment variables it relies on.
conn = boto.connect_s3()
bucket = conn.get_bucket('mbafford-static')
s3key = boto.s3.key.Key(bucket)
s3key.key = 'diecast.xml'
feed_body = rssxml.toprettyxml(encoding='utf-8')
s3key.set_contents_from_string(
    feed_body,
    headers={'Content-Type' : 'application/rss+xml'},
    policy='public-read')
@mbafford
Copy link

Thank you!

This fork has been merged back into the original gist: https://gist.github.com/mbafford/5333101

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment