-
-
Save hezamu/5604249 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Expects the AWS_ environment variables to be set so that boto knows how to connect to AWS/S3 - they are:
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY | |
# | |
# Change the bucket name from mbafford-static for your own uses. | |
# | |
# Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software. | |
import re | |
import urllib2 | |
from xml.dom.minidom import parseString | |
import boto | |
# Circumvent a potential bot blocker by sending an explicit User-Agent, see
# http://stackoverflow.com/questions/3336549/pythons-urllib2-why-do-i-get-error-403-when-i-urlopen-a-wikipedia-page
url = 'http://www.shamusyoung.com/twentysidedtale/?feed=rss2&cat=287'
req = urllib2.Request(url, headers={'User-Agent' : "Diecast feed generator"})
conn = urllib2.urlopen(req)
try:
    rss = conn.read()
finally:
    # Close the HTTP response explicitly instead of leaving it open until
    # the name happens to be rebound or the interpreter exits.
    conn.close()

# Parse the downloaded feed into a DOM document that the rest of the script
# edits in place.
rssxml = parseString(rss)

# For iTunes: declare the itunes namespace on the root <rss> element (when
# one exists) so that itunes:-prefixed elements in the feed are bound to it.
feedNodes = rssxml.getElementsByTagName("rss")
if feedNodes:
    feedNodes[0].setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
def findSourceURL(item, type):
    """Return the URL of the first ``<source>`` tag with the given extension.

    item: a DOM node providing ``toxml()`` (an RSS ``<item>`` element in
        this script).
    type: file extension to look for, without the leading dot, e.g.
        ``'mp3'``.  An empty/None value falls back to ``'mp3'``, which is
        what this function always searched for before the parameter was
        honoured.  (The name shadows the builtin but is kept so existing
        callers are unaffected.)

    Returns the ``src`` attribute value of the first matching ``<source>``
    tag found in the node's serialized XML, or None when there is none.
    """
    extension = type or 'mp3'
    # The search runs over the serialized markup rather than the DOM tree;
    # presumably the player tag is embedded as text inside the item body --
    # TODO confirm against a live feed.
    pattern = r"""<source[^>]+src=["']([^"']*\.%s)["']""" % re.escape(extension)
    m = re.search(pattern, item.toxml())
    if m:
        return m.group(1)
    return None
# Give every feed item that embeds an MP3 <source> tag a matching
# <enclosure> element; items without one are left untouched.
for entry in rssxml.getElementsByTagName("item"):
    audio_url = findSourceURL(entry, 'mp3')
    if not audio_url:
        continue

    if not audio_url.startswith("http"):
        # Non-absolute link: root-relative paths hang off the site root,
        # anything else is resolved against the blog directory.
        if audio_url.startswith("/"):
            prefix = "http://www.shamusyoung.com"
        else:
            prefix = "http://www.shamusyoung.com/twentysidedtale/"
        audio_url = prefix + audio_url

    media = rssxml.createElement("enclosure")
    for attr_name, attr_value in (
        ("url", audio_url),
        ("type", "audio/mpeg"),
        ("length", "75000000"),  # for iTunes; fixed placeholder, not the real file size
    ):
        media.setAttribute(attr_name, attr_value)
    entry.appendChild(media)
# Attach show artwork to the first <channel> as an itunes:image element.
artwork = rssxml.createElement("itunes:image")
artwork.setAttribute(
    "href",
    "http://www.shamusyoung.com/twentysidedtale/images/splash_diecast.jpg")
channel = rssxml.getElementsByTagName("channel")[0]
channel.appendChild(artwork)
# Publish the rewritten feed to S3 under the key 'diecast.xml' with the
# 'public-read' policy.  boto picks up credentials from the AWS_* environment
# variables described at the top of the file.
s3 = boto.connect_s3()
target_bucket = s3.get_bucket('mbafford-static')
feed_key = boto.s3.key.Key(target_bucket)
feed_key.key = 'diecast.xml'
upload_headers = {'Content-Type' : 'application/rss+xml'}
feed_key.set_contents_from_string(
    rssxml.toprettyxml(encoding='utf-8'),
    headers=upload_headers,
    policy='public-read')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you!
This fork has been merged back into the original gist: https://gist.github.com/mbafford/5333101