mbafford · February 4, 2020 14:17 · hezamu · May 18, 2013 · mbafford · May 24, 2013
diff --git a/.gitignore b/.gitignore
 diecast.xml
 run.sh
 original-rss.xml
 .env/
diff --git a/diecast_to_podcast.py b/diecast_to_podcast.py
 #!/usr/bin/env python3

 # Expects the AWS_ environment variables to be set for boto to know how to connect to AWS/S3 - they are:
 # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY 
 #
 # Change the bucket name from mbafford-static for your own uses.
 #
 # Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software.

 # This is just a hacky way to take the existing blog RSS feed and make it readable by podcast software
 # validated and found mostly not broken with:
 # http://www.feedvalidator.org/check.cgi?url=https%3A%2F%2Fmbafford-static.s3.amazonaws.com%2Fdiecast.xml
 # https://podba.se/validate/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml
 # http://castfeedvalidator.com/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml

 import re
 from urllib.request import urlopen, Request
 from xml.dom.minidom import parseString
 import boto

 def findSourceURL(item, type):
     """Return the ``src`` of the first ``<source>`` tag in *item* with the given extension.

     The item is serialised with ``toxml()`` and searched textually, so the
     ``<source>`` tag is found wherever it appears in the serialised markup.

     Args:
         item: DOM node providing ``toxml()``.
         type: File extension to look for, without the leading dot
             (e.g. ``'mp3'``). It is regex-escaped before use. The name
             shadows the builtin but is kept for caller compatibility.

     Returns:
         The ``src`` attribute value exactly as written in the markup (it may
         be a relative URL), or ``None`` when no matching tag is found.
     """
     itemxml = item.toxml()

     # Previously ".mp3" was hard-coded and the `type` argument ignored;
     # for type == 'mp3' the resulting pattern is unchanged.
     pattern = "<source[^>]+src=[\"']([^\"']*\\." + re.escape(type) + ")[\"']"
     m = re.search(pattern, itemxml)

     return m.group(1) if m else None


 def fetch_rss_feed_xml():
     """Download the blog's category RSS feed and parse it into a minidom document.

     Returns:
         The document produced by ``parseString`` for the downloaded feed.

     Errors from ``urlopen`` (network/HTTP) and ``parseString`` (malformed
     XML) are not caught here and propagate to the caller.
     """
     # Circumvent a potential bot blocker, see http://stackoverflow.com/questions/3336549/pythons-urllib2-why-do-i-get-error-403-when-i-urlopen-a-wikipedia-page
     url = 'http://www.shamusyoung.com/twentysidedtale/?feed=rss2&cat=287'
     req = Request(url, headers={'User-Agent' : "Diecast feed generator ([email protected])"})

     # Use the response as a context manager so the connection is always
     # closed, even if reading fails part-way through.
     with urlopen(req) as conn:
         rss = conn.read()

     # a mix of UTF-8 and windows-1252 makes for an XML parsing error
     # in the case of this script, fixing the errors isn't that important
     # so just get rid of them: undecodable bytes become U+FFFD, which is
     # then stripped from the text.
     rss = rss.decode('utf-8', errors='replace').replace(u"\uFFFD", "")

     return parseString(rss)

 def podcastify_xml(rssxml):
     """Add podcast-specific tags to the parsed blog feed, modifying it in place.

     Steps performed on *rssxml* (a minidom document):

     * declare the iTunes namespace on the ``<rss>`` element (and the Atom
       namespace if the feed does not already declare it, because an
       ``atom:link`` element is emitted below);
     * drop every existing ``atom:link`` element;
     * for each ``<item>`` with an mp3 ``<source>``, replace any existing
       ``<enclosure>`` with one pointing at the absolute mp3 URL;
     * prepend ``atom:link`` (self) and the ``itunes:*`` channel tags to the
       first ``<channel>``.

     Args:
         rssxml: minidom document of the RSS feed.

     Returns:
         None; the document is mutated.

     Raises:
         IndexError: if the document has no ``<channel>`` element.
     """
     # Function-scope import keeps this block self-contained.
     from urllib.parse import urljoin

     # For iTunes
     feedNodes = rssxml.getElementsByTagName("rss")
     if feedNodes:
         root = feedNodes[0]
         root.setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
         # An atom:link is added to the channel below; make sure its prefix
         # is declared so the output stays namespace-well-formed.
         if not root.hasAttribute("xmlns:atom"):
             root.setAttribute("xmlns:atom", "http://www.w3.org/2005/Atom")

     # getElementsByTagName returns a list built up front, so removing
     # nodes while looping over it is safe.
     for link in rssxml.getElementsByTagName("atom:link"):
         link.parentNode.removeChild(link)

     for item in rssxml.getElementsByTagName("item"):
         mp3url = findSourceURL(item, 'mp3')
         if not mp3url:
             continue

         # Resolve against the blog's base URL. Absolute URLs pass through,
         # "/x" resolves against the host and "x" against the blog directory;
         # unlike manual prefixing this also handles protocol-relative
         # ("//host/x.mp3") and upper-case-scheme sources correctly.
         mp3url = urljoin("http://www.shamusyoung.com/twentysidedtale/", mp3url)

         # remove existing enclosures, if any
         for enclosure in item.getElementsByTagName("enclosure"):
             enclosure.parentNode.removeChild(enclosure)

         enclosure = rssxml.createElement("enclosure")
         enclosure.setAttribute("url", mp3url)
         enclosure.setAttribute("type", "audio/mpeg")
         # Fixed placeholder size, not the real file length (for iTunes).
         enclosure.setAttribute("length", "75000000")
         item.appendChild(enclosure)

     channel = rssxml.getElementsByTagName("channel")[0]

     # Each tag is inserted before the current first child, so the final
     # order in the channel is the reverse of the order below.
     image = rssxml.createElement("itunes:image")
     image.setAttribute("href", "http://shamusyoung.com/twentysidedtale/images/diecast2018.jpg")
     channel.insertBefore(image, channel.firstChild)

     author = rssxml.createElement("itunes:author")
     author.appendChild(rssxml.createTextNode("Twenty Sided"))
     channel.insertBefore(author, channel.firstChild)

     category = rssxml.createElement("itunes:category")
     category.setAttribute("text", "Games & Hobbies")
     channel.insertBefore(category, channel.firstChild)

     explicit = rssxml.createElement("itunes:explicit")
     explicit.appendChild(rssxml.createTextNode("no"))
     channel.insertBefore(explicit, channel.firstChild)

     link = rssxml.createElement("atom:link")
     link.setAttribute("href", "https://mbafford-static.s3.amazonaws.com/diecast.xml")
     link.setAttribute("rel", "self")
     link.setAttribute("type", "application/rss+xml")
     channel.insertBefore(link, channel.firstChild)


 def upload_xml(rssxml):
     """Publish the feed document as ``diecast.xml`` in the ``mbafford-static`` S3 bucket.

     Args:
         rssxml: Document providing ``toprettyxml``; it is serialised as
             UTF-8 and uploaded with a public-read policy and an
             ``application/rss+xml`` content type.

     The connection is made through ``boto.connect_s3``; per the header of
     this script, credentials come from the AWS_ environment variables.
     """
     # NOTE(review): is_secure=False presumably means plain HTTP - confirm
     # this is intentional.
     connection = boto.connect_s3(is_secure=False)
     destination = boto.s3.key.Key(connection.get_bucket('mbafford-static'))
     destination.key = 'diecast.xml'

     payload = rssxml.toprettyxml(encoding='utf-8')
     destination.set_contents_from_string(
         payload,
         headers={'Content-Type' : 'application/rss+xml'},
         policy='public-read',
     )

 def write_xml_to_file( rssxml, filename ):
     """Write the pretty-printed document to *filename* as UTF-8 text.

     Args:
         rssxml: Document providing ``toprettyxml``.
         filename: Path of the file to create or overwrite.
     """
     # Serialising with encoding='utf-8' puts encoding="utf-8" in the XML
     # declaration, so the file must really be written as UTF-8 rather than
     # in the platform's default encoding.
     text = rssxml.toprettyxml(encoding='utf-8').decode('utf-8')
     with open(filename, 'w', encoding='utf-8') as f:
         f.write(text)

 # Script pipeline: download the blog feed, save an unmodified copy, add the
 # podcast tags, then save and upload the result.
 # NOTE: there is no __main__ guard, so these statements also run on import.
 rssxml = fetch_rss_feed_xml()
 write_xml_to_file( rssxml, 'original-rss.xml' )  # snapshot before any modification
 podcastify_xml( rssxml )  # mutates rssxml in place
 write_xml_to_file( rssxml, 'diecast.xml' )
 upload_xml( rssxml )
diff --git a/requirements.txt b/requirements.txt
 boto
	#!/usr/bin/env python3

	# Expects the AWS_ environment variables to be set for boto to know how to connect to AWS/S3 - they are:
	# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
	#
	# Change the bucket name from mbafford-static for your own uses.
	#
	# Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software.

	# This is just a hacky way to take the existing blog RSS feed and make it readable by podcast software
	# validated and found mostly not broken with:
	# http://www.feedvalidator.org/check.cgi?url=https%3A%2F%2Fmbafford-static.s3.amazonaws.com%2Fdiecast.xml
	# https://podba.se/validate/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml
	# http://castfeedvalidator.com/?url=https://mbafford-static.s3.amazonaws.com/diecast.xml

	import re
	from urllib.request import urlopen, Request
	from xml.dom.minidom import parseString
	import boto

	def findSourceURL(item, type):
	    """Return the ``src`` of the first ``<source>`` tag in *item* with the given extension.

	    The item is serialised with ``toxml()`` and searched textually, so the
	    ``<source>`` tag is found wherever it appears in the serialised markup.

	    Args:
	        item: DOM node providing ``toxml()``.
	        type: File extension to look for, without the leading dot
	            (e.g. ``'mp3'``). It is regex-escaped before use. The name
	            shadows the builtin but is kept for caller compatibility.

	    Returns:
	        The ``src`` attribute value exactly as written in the markup (it may
	        be a relative URL), or ``None`` when no matching tag is found.
	    """
	    itemxml = item.toxml()

	    # Previously ".mp3" was hard-coded and the `type` argument ignored;
	    # for type == 'mp3' the resulting pattern is unchanged.
	    pattern = "<source[^>]+src=[\"']([^\"']*\\." + re.escape(type) + ")[\"']"
	    m = re.search(pattern, itemxml)

	    return m.group(1) if m else None


	def fetch_rss_feed_xml():
	    """Download the blog's category RSS feed and parse it into a minidom document.

	    Returns:
	        The document produced by ``parseString`` for the downloaded feed.

	    Errors from ``urlopen`` (network/HTTP) and ``parseString`` (malformed
	    XML) are not caught here and propagate to the caller.
	    """
	    # Circumvent a potential bot blocker, see http://stackoverflow.com/questions/3336549/pythons-urllib2-why-do-i-get-error-403-when-i-urlopen-a-wikipedia-page
	    url = 'http://www.shamusyoung.com/twentysidedtale/?feed=rss2&cat=287'
	    req = Request(url, headers={'User-Agent' : "Diecast feed generator ([email protected])"})

	    # Use the response as a context manager so the connection is always
	    # closed, even if reading fails part-way through.
	    with urlopen(req) as conn:
	        rss = conn.read()

	    # a mix of UTF-8 and windows-1252 makes for an XML parsing error
	    # in the case of this script, fixing the errors isn't that important
	    # so just get rid of them: undecodable bytes become U+FFFD, which is
	    # then stripped from the text.
	    rss = rss.decode('utf-8', errors='replace').replace(u"\uFFFD", "")

	    return parseString(rss)

	def podcastify_xml(rssxml):
	    """Add podcast-specific tags to the parsed blog feed, modifying it in place.

	    Steps performed on *rssxml* (a minidom document):

	    * declare the iTunes namespace on the ``<rss>`` element (and the Atom
	      namespace if the feed does not already declare it, because an
	      ``atom:link`` element is emitted below);
	    * drop every existing ``atom:link`` element;
	    * for each ``<item>`` with an mp3 ``<source>``, replace any existing
	      ``<enclosure>`` with one pointing at the absolute mp3 URL;
	    * prepend ``atom:link`` (self) and the ``itunes:*`` channel tags to the
	      first ``<channel>``.

	    Args:
	        rssxml: minidom document of the RSS feed.

	    Returns:
	        None; the document is mutated.

	    Raises:
	        IndexError: if the document has no ``<channel>`` element.
	    """
	    # Function-scope import keeps this block self-contained.
	    from urllib.parse import urljoin

	    # For iTunes
	    feedNodes = rssxml.getElementsByTagName("rss")
	    if feedNodes:
	        root = feedNodes[0]
	        root.setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
	        # An atom:link is added to the channel below; make sure its prefix
	        # is declared so the output stays namespace-well-formed.
	        if not root.hasAttribute("xmlns:atom"):
	            root.setAttribute("xmlns:atom", "http://www.w3.org/2005/Atom")

	    # getElementsByTagName returns a list built up front, so removing
	    # nodes while looping over it is safe.
	    for link in rssxml.getElementsByTagName("atom:link"):
	        link.parentNode.removeChild(link)

	    for item in rssxml.getElementsByTagName("item"):
	        mp3url = findSourceURL(item, 'mp3')
	        if not mp3url:
	            continue

	        # Resolve against the blog's base URL. Absolute URLs pass through,
	        # "/x" resolves against the host and "x" against the blog directory;
	        # unlike manual prefixing this also handles protocol-relative
	        # ("//host/x.mp3") and upper-case-scheme sources correctly.
	        mp3url = urljoin("http://www.shamusyoung.com/twentysidedtale/", mp3url)

	        # remove existing enclosures, if any
	        for enclosure in item.getElementsByTagName("enclosure"):
	            enclosure.parentNode.removeChild(enclosure)

	        enclosure = rssxml.createElement("enclosure")
	        enclosure.setAttribute("url", mp3url)
	        enclosure.setAttribute("type", "audio/mpeg")
	        # Fixed placeholder size, not the real file length (for iTunes).
	        enclosure.setAttribute("length", "75000000")
	        item.appendChild(enclosure)

	    channel = rssxml.getElementsByTagName("channel")[0]

	    # Each tag is inserted before the current first child, so the final
	    # order in the channel is the reverse of the order below.
	    image = rssxml.createElement("itunes:image")
	    image.setAttribute("href", "http://shamusyoung.com/twentysidedtale/images/diecast2018.jpg")
	    channel.insertBefore(image, channel.firstChild)

	    author = rssxml.createElement("itunes:author")
	    author.appendChild(rssxml.createTextNode("Twenty Sided"))
	    channel.insertBefore(author, channel.firstChild)

	    category = rssxml.createElement("itunes:category")
	    category.setAttribute("text", "Games & Hobbies")
	    channel.insertBefore(category, channel.firstChild)

	    explicit = rssxml.createElement("itunes:explicit")
	    explicit.appendChild(rssxml.createTextNode("no"))
	    channel.insertBefore(explicit, channel.firstChild)

	    link = rssxml.createElement("atom:link")
	    link.setAttribute("href", "https://mbafford-static.s3.amazonaws.com/diecast.xml")
	    link.setAttribute("rel", "self")
	    link.setAttribute("type", "application/rss+xml")
	    channel.insertBefore(link, channel.firstChild)


	def upload_xml( rssxml ):
	    """Publish the feed document as ``diecast.xml`` in the ``mbafford-static`` S3 bucket.

	    Args:
	        rssxml: Document providing ``toprettyxml``; it is serialised as
	            UTF-8 and uploaded with a public-read policy and an
	            ``application/rss+xml`` content type.

	    The connection is made through ``boto.connect_s3``; per the header of
	    this script, credentials come from the AWS_ environment variables.
	    """
	    # NOTE(review): is_secure=False presumably means plain HTTP - confirm
	    # this is intentional.
	    s3 = boto.connect_s3( is_secure=False )
	    bucket = s3.get_bucket('mbafford-static')
	    s3key = boto.s3.key.Key(bucket)
	    s3key.key = 'diecast.xml'
	    s3key.set_contents_from_string( rssxml.toprettyxml(encoding='utf-8'), headers={'Content-Type' : 'application/rss+xml'}, policy='public-read' )

	def write_xml_to_file( rssxml, filename ):
	    """Write the pretty-printed document to *filename* as UTF-8 text.

	    Args:
	        rssxml: Document providing ``toprettyxml``.
	        filename: Path of the file to create or overwrite.
	    """
	    # Serialising with encoding='utf-8' puts encoding="utf-8" in the XML
	    # declaration, so the file must really be written as UTF-8 rather than
	    # in the platform's default encoding.
	    text = rssxml.toprettyxml(encoding='utf-8').decode('utf-8')
	    with open(filename, 'w', encoding='utf-8') as f:
	        f.write(text)

	# Script pipeline: download the blog feed, save an unmodified copy, add the
	# podcast tags, then save and upload the result.
	# NOTE: there is no __main__ guard, so these statements also run on import.
	rssxml = fetch_rss_feed_xml()
	write_xml_to_file( rssxml, 'original-rss.xml' )  # snapshot before any modification
	podcastify_xml( rssxml )  # mutates rssxml in place
	write_xml_to_file( rssxml, 'diecast.xml' )
	upload_xml( rssxml )
No results found