Skip to content

Instantly share code, notes, and snippets.

@zthomae
Created October 24, 2012 16:35
Show Gist options
  • Select an option

  • Save zthomae/3947186 to your computer and use it in GitHub Desktop.

Select an option

Save zthomae/3947186 to your computer and use it in GitHub Desktop.
A script for turning an ugly class webpage into a podcast RSS feed
from pyquery import PyQuery as pq
import urllib2
from datetime import datetime
import PyRSS2Gen as rss
# Scrape the course page's table of lecture recordings and write it out as
# a podcast RSS feed (soc125-feed.xml) with one item per recording link.

# The page that is scraped; it is also used as the feed's channel link.
PAGE_URL = 'http://www.ssc.wisc.edu/~wright/Sociology-125-podcasts-2012.htm'

d = pq(url=PAGE_URL)
# hrefs found on the page are concatenated onto this prefix.
URL_ROOT = 'http://www.ssc.wisc.edu/~wright/'
items = []

# Rows are walked in reverse document order.  Within a row, cell 1 supplies
# the date, cell 2 the title, cell 3 the recording links and cell 4 the
# slides link.
for tr in reversed(d('table tr')):
    # The slides link is optional: a missing cell or a cell without an
    # <a> yields an empty link instead of aborting the row.
    try:
        slides_link = URL_ROOT + tr[4].find('a').get('href')
    except (AttributeError, IndexError):
        slides_link = ''

    # The date is "month/day" text inside the first <font> of cell 1; when
    # that <font> has no direct text, fall back to a nested <b>.
    date_elem = next(tr[1].iter('font'))
    try:
        date = date_elem.text.split('/')
    except AttributeError:
        date = date_elem.find('b').text.split('/')

    for n in reversed(tr[3]):
        # Element.get() returns None (it does not raise) when the child has
        # no href, so test the value explicitly to skip non-link children.
        url = n.get('href')
        if not url:
            continue
        media_url = URL_ROOT + url

        # The enclosure needs the file size; only the response headers are
        # read, and the connection is always released afterwards.
        f = urllib2.urlopen(media_url)
        try:
            length = f.headers['Content-Length']
        finally:
            f.close()

        # Title and date are deliberately evaluated here rather than once
        # per row, so rows without recordings are never parsed for them.
        title = ' '.join(next(tr[2].iter('font')).text.strip().split())
        items.append(rss.RSSItem(
            title=title,
            link=slides_link,
            description=n.text,
            guid=rss.Guid(media_url),
            enclosure=rss.Enclosure(
                url=media_url,
                length=length,
                type='audio/m4a-latm',
            ),
            # The page only lists month/day; the year is fixed to 2012.
            pubDate=datetime(2012, int(date[0]), int(date[1])),
        ))

feed = rss.RSS2(
    title="Sociology 125 Podcast - Fall 2012",
    link=PAGE_URL,
    description="Lectures and slides from Sociology 125, taught by Erik Olin Wright",
    lastBuildDate=datetime.now(),
    items=items,
)

# Close the output file deterministically so the feed is fully flushed.
with open('soc125-feed.xml', 'w') as out:
    feed.write_xml(out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment