Created
October 24, 2012 16:35
-
-
Save zthomae/3947186 to your computer and use it in GitHub Desktop.
A script for turning an ugly class webpage into a podcast RSS feed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape the Sociology 125 lecture table and write it out as a podcast RSS feed.

The script downloads the course page, walks the rows of its table in
reverse order, and emits one RSS item per audio link found in a row.
The finished feed is written to ``soc125-feed.xml`` in the current
directory.

This is Python 2 code (it relies on ``urllib2``).
"""
from datetime import datetime
import urllib2

import PyRSS2Gen as rss
from pyquery import PyQuery as pq

PAGE_URL = 'http://www.ssc.wisc.edu/~wright/Sociology-125-podcasts-2012.htm'
URL_ROOT = 'http://www.ssc.wisc.edu/~wright/'
# The page lists dates as "month/day" only, so the year is supplied here.
COURSE_YEAR = 2012

d = pq(url=PAGE_URL)
items = []

# Cell usage per row, as read by the code below:
#   tr[1] -> date ("month/day"), tr[2] -> title,
#   tr[3] -> audio links,        tr[4] -> slides link (optional).
for tr in reversed(d('table tr')):
    # The slides link is optional: the cell may be absent (IndexError), hold
    # no <a> (AttributeError), or hold an <a> without href (get() returns
    # None, so the concatenation raises TypeError).
    try:
        slides_link = URL_ROOT + tr[4].find('a').get('href')
    except (AttributeError, IndexError, TypeError):
        slides_link = ''

    # The date text sits either directly in the first <font> element or in a
    # <b> nested inside it.
    date_elem = next(tr[1].iter('font'))
    date_text = date_elem.text
    if date_text is None:
        date_text = date_elem.find('b').text
    date = date_text.split('/')

    for n in reversed(tr[3]):
        # Element.get() returns None instead of raising when the attribute is
        # missing, so children that are not links must be skipped explicitly.
        try:
            url = n.get('href')
        except AttributeError:
            continue
        if not url:
            continue
        media_url = URL_ROOT + url

        # The request is made only to learn the enclosure size; close the
        # response right away so connections do not pile up across lectures.
        f = urllib2.urlopen(media_url)
        try:
            # RSS allows a length of 0 when the size cannot be determined.
            length = f.headers.get('Content-Length', '0')
        finally:
            f.close()

        # Collapse runs of whitespace in the title cell into single spaces.
        title = ' '.join(next(tr[2].iter('font')).text.strip().split())

        items.append(rss.RSSItem(
            title=title,
            link=slides_link,
            description=n.text,
            guid=rss.Guid(media_url),
            enclosure=rss.Enclosure(
                url=media_url,
                length=length,
                # NOTE(review): the registered MIME type is spelled
                # "audio/mp4a-latm"; confirm whether this value is intended.
                type='audio/m4a-latm',
            ),
            pubDate=datetime(COURSE_YEAR, int(date[0]), int(date[1])),
        ))

feed = rss.RSS2(
    title="Sociology 125 Podcast - Fall 2012",
    link=PAGE_URL,
    description="Lectures and slides from Sociology 125, taught by Erik Olin Wright",
    # PyRSS2Gen renders datetimes as GMT, so pass UTC rather than local time.
    lastBuildDate=datetime.utcnow(),
    items=items,
)

# Use a context manager so the feed file is flushed and closed.
with open('soc125-feed.xml', 'w') as out:
    feed.write_xml(out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment