Created
September 20, 2012 17:42
-
-
Save sampsyo/3757293 to your computer and use it in GitHub Desktop.
send This American Life episodes to Huffduffer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Simple scraper for adding This American Life episodes to Huffduffer.

TAL has the most amazing app on the App Store, but some stuff broke on
iOS 6 and it doesn't seem to get bug fixes very often. This is my ad-hoc
solution.
"""
import urllib.request | |
import urllib.parse | |
import sys | |
import bs4 | |
import cgi | |
import cgitb | |
# URL template for an episode's web page; the placeholder takes the
# episode number.
PAGE_FMT = 'http://www.thisamericanlife.org/radio-archives/episode/{}'

# URL template for an episode's MP3 file; the placeholder takes the
# episode number. Parenthesised so the two halves concatenate without a
# fragile backslash continuation.
AUDIO_FMT = (
    'http://audio.thisamericanlife.org/jomamashouse/ismymamashouse'
    '/{}.mp3'
)

# Huffduffer endpoint that receives the bookmark fields as a query string.
HUFFDUFF_URL = 'http://huffduffer.com/add'
def scrape(epnum):
    """Fetch an episode page and extract its metadata.

    Downloads the page at ``PAGE_FMT`` formatted with ``epnum`` and reads
    the text of the first elements carrying the ``node-title``, ``date``
    and ``description`` CSS classes.

    Args:
        epnum: Episode number substituted into ``PAGE_FMT`` and
            ``AUDIO_FMT``.

    Returns:
        A dict with the keys ``title``, ``date``, ``description``, ``url``
        (the episode page) and ``audio_url`` (the formatted ``AUDIO_FMT``).

    Raises:
        LookupError: If the page has no element with one of the expected
            CSS classes.
    """
    url = PAGE_FMT.format(epnum)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        page = response.read()

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning.
    soup = bs4.BeautifulSoup(page, 'html.parser')

    def text_of(css_class):
        """Return the stripped text of the first element with the class."""
        node = soup.find(class_=css_class)
        if node is None:
            # Fail with a message that names the missing piece rather than
            # an AttributeError on None.
            raise LookupError(
                'no element with class {!r} found at {}'.format(
                    css_class, url
                )
            )
        return node.get_text().strip()

    return {
        'title': text_of('node-title'),
        'date': text_of('date'),
        'description': text_of('description'),
        'url': url,
        'audio_url': AUDIO_FMT.format(epnum),
    }
def huffduff(info):
    """Build the Huffduffer "add" URL for one episode.

    Args:
        info: Mapping providing the ``audio_url``, ``title`` and
            ``description`` entries.

    Returns:
        ``HUFFDUFF_URL`` followed by ``?`` and the URL-encoded bookmark
        fields (url, title prefixed with ``TAL``, description, tags).
    """
    # Ordered pairs encode in the same field order as the equivalent dict.
    bookmark_fields = (
        ('bookmark[url]', info['audio_url']),
        ('bookmark[title]', 'TAL {}'.format(info['title'])),
        ('bookmark[description]', info['description']),
        ('bookmark[tags]', 'This American Life'),
    )
    query = urllib.parse.urlencode(bookmark_fields)
    return '{}?{}'.format(HUFFDUFF_URL, query)
def cgi_redirect():
    """Handle a CGI request by redirecting to the Huffduffer "add" URL.

    Reads the ``ep`` form field, scrapes that episode with ``scrape`` and
    prints a ``302 Found`` response whose ``Location`` is the URL built by
    ``huffduff``. When ``ep`` is missing or is not an integer, prints a
    ``400 Bad Request`` plain-text response instead and returns.
    """
    import html  # local import: only needed to escape the fallback link

    def reject(message):
        """Print a plain-text client-error response."""
        print('Content-Type: text/plain')
        print('Status: 400 Bad Request')
        print()
        print(message)

    cgitb.enable()
    form = cgi.FieldStorage()

    # getfirst() copes with a repeated ``ep`` field; form['ep'] would then
    # be a list, which has no ``.value`` attribute.
    raw_epnum = form.getfirst('ep')
    if raw_epnum is None:
        reject('Specify an episode number.')
        return

    try:
        epnum = int(raw_epnum)
    except ValueError:
        # Bad user input is a client error, not a server traceback.
        reject('The episode number must be an integer.')
        return

    url = huffduff(scrape(epnum))

    print('Content-Type: text/html')
    print('Status: 302 Found')
    print('Location: ' + url)
    print()
    # The query string contains raw '&' separators, which must be escaped
    # when the URL is embedded in an HTML attribute.
    print('<a href="{}">huffduff</a>'.format(html.escape(url, quote=True)))
if __name__ == '__main__':
    # Runs as a CGI script: reads the ``ep`` field and prints the redirect.
    # For a quick command-line check, replace the call below with
    #     print(huffduff(scrape(int(sys.argv[1]))))
    cgi_redirect()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment