Skip to content

Instantly share code, notes, and snippets.

@teh
Created June 15, 2012 13:59
Show Gist options
  • Save teh/2936606 to your computer and use it in GitHub Desktop.
Save teh/2936606 to your computer and use it in GitHub Desktop.
Script to download Prairie Home Companion show
"""
Script to download a show from http://prairiehome.publicradio.org/

Depends on rtmpdump (apt-get install rtmpdump) and, for the Ogg
conversion step, on gst-launch-0.10.

Usage:
    # List shows in January
    python prar.py 2012 1
    # Download show with index 0
    python prar.py 2012 1 0

Please be nice and support their show via donations :)
"""
import time
import sys
from lxml import etree
import subprocess
# Root of the show's website; site-relative programme links are appended to it.
BASE_URL = 'http://prairiehome.publicradio.org'
# Monthly programme listing page; filled in with an integer year and month.
URL = 'http://prairiehome.publicradio.org/programs/{year:4d}/{month:02d}'
# rtmpdump invocation for one show; {phc} is the media path extracted by
# get_phc(), e.g. 'phc/2012/04/07/phc_20120407_128'.
COMMAND = 'rtmpdump -v --playpath mp3:ondemand/{phc} -r rtmp://archivemedia.publicradio.org/music'
# GStreamer pipeline that transcodes the downloaded file {name} to {name}.ogg.
# Only filesrc/filesink take a `location` property; decodebin2 does not, and
# gst-launch refuses to build a pipeline that sets an unknown property.
ENCODE_COMMAND = 'gst-launch-0.10 filesrc location={name} ! decodebin2 ! audioconvert ! vorbisenc ! oggmux ! filesink location={name}.ogg'
def shows(year, month):
    """Yield ``(index, href, title)`` for each show listed for a month.

    The monthly listing page (``URL`` filled in with *year* and *month*) is
    parsed as HTML.  Every ``<a>`` directly inside an ``<h2>`` whose ``href``
    contains "/programs/" is treated as one show; shows are numbered from 0
    in document order.  ``title`` is the anchor's text.
    """
    listing_url = URL.format(year=year, month=month)
    page = etree.parse(listing_url, etree.HTMLParser())
    anchors = page.xpath('//h2/a[contains(@href, "/programs/")]')
    for position, anchor in enumerate(anchors):
        yield position, anchor.get('href'), anchor.text
def get_phc(link):
    """Return the media path of the show whose page is at *link*.

    *link* is a site-relative href (as yielded by ``shows``); it is appended
    to ``BASE_URL`` and the resulting page is parsed as HTML.

    Returns the value of the player link's ``name`` query parameter, e.g.
    'phc/2012/03/10/phc_20120310_128'.

    Raises IndexError if the page contains no recognised player link.
    """
    page_url = BASE_URL + link
    page = etree.parse(page_url, etree.HTMLParser())
    # The site has used two layouts for the player link:
    #   .../tools/media_player/popup.php?name=phc/2012/03/10/phc_20120310_128
    #   .../listen/full/?name=phc/2014/07/05/phc_20140705_128
    anchors = page.xpath(
        '//a[contains(@href, "popup.php?name=phc")'
        ' or contains(@href, "listen/full/?name=phc")]'
    )
    if not anchors:
        # Same exception type the bare [0] lookup used to raise, but with a
        # message that says what was being looked for.
        raise IndexError('no media player link found on {}'.format(page_url))
    href = anchors[0].get('href')
    # Take exactly the value of `name`: everything after the first "name="
    # up to the next query parameter, if there is one.
    return href.split('name=', 1)[1].split('&', 1)[0]
def main():
    """Command-line entry point: list a month's shows, or download one.

    Arguments are read from ``sys.argv``:

    * ``YEAR MONTH`` -- print the index and title of every show that month.
    * ``YEAR MONTH INDEX`` -- download the show with that index into a file
      named ``phc_<year>_<month>_<index>`` using ``COMMAND``, then transcode
      it to ``<name>.ogg`` using ``ENCODE_COMMAND``.

    Returns an exit status: 0 on success, 2 for bad arguments, 1 when no
    show has the requested index, otherwise the non-zero status of the
    failing external command.
    """
    args = sys.argv[1:]
    if len(args) not in (2, 3):
        sys.stderr.write('usage: {} YEAR MONTH [INDEX]\n'.format(sys.argv[0]))
        return 2
    try:
        numbers = [int(arg) for arg in args]
    except ValueError:
        sys.stderr.write('YEAR, MONTH and INDEX must be integers\n')
        return 2
    year, month = numbers[0], numbers[1]

    if len(numbers) == 2:
        for index, _, title in shows(year, month):
            # Single-argument print() behaves the same on Python 2 and 3.
            print("Show {} - {}".format(index, title))
        return 0

    wanted = numbers[2]
    for index, link, _ in shows(year, month):
        if index != wanted:
            continue
        phc = get_phc(link)
        # phc is e.g. 'phc/2012/04/07/phc_20120407_128'
        name = 'phc_{:4d}_{:02d}_{:d}'.format(year, month, wanted)
        # Split the template first and substitute per token, so whitespace in
        # the scraped path can never turn into extra command-line arguments.
        download = [token.format(phc=phc) for token in COMMAND.split()]
        # The stream is binary audio data, so open the target in binary mode.
        with open(name, 'wb') as out:
            download_status = subprocess.call(download, stdout=out)
        if download_status != 0:
            # Still try to encode whatever was fetched (a partial download
            # may be usable), but make the failure visible.
            sys.stderr.write(
                'warning: rtmpdump exited with status {}\n'.format(download_status))
        # Encode only after the output file has been closed.
        encode = [token.format(name=name) for token in ENCODE_COMMAND.split()]
        encode_status = subprocess.call(encode)
        return encode_status or download_status

    sys.stderr.write(
        'no show with index {} for {:04d}-{:02d}\n'.format(wanted, year, month))
    return 1


if __name__ == '__main__':
    sys.exit(main())
@dobriai
Copy link

dobriai commented Jul 24, 2014

Great script! Many thanks!

It seems, though, that as of recently (end of July 2014), lines 30-31 should be changed to something like this:

     # href="http://prairiehome.publicradio.org/listen/full/?name=phc/2014/07/05/phc_20140705_128"
     phc_path = xml.xpath('//a[contains(@href, "listen/full/?name=phc")]')[0]

in order for the script to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment