Created
June 15, 2012 13:59
-
-
Save teh/2936606 to your computer and use it in GitHub Desktop.
Script to download Prairie Home Companion show
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to download a show from http://prairiehome.publicradio.org/

Depends on rtmpdump (apt-get install rtmpdump)

Usage:
    # List shows in January
    python prar.py 2012 1
    # Download show with index 0
    python prar.py 2012 1 0

Please be nice and support their show via donations :)
""" | |
import time | |
import sys | |
from lxml import etree | |
import subprocess | |
# Root of the show's web site; relative show links are appended to it.
BASE_URL = 'http://prairiehome.publicradio.org'

# Monthly program listing page; formatted with integer ``year`` and ``month``.
URL = 'http://prairiehome.publicradio.org/programs/{year:4d}/{month:02d}'

# rtmpdump invocation that streams one show to stdout; ``phc`` is the media
# path of the show (e.g. 'phc/2012/04/07/phc_20120407_128').  The string is
# split on whitespace before being executed, so no argument may contain spaces.
COMMAND = 'rtmpdump -v --playpath mp3:ondemand/{phc} -r rtmp://archivemedia.publicradio.org/music'

# GStreamer 0.10 pipeline that transcodes the downloaded file ``name`` to
# ``name.ogg``.  Only filesrc/filesink take a ``location`` property; decodebin2
# reads from its upstream element and must not be given one.
ENCODE_COMMAND = 'gst-launch-0.10 filesrc location={name} ! decodebin2 ! audioconvert ! vorbisenc ! oggmux ! filesink location={name}.ogg'
def shows(year, month):
    """Yield ``(index, href, title)`` for each show linked on a month's page.

    The page at ``URL`` (formatted with *year* and *month*) is parsed as
    HTML.  Every ``<a>`` directly inside an ``<h2>`` whose ``href`` contains
    ``/programs/`` counts as one show; shows are numbered from 0 in the
    order the XPath query returns them.
    """
    listing_url = URL.format(year=year, month=month)
    page = etree.parse(listing_url, etree.HTMLParser())
    anchors = page.xpath('//h2/a[contains(@href, "/programs/")]')
    for position, anchor in enumerate(anchors):
        yield position, anchor.get('href'), anchor.text
def get_phc(link):
    """Return the media path named by the player popup link on a show page.

    *link* is a site-relative href (as yielded by ``shows``); it is appended
    to ``BASE_URL`` and the resulting page is parsed as HTML.  The first
    ``<a>`` whose ``href`` contains ``popup.php?name=phc`` is used, and the
    text between the first and second ``=`` of that href is returned.

    Raises IndexError when the page contains no such link.
    """
    page = etree.parse(BASE_URL + link, etree.HTMLParser())
    popup_anchors = page.xpath('//a[contains(@href, "popup.php?name=phc")]')
    # Example href:
    # http://prairiehome.publicradio.org/www_publicradio/tools/media_player/popup.php?name=phc/2012/03/10/phc_20120310_128
    popup_href = popup_anchors[0].get('href')
    pieces = popup_href.split('=')
    return pieces[1]
def main():
    """Command-line entry point: list a month's shows or download one.

    ``YEAR MONTH``
        Print every show found for that month together with its index.
    ``YEAR MONTH INDEX``
        Download the show with that index into ``phc_YEAR_MM_INDEX`` using
        ``COMMAND`` and then transcode it with ``ENCODE_COMMAND``.

    Any other argument count, or an argument that is not an integer, prints
    a usage message to stderr and returns without doing anything.
    """
    usage = 'Usage: python prar.py YEAR MONTH [INDEX]\n'
    args = sys.argv[1:]
    if len(args) not in (2, 3):
        sys.stderr.write(usage)
        return
    try:
        numbers = [int(arg) for arg in args]
    except ValueError:
        sys.stderr.write(usage)
        return
    year, month = numbers[0], numbers[1]

    if len(numbers) == 2:
        for index, _, title in shows(year, month):
            # Single-argument call form is valid on both Python 2 and 3.
            print("Show {} - {}".format(index, title))
        return

    wanted = numbers[2]
    for index, link, _ in shows(year, month):
        if index != wanted:
            continue
        # phc is e.g. 'phc/2012/04/07/phc_20120407_128'
        phc = get_phc(link)
        name = 'phc_{:4d}_{:02d}_{:d}'.format(year, month, wanted)
        # Binary mode: the child process writes raw audio bytes to this file.
        with open(name, 'wb') as out:
            status = subprocess.call(COMMAND.format(phc=phc).split(), stdout=out)
        if status != 0:
            # Still try to encode whatever was written, as the script always
            # has, but make the failure visible.
            sys.stderr.write(
                'Warning: download command exited with status {}\n'.format(status))
        subprocess.call(ENCODE_COMMAND.format(name=name).split())
        return

    sys.stderr.write(
        'No show with index {} found for {:4d}-{:02d}\n'.format(wanted, year, month))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great script! Many thanks!
It seems, though, that as of late July 2014, lines 30-31 need to be changed to something like this:
in order for the script to work.