Created
July 13, 2013 14:05
-
-
Save melpomene/5990824 to your computer and use it in GitHub Desktop.
Python script to download all the Last Tuesday Society podcasts in one swoop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
Python script to download all the Last Tuesday Society podcasts in one fell swoop.
Find them here: http://www.thelasttuesdaysociety.org/podcasts.html
Run with 'python lasttuesdaysocietypodcast.py' (Python 2).
Depends on requests and BeautifulSoup 3.
""" | |
from requests import get | |
from BeautifulSoup import BeautifulSoup, SoupStrainer | |
BASE = "http://www.thelasttuesdaysociety.org/" | |
def download():
    """Download every podcast mp3 linked from the Last Tuesday Society site.

    Fetches ``BASE + "podcasts.html"``, visits each site-relative ``.html``
    page linked from it, and saves every ``<embed>`` whose ``src`` is an mp3
    under ``images/podcasts/`` into the current working directory, named
    after the last path component of ``src``.

    A failure while processing one page is reported and the remaining pages
    are still processed. Returns nothing.
    """
    # Write in sizeable chunks; iter_content() defaults to 1 byte per chunk.
    chunk_size = 64 * 1024

    index = BeautifulSoup(get(BASE + "podcasts.html").content)
    for anchor in index.findAll('a'):
        # Anchors without an href (e.g. named anchors) are skipped instead of
        # raising KeyError and aborting the whole run.
        href = anchor.get('href') or ""
        if "html" not in href:
            continue
        # Skip absolute URLs and direct pdf/mp3 links.
        if "http" in href or "pdf" in href or "mp3" in href:
            continue

        try:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s %s" % ("Downloading ", href))
            page = BeautifulSoup(get(BASE + href).content)
            for embed in page.findAll('embed'):
                src = embed.get("src") or ""
                if "images/podcasts/" not in src or "mp3" not in src:
                    continue

                print("Downloading mp3...")
                # stream=True avoids buffering the whole file in memory.
                response = get(BASE + src, stream=True)
                # Do not save an HTTP error page as an .mp3 file.
                response.raise_for_status()

                # Keep only the last path component: it does not depend on
                # where "images/podcasts/" sits in src, and it stops a remote
                # src from naming a path outside the current directory.
                filename = src.rsplit("/", 1)[-1]
                with open(filename, 'wb') as out:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:
                            out.write(chunk)
                print("Done.")
        except Exception as exc:
            # Best effort per page, but let Ctrl-C / SystemExit propagate and
            # report what actually went wrong.
            print("Error: %s" % exc)
# Script entry point: start the download only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    download()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment