-
-
Save travisbhartwell/2278448 to your computer and use it in GitHub Desktop.
Python Script to download October 2011 General Conference Archives
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
This is a handy script to download the media from General Conference for | |
your own use. Execute this script with the appropriate options to download the | |
files you wish. | |
The only dependency outside the Python standard library is BeautifulSoup.
Example invocation: | |
./ldsconf.py --individual 2012 4 audio-mp3 | |
This will download mp3 files of the individual talks for the regular | |
four sessions for April 2012 General Conference. | |
""" | |
import sys | |
import urllib | |
import urlparse | |
import argparse | |
from BeautifulSoup import BeautifulSoup | |
# Media formats the script knows how to fetch.  Each entry is the value of the
# HTML ``class`` attribute carried by the matching download links, and is also
# the set of values accepted for the ``class_`` command-line argument.
download_classes = (
    # Video renditions.
    'video-360p', 'video-720p', 'video-1080p', 'video-wmv',
    # Audio renditions.
    'audio-mp3', 'audio-m4b',
)
def main(download_individual=True,
         download_music=False,
         download_sessions=False,
         download_priesthood=False,
         download_young_women_meeting=False,
         download_relief_society_meeting=False,
         download_class=None,
         dryrun=True,
         conference_url='http://lds.org/general-conference/'
                        'sessions/2011/10?lang=eng'):
    """Download the conference media files linked from ``conference_url``.

    The page is fetched and parsed with BeautifulSoup, every ``<a>`` tag whose
    ``class`` attribute equals ``download_class`` is collected, and each link
    is then filtered by the ``download_*`` flags before being saved into the
    current working directory under the last path component of its URL.

    Arguments:
        download_individual: keep links that are not inside a ``head-row``
            element (the individual talks).
        download_music: keep links inside an element of class ``music``.
            Requires ``download_individual``.
        download_sessions: keep links inside a ``head-row`` element (the
            full-session files).
        download_priesthood: keep links inside the ``sessions`` element
            whose id is ``priesthood``.
        download_young_women_meeting: same, for id ``young-women``.
        download_relief_society_meeting: same, for id ``relief-society``.
        download_class: value of the ``class`` attribute identifying the
            wanted media format (see ``download_classes``).
        dryrun: when true, only print what would be downloaded.
        conference_url: URL of the conference session index page.

    Exits the process with status 1 if music is requested without
    individual talks.
    """
    if (not download_individual) and download_music:
        print("To download music, must download individual")
        sys.exit(1)

    # Read the whole page, then release the connection even if parsing fails.
    page = urllib.urlopen(conference_url)
    try:
        document = BeautifulSoup(page)
    finally:
        page.close()

    # Optional sessions, keyed by the id of their enclosing "sessions" element.
    optional_sessions = (
        ("young-women", download_young_women_meeting),
        ("relief-society", download_relief_society_meeting),
        ("priesthood", download_priesthood),
    )

    for tag in document.findAll("a", attrs={"class": download_class}):
        href = tag.get("href")
        if not href:
            # An anchor without a target has nothing to download.
            continue

        # Don't download Young Women, Relief Society or Priesthood session
        # files unless the corresponding flag was given.
        if any(not wanted and
               tag.findParents(attrs={"class": "sessions", "id": session_id})
               for session_id, wanted in optional_sessions):
            continue

        # Links inside a "head-row" element are the full-session files;
        # everything else is an individual item.
        is_full_session = bool(tag.findParents(attrs={"class": "head-row"}))

        # Don't download full session files if not wanted
        if is_full_session and not download_sessions:
            continue

        # Don't download individual files if not wanted
        if not is_full_session and not download_individual:
            continue

        # Don't download musical number files if not wanted
        if not download_music and \
                tag.findParents(attrs={"class": "music"}):
            continue

        # Resolve relative links against the page URL so that urlretrieve
        # always receives an absolute URL; absolute links pass through as-is.
        url = urlparse.urljoin(conference_url, href)

        # Get an appropriate filename: the last component of the URL path.
        filename = urlparse.urlsplit(url).path.split("/")[-1]

        print("Downloading %s as %s" % (url, filename))

        if not dryrun:
            urllib.urlretrieve(url, filename)
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate the combinations
    # argparse cannot express by itself, build the conference index URL for
    # the requested year/month and hand everything to main().
    parser = argparse.ArgumentParser(description="LDS Conference Downloader")
    parser.add_argument('year', type=int,
                        help='Year of general conference sessions to download')
    parser.add_argument('month', type=int,
                        help='Month of general conference sessions to '
                             'download (4 or 10)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Do not perform downloads')
    parser.add_argument('--music', action='store_true',
                        help="Download musical numbers")
    parser.add_argument('--priesthood', action='store_true',
                        help="Download Priesthood Session.")
    parser.add_argument('--young-womens', action='store_true',
                        help="Download Young Women's Session.")
    parser.add_argument('--relief-society', action='store_true',
                        help="Download Relief Society Session.")

    # Exactly one of --individual / --entire-sessions must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--individual', action='store_true')
    group.add_argument('--entire-sessions', action='store_true',
                       help="Only get files for entire session, "
                            "not individual files.")

    parser.add_argument('class_', type=str, nargs='?',
                        choices=download_classes, default='audio-mp3',
                        help="Class of file to download "
                             "(default: %(default)s).")

    args = parser.parse_args()

    if args.month not in [4, 10]:
        print("conference is only in April or October")
        sys.exit(1)

    conference_url = 'http://lds.org/general-conference/' \
                     'sessions/%d/%02d?lang=eng' % (args.year, args.month)

    if not args.entire_sessions and args.class_ == 'audio-m4b':
        # The two literals are concatenated, so the separating space must be
        # part of one of them.
        print("m4b is only available for full sessions, "
              "must have --entire-sessions selected")
        sys.exit(1)

    main(download_individual=args.individual,
         download_music=args.music,
         download_sessions=args.entire_sessions,
         download_priesthood=args.priesthood,
         download_young_women_meeting=args.young_womens,
         download_relief_society_meeting=args.relief_society,
         download_class=args.class_,
         dryrun=args.dry_run,
         conference_url=conference_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment