Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save shaneog/c0bd29c62ae5ef9a33b6284518a5cdf9 to your computer and use it in GitHub Desktop.
Save shaneog/c0bd29c62ae5ef9a33b6284518a5cdf9 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# encoding: utf-8
# Based off the script from https://gist.github.com/Paaskehare/3949299
# @author: Ash Ramesh (27/10/2016)
import cookielib
import urllib
import urllib2
import re
import os.path
# Root of the Destroy All Software site; every request URL is built from it
BASE_URL = 'https://www.destroyallsoftware.com/'

# Account credentials submitted by login() -- fill these in before running
email = 'ADD EMAIL HERE'
password = 'ADD PASSWORD HERE'

# Install a global urllib2 opener backed by a cookie jar, so cookies set by
# one urllib2.urlopen() call are sent along with the following ones
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookielib.CookieJar())
)
urllib2.install_opener(opener)
def login():
    """Sign in to Destroy All Software and return the response body.

    Fetches the sign-in page, extracts the hidden ``authenticity_token``
    form field from it, then POSTs the sign-in form using the module-level
    ``email`` and ``password``.

    Returns:
        The body of the response to the sign-in POST.

    Raises:
        RuntimeError: if no ``authenticity_token`` field is found on the
            sign-in page.
    """
    # Get the sign in page
    url = BASE_URL + 'screencasts/users/sign_in'
    page = urllib2.urlopen(url).read()

    # Get the auth token; fail with a clear message instead of an
    # AttributeError on None when the field is not present
    match = re.search(
        r'<input type="hidden" name="authenticity_token" value="([\w/\+=]+?)" />',
        page)
    if match is None:
        raise RuntimeError(
            'Could not find authenticity_token on sign-in page: ' + url)
    token = match.group(1)

    # Build the form data
    values = {
        'utf8': '✓',
        'authenticity_token': token,
        'user[email]': email,
        'user[password]': password,
        'commit': 'Sign in',
    }

    # Passing a data payload makes urllib2 issue a POST to the same URL
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    return urllib2.urlopen(req).read()
def get_catalog_urls():
    """Get a list of relative urls to each video in the catalog.

    Urls look like '/screencasts/catalog/clarity-via-isolated-tests'.

    Returns:
        A list of the href values found on the catalog page, in the
        reverse of the order they appear on the page.
    """
    url = BASE_URL + 'screencasts/catalog'
    page = urllib2.urlopen(url).read()
    # Raw string so that \s reaches the regex engine as written
    links = re.findall(r'<div class="episode">\s+<a href="(.*?)">', page)
    # NOTE(review): the reversal presumably puts the oldest episode first --
    # confirm against the ordering of the catalog page
    return links[::-1]
def download_all_screencasts(relative_url_list, chunk_size=64 * 1024):
    """Download every screencast in the list into the current directory.

    Each video is saved as '<last url segment>.mov'. A screencast whose
    '.mov' file already exists is skipped. Data is streamed into a
    '<name>.mov.part' file and renamed to '<name>.mov' only after the whole
    stream has been read, so an interrupted run never leaves a truncated
    '.mov' that a later run would mistake for a finished download.

    Args:
        relative_url_list: iterable of site-relative screencast urls, e.g.
            '/screencasts/catalog/clarity-via-isolated-tests'.
        chunk_size: number of bytes requested per read while streaming.
    """
    for screencast_url in relative_url_list:
        # Build filename - e.g. clarity-via-isolated-tests
        filename = screencast_url.split('/')[-1]
        full_filename = filename + '.mov'

        if os.path.exists(full_filename):
            print('Already downloaded: ' + filename + ' skipping ...')
            continue
        print('Attempting to retrieve and download ' + filename)

        # Go to the screencast page and get the source url for the video.
        # BASE_URL already ends in '/', so drop the leading one here.
        url = BASE_URL + screencast_url.lstrip('/')
        page = urllib2.urlopen(url).read()
        download_urls = re.findall('source.src = "(.*?)"', page)
        if not download_urls:
            # Keep going with the rest of the list instead of dying on an
            # IndexError for a single page
            print('No video source found for "' + filename + '" skipping ...')
            continue

        # Download the video & save to file
        print('Downloading "' + filename + '" ...')
        _download_to_file(download_urls[0], full_filename, chunk_size)


def _download_to_file(source_url, destination, chunk_size):
    """Stream source_url to destination, creating it only once complete."""
    partial = destination + '.part'
    response = urllib2.urlopen(urllib2.Request(source_url))
    try:
        # 'wb' truncates any stale .part file left by an interrupted run
        with open(partial, 'wb') as out:
            while True:
                data = response.read(chunk_size)
                if not data:
                    break
                out.write(data)
    finally:
        response.close()
    os.rename(partial, destination)
def main():
    """Sign in, read the catalog, and download every listed screencast."""
    # The returned page is not needed; login() is called so that the
    # requests that follow are made after signing in
    login()
    catalog_urls = get_catalog_urls()
    download_all_screencasts(catalog_urls)


# Only run when executed as a script, not when imported
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment