Last active
March 2, 2017 21:09
-
-
Save AshwinRamesh/e10af8a210f7fbe4b9463b7abbb180ea to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# encoding: utf-8
# Based on the script from https://gist.github.com/Paaskehare/3949299
# @author: Ash Ramesh (27/10/2016)
import cookielib | |
import urllib | |
import urllib2 | |
import re | |
import os.path | |
# Credentials submitted by login(); fill these in before running.
email = 'ADD EMAIL HERE'
password = 'ADD PASSWORD HERE'

# Root url of the DAS site; every other url is built by appending to it.
BASE_URL = 'https://www.destroyallsoftware.com/'

# Build a cookie-aware opener and install it globally, so that every
# urllib2.urlopen() call in this script shares the same cookie jar.
_cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(_cookie_handler)
urllib2.install_opener(opener)
def login():
    """Sign in to the site with the module-level ``email`` and ``password``.

    Fetches the sign-in page, pulls the hidden ``authenticity_token`` form
    field out of it, and POSTs the token back together with the credentials.
    Cookies set by the responses are stored by the cookie-aware opener that
    is installed at module level.

    The response is not inspected, so a rejected login is not detected here.

    Returns:
        The body of the response to the sign-in POST.

    Raises:
        RuntimeError: if no authenticity token is found on the sign-in page.
    """
    # Get the sign in page
    url = BASE_URL + 'screencasts/users/sign_in'
    page = urllib2.urlopen(url).read()

    # Get the auth token. Fail with a clear message instead of an
    # AttributeError on None when the form field is not where we expect it.
    match = re.search(
        r'<input type="hidden" name="authenticity_token" value="([\w/\+=]+?)" />',
        page)
    if match is None:
        raise RuntimeError('Could not find authenticity_token on ' + url)
    token = match.group(1)

    # Build the form data
    values = {
        'utf8': '✓',
        'authenticity_token': token,
        'user[email]': email,
        'user[password]': password,
        'commit': 'Sign in'
    }

    # Call the form with sign in; passing data makes this a POST request.
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    return urllib2.urlopen(req).read()
def get_catalog_urls():
    """Return the relative url of every video linked from the catalog page.

    Each entry looks like '/screencasts/catalog/clarity-via-isolated-tests'.
    The list is in the reverse of the order the links appear on the page.
    """
    catalog_page = urllib2.urlopen(BASE_URL + 'screencasts/catalog').read()
    # One capture per episode block: the href of its first link.
    episode_links = re.findall('<div class="episode">\s+<a href="(.*?)">', catalog_page)
    episode_links.reverse()
    return episode_links
def download_all_screencasts(relative_url_list):
    """Download every screencast in ``relative_url_list`` into the current directory.

    For each relative url (e.g. '/screencasts/catalog/clarity-via-isolated-tests')
    the last path segment becomes the file name, with '.mov' appended.
    Screencasts whose '.mov' file already exists are skipped.

    The video is first written to '<name>.mov.part' and only renamed to
    '<name>.mov' once the whole response has been read, so an interrupted
    run never leaves a truncated file that a later run would mistake for a
    finished download.

    Args:
        relative_url_list: iterable of site-relative screencast urls.

    Returns:
        None.
    """
    for screencast_url in relative_url_list:
        # Build filename - e.g. clarity-via-isolated-tests
        filename = screencast_url.split('/')[-1]
        full_filename = filename + '.mov'
        if os.path.exists(full_filename):
            print('Already downloaded: ' + filename + ' skipping ...')
            continue
        print('Attempting to retrieve and download ' + filename)

        # Go to the screencast page and get the source url for the video.
        # BASE_URL already ends with '/', so drop the leading one(s) here.
        url = BASE_URL + screencast_url.lstrip('/')
        page = urllib2.urlopen(url).read()
        download_urls = re.findall('source.src = "(.*?)"', page)
        if not download_urls:
            # Without this check the [0] lookup below would abort the whole
            # batch with an IndexError.
            print('No video source found for "' + filename + '" skipping ...')
            continue

        # Download the video & save to file
        print('Downloading "' + filename + '" ...')
        partial_filename = full_filename + '.part'
        response = urllib2.urlopen(urllib2.Request(download_urls[0]))
        try:
            # Open the output once ('wb' also discards any stale .part file)
            # rather than reopening it in append mode for every chunk.
            with open(partial_filename, 'wb') as f:
                while True:
                    data = response.read(64 * 1024)
                    if not data:
                        break
                    f.write(data)
        finally:
            response.close()

        # Only a fully read response reaches this point.
        os.rename(partial_filename, full_filename)
def main():
    """Sign in, read the catalog, and download every screencast in it."""
    # The sign-in response body is not used; login() is called for the
    # cookies it leaves in the globally installed opener.
    login()
    catalog_urls = get_catalog_urls()
    download_all_screencasts(catalog_urls)


# Only start downloading when run as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment