Created
February 26, 2018 16:21
-
-
Save simonerni/d7f87d322a4ad78ba08e8d606615c4b1 to your computer and use it in GitHub Desktop.
Video Lecture Downloader ETHZ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[computer networks 2018]
url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
directory: /myAbsolutePath/ComputerNetworks
quiet: True
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original author: Basil Fürer. Redistributed here for convenience.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# -*- coding: utf-8 -*- | |
# | |
import argparse | |
import bs4 | |
import datetime as d | |
import os | |
import sys | |
import urllib.request as ul | |
import configparser | |
# Module-wide settings. The command-line / config handling at the bottom of
# this file rebinds DIR, QUALITY and QUIET before any download starts.
DIR = os.path.expanduser('~/downloads')  # directory the videos are saved to
QUALITY = -1  # index into the ascending quality list: 0 = worst, -1 = best
QUIET = False  # when True, progress output is suppressed
# Browser-like User-Agent sent with every HTTP request.
UAGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3112.113 Safari/537.36'
)
def pprint(out):
    """Print *out* to stdout unless the module-level QUIET flag is set."""
    if QUIET:
        return
    print(out)
def die(e, s):
    """Print an error message prefixed with the script name, then terminate.

    Args:
        e: Description of the error (anything ``str.format`` can render).
        s: Exit status handed to the interpreter.

    Raises:
        SystemExit: Always, carrying *s* as its exit code.
    """
    print('{}: error: {}'.format(os.path.basename(__file__), e))
    # sys.exit() rather than the bare exit(): the latter is a convenience
    # helper injected by the ``site`` module for interactive sessions and is
    # absent when the interpreter runs without it (e.g. ``python -S``).
    sys.exit(s)
def pad(s):
    """Return *s* followed by ':' as a fixed-width progress label.

    The width is 40 columns minus the length of ' [failed]' (31 characters):
    shorter labels are right-padded with spaces, longer ones are cut off.
    """
    width = 40 - len(' [failed]')
    label = s + ':'
    return label.ljust(width)[:width]
def url_open(url):
    """Fetch *url* and return the page parsed as a BeautifulSoup tree.

    The request carries the module's browser-like User-Agent header. The
    response body is decoded as UTF-8 and parsed with the 'html.parser'
    backend.

    Args:
        url: Address of the page to load.

    Returns:
        A ``bs4.BeautifulSoup`` object for the page.

    Raises:
        Errors from urllib (for a bad or unreachable URL) and
        UnicodeDecodeError (body is not valid UTF-8) propagate to the caller.
    """
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    # The context manager closes the HTTP response (and its socket) even if
    # read() or decode() raises, instead of leaving that to the garbage
    # collector.
    with ul.urlopen(req) as res:
        html = res.read().decode('utf-8')
    return bs4.BeautifulSoup(html, 'html.parser')
def get_vids(url):
    """Return a lazy iterator over the episode links found on page *url*.

    For every ``<div class="play">`` on the page, the href of its first
    ``<a>`` element is taken and everything from the first '?' onwards
    (the query string) is dropped.
    """
    def _episode_link(div):
        anchor = div.find_all('a')[0]
        return anchor['href'].split('?')[0]

    page = url_open(url)
    play_divs = page.find_all('div', {'class': 'play'})
    return map(_episode_link, play_divs)
def get_mp4(url):
    """Collect the target file name and the download links of one episode.

    Args:
        url: Episode page URL. A site-relative path (leading '/') is
            resolved against https://www.video.ethz.ch.

    Returns:
        A ``(filename, mp4s)`` tuple. ``filename`` has the form
        '<YYYY.MM.DD>-<title>.mp4'. ``mp4s`` maps an int parsed from the text
        after the last 'x' of each link label to that link's href
        (presumably the height of a 'WIDTHxHEIGHT' label -- verify against
        the page markup).

    Raises:
        IndexError: If the page lacks the expected 'accordionContent' block
            or its third paragraph.
        ValueError: If a link label or the date cannot be parsed.
    """
    # startswith() instead of url[0]: an empty string must not raise
    # IndexError here.
    if url.startswith('/'):
        url = 'https://www.video.ethz.ch' + url
    soup = url_open(url)

    mp4s = {}
    for item in soup.find_all('li', {'class': 'video'}):
        links = item.find_all('a')
        if not links:
            # An entry without a link offers nothing to download.
            continue
        first = links[0]
        mp4s[int(first.getText().split('x')[-1])] = first['href']

    details = soup.find_all('div', {'class': 'accordionContent'})[0]
    name = details.findNext('h3').getText().replace(' ', '_')
    # The title ends up as a file name, so a path separator in it (e.g.
    # "TCP/IP") would be taken for a directory and make the write fail.
    name = name.replace('/', '_').replace(os.sep, '_')
    # The third paragraph of the details block holds the date as DD.MM.YYYY;
    # it is reordered to YYYY.MM.DD so that file names sort chronologically.
    date = details.find_all('p')[2].getText().strip()
    date = d.datetime.strptime(date, '%d.%m.%Y').strftime('%Y.%m.%d')
    return ('{}-{}.mp4'.format(date, name), mp4s)
def download(fname, url):
    """Download *url* into the local file *fname*.

    The body is streamed to disk in fixed-size chunks, so memory use stays
    constant regardless of the size of the video.

    Args:
        fname: Path of the file to create (an existing file is overwritten).
        url: Address of the resource to fetch.

    Raises:
        Errors from urllib and OSError from file handling propagate to the
        caller. A partially written file may remain after a failure.
    """
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    # Open the connection first: if the request fails, no empty file is
    # created. Both handles are closed on every exit path.
    with ul.urlopen(req) as res, open(fname, 'wb') as f:
        # iter(callable, sentinel) calls read() until it returns b'' (EOF).
        for chunk in iter(lambda: res.read(1 << 20), b''):
            f.write(chunk)
def fetch_videos(aurl):
    """Download every episode listed on *aurl* that is not yet present in DIR.

    DIR is created if necessary. For each episode the quality is chosen by
    indexing the ascending list of available qualities with QUALITY. Files
    that already exist are skipped. Progress is reported through pprint() /
    stdout unless QUIET is set.

    Args:
        aurl: URL of the lecture overview page.

    Raises:
        Errors raised while loading or parsing the overview and episode pages
        propagate. A failed download of a single file is reported as
        '[failed]' and does not stop the loop. KeyboardInterrupt/SystemExit
        during a download propagate after the partial file has been removed.
    """
    os.makedirs(DIR, exist_ok=True)
    downloaded = False
    for u in get_vids(aurl):
        name, urls = get_mp4(u)
        # Keys sort ascending, so QUALITY 0 picks the lowest and -1 the
        # highest quality on offer.
        url = [urls[k] for k in sorted(urls)][QUALITY]
        fname = os.path.join(DIR, name)
        if os.path.exists(fname):
            continue
        if not downloaded:
            pprint('Downloading:')
            downloaded = True
        if not QUIET:
            # pad() appends the ':' itself; adding one here as well would
            # print a double colon.
            sys.stdout.write(pad(' {}'.format(name.replace('.mp4', ''))))
            sys.stdout.flush()
        try:
            download(fname, url)
        except BaseException as err:
            # Never leave a truncated file behind: the existence check above
            # would treat it as a finished download on the next run.
            try:
                os.remove(fname)
            except OSError:
                pass
            if not isinstance(err, Exception):
                # Ctrl-C / interpreter exit must abort the run, not merely
                # skip the current file.
                raise
            pprint('\t[failed]')
        else:
            pprint('\t[ok]')
    if not downloaded:
        pprint('Nothing to download..')
# Command-line entry point: parse the arguments, then download either the
# URLs given with -u or every section of the config file given with -c.
# Exit codes: 1 invalid url, 3 config unreadable, 4 config unparsable,
# 5 bad config field, 9 download aborted.
if __name__ == '__main__':
    example = 'https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html'
    desc = ('Download podcasts from ethz.ch\n\n'
            'Example config (can contain multiple entries):\n\n'
            '\t[computer networks]\n'
            '\turl: {}\n'
            '\tdirectory: ~/documents/computer_networks/podcasts\n'
            '\t# quality: 0 # uncomment for worst quality\n'
            '\tquiet: True\n'
            ).format(example)
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=argparse.RawTextHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-u', '--urls', nargs='+',
                       help='url(s) of podcast')
    group.add_argument('-c', '--conf', metavar='CONF', nargs=1,
                       help='specify config file')
    parser.add_argument('-d', '--dir', metavar='DIR', nargs=1,
                        help='specify directory')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help="don't write to stdout")
    parser.add_argument('-s', '--stingy', action='store_true',
                        help='download worst quality')
    args = parser.parse_args()

    if args.dir:
        # argparse derives the attribute name from '--dir', so the value is
        # stored as args.dir (a one-element list because of nargs=1).
        DIR = args.dir[-1]
    if args.quiet:
        QUIET = True
    if args.stingy:
        QUALITY = 0
    DIR = os.path.abspath(DIR)

    if args.conf:
        # Command-line flags act as defaults for sections that do not set
        # the corresponding field themselves.
        defaults = {'directory': DIR,
                    'quiet': str(QUIET),
                    'quality': str(QUALITY)}
        for conf in args.conf:
            if not os.path.isfile(conf):
                die("can't read '{}'".format(conf), 3)
            config = configparser.ConfigParser(defaults)
            try:
                config.read(conf)
            except (configparser.Error, UnicodeDecodeError):
                die("can't parse '{}'".format(conf), 4)
            for s in config.sections():
                try:
                    url = config.get(s, 'url')
                except configparser.Error:
                    die("missing field 'url' in '{}'".format(s), 5)
                DIR = os.path.expanduser(config.get(s, 'directory'))
                try:
                    QUIET = config.getboolean(s, 'quiet')
                except (ValueError, configparser.Error):
                    die("can't parse field 'quiet' in '{}'".format(s), 5)
                try:
                    QUALITY = config.getint(s, 'quality')
                except (ValueError, configparser.Error):
                    die("can't parse field 'quality' in '{}'".format(s), 5)
                try:
                    fetch_videos(url)
                except ValueError:
                    die("invalid url '{}'".format(url), 1)
                except (Exception, KeyboardInterrupt):
                    die('download aborted', 9)
    else:
        for url in args.urls:
            try:
                fetch_videos(url)
            except ValueError:
                die("invalid url '{}'".format(url), 1)
            except (Exception, KeyboardInterrupt):
                die('download aborted', 9)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
eth-video.py [-h] (-u URLS [URLS ...] | -c CONF) [-d DIR] [-q] [-s]
Download podcasts from ethz.ch
Example config (can contain multiple entries):
    [computer networks]
    url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
    directory: ~/documents/computer_networks/podcasts
    # quality: 0 # uncomment for worst quality
    quiet: True
optional arguments:
  -h, --help            show this help message and exit
  -u URLS [URLS ...], --urls URLS [URLS ...]
                        url(s) of podcast
  -c CONF, --conf CONF  specify config file
  -d DIR, --dir DIR     specify directory
  -q, --quiet           don't write to stdout
  -s, --stingy          download worst quality
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment