Skip to content

Instantly share code, notes, and snippets.

@niccokunzmann
Last active July 7, 2016 20:52
Show Gist options
  • Save niccokunzmann/9c60cb40d3d3975ef830 to your computer and use it in GitHub Desktop.
Save niccokunzmann/9c60cb40d3d3975ef830 to your computer and use it in GitHub Desktop.
This script downloads all the mp4 files for a tele-task course
#!/usr/bin/python3
## The MIT License (MIT)
##
## Copyright (c) 2014 Nicco Kunzmann
##
## https://gist.github.com/niccokunzmann/9c60cb40d3d3975ef830
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
## THE SOFTWARE.
"""
This Python script downloads whole tele-task classes into a folder.
Requires Python 3.
Source and updates:
https://gist.github.com/niccokunzmann/9c60cb40d3d3975ef830
"""
import webbrowser
import urllib.request
import re
import os
import concurrent.futures
import traceback
def _download(url, name, to_directory, number):
    """Download one file and report progress on stdout.

    url
        the address the file is fetched from
    name
        the file name used inside to_directory
    to_directory
        the directory the file is stored in
    number
        a running number that is only used in the progress messages

    The data is first written to "<name>.part" and moved to its final name
    only after the transfer finished, so an interrupted download never leaves
    a truncated file under the final name. Errors are printed as a traceback
    instead of being raised, because this function runs in a worker thread.
    """
    target = os.path.join(to_directory, name)
    partial = target + '.part'
    try:
        print('downloading number {}: {}'.format(number, name))
        urllib.request.urlretrieve(url, partial)
        # move into place only after the whole file arrived
        os.replace(partial, target)
        print('downloaded number {}: {}'.format(number, name))
    except Exception:
        traceback.print_exc()
        # best-effort cleanup: the partial file may not exist at all
        try:
            os.remove(partial)
        except OSError:
            pass
def download_course(number_or_url, downloads_at_a_time=2, to_directory='tele-task'):
    """Download all chapters of a tele-task course.

    number_or_url
        the number (an int) or the overview url of the tele-task course
    downloads_at_a_time
        how many files should be downloaded at a time
    to_directory
        where the downloads will be stored; created if it does not exist

    Files whose name already exists in to_directory are skipped. Errors while
    opening the overview or a video page are raised to the caller; errors of
    a single download are reported by _download and do not stop the others.
    The function returns after all submitted downloads have finished.
    """
    if isinstance(number_or_url, int):
        overview_url = 'http://www.tele-task.de/archive/series/overview/{}/'.format(number_or_url)
    else:
        overview_url = number_or_url
    if not os.path.exists(to_directory):
        print('creating download directory', to_directory)
        # exist_ok: the directory may appear between the check and the call
        os.makedirs(to_directory, exist_ok=True)
    print('opening course overview')
    with urllib.request.urlopen(overview_url) as website:
        overview = website.read()
    # <a href="/archive/video/ipod/23411/">
    paths = re.findall(b'<a[^>]+?href="(/archive/video/ipod/[^"]*)"', overview, re.IGNORECASE)
    # <a href="http://stream.hpi.uni-potsdam.de:8080/download/podcast/SS_2014/DPS_SS14/DPS_2014_04_14/DPS_2014_04_14_part_1_podcast.mp4">
    download_link = re.compile(b'<a[^>]+?href="(http://stream.hpi.uni-potsdam.de:8080/download/podcast/[^"]*)"', re.IGNORECASE)
    # One snapshot of the directory instead of a listing per link; names are
    # added when submitted so the same file is never downloaded twice at once.
    known_names = set(os.listdir(to_directory))
    downloads = 0
    with concurrent.futures.ThreadPoolExecutor(downloads_at_a_time) as tpe:
        for path in paths:
            path = path.decode('ASCII')
            print('opening', path)
            with urllib.request.urlopen('http://www.tele-task.de' + path) as site:
                page = site.read()
            # Only the first download link of a video page is used.
            match = download_link.search(page)
            if match is None:
                continue
            url = match.group(1).decode('ASCII')
            name = os.path.basename(url)
            if name in known_names:
                print('skipping', name)
                continue
            print('marking for download', name)
            known_names.add(name)
            downloads += 1
            tpe.submit(_download, url, name, to_directory, downloads)
        print('{} file{} will be downloaded.'.format(downloads, '' if downloads == 1 else 's'))
        # leaving the with-block waits for all submitted downloads
    print('done')
# Dependable Systems (SS 2014) is 1005 for example
# https://www.tele-task.de/archive/series/overview/1005/
# Enter the course number from the end of that URL, e.g. "1005" or "1005/".
# Surrounding whitespace and a trailing "/" are removed before the number is
# converted, so both spellings work.
download_course(int(input('https://www.tele-task.de/archive/series/overview/').strip().rstrip('/')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment