Created
June 7, 2017 09:57
-
-
Save radianttap/43455d9b92433895e73c64c3d3690761 to your computer and use it in GitHub Desktop.
Bash script to download all HD videos + PDF slides for WWDC 2017
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download the HD video and, when available, the PDF slides of every released
# WWDC 2017 session listed on developer.apple.com.
#
# Files are saved in ./wwdc2017 as "<session id> - <title>.mp4" / ".pdf".
# Downloads use `wget -c`, so the script can be re-run to resume partial files.
# Requires: wget, sed, grep, uniq, head.

# Set up the environment. `mkdir -p` keeps a re-run from failing because the
# directories already exist, and every `cd` is checked so the cleanup below
# can never run in an unexpected directory.
mkdir -p wwdc2017 || exit 1
cd wwdc2017 || exit 1
out_dir=$PWD
mkdir -p tmp_download || exit 1
cd tmp_download || exit 1

# Remove the scratch directory and the session list when the script exits.
cleanup() {
  cd "$out_dir" || return
  rm -rf tmp_download
  rm -f downloadData
}
trap cleanup EXIT

# Extract IDs
echo "Downloading the index"
if ! wget -q https://developer.apple.com/videos/wwdc2017/ -O index.html; then
  echo "Could not download the session index" >&2
  exit 1
fi

# Keep the parts of the document from data-released="true" up to the first H4
# header (where the title of that talk is), keep the lines containing
# "videos/play/wwdc2017", strip everything except the session number, then
# drop consecutive duplicates.
# The angle brackets are deliberately NOT backslash-escaped: GNU sed reads
# \< and \> as word-boundary anchors, not as literal characters.
sed -n '/data-released="true"/,/<h4/p' index.html \
  | grep videos/play/wwdc2017 \
  | sed -e 's/.*wwdc2017\///' -e 's/\/">//' \
  | uniq > ../downloadData
rm -f index.html

# Iterate through the talk IDs.
# `read` without IFS= is intentional: it trims stray surrounding whitespace.
while read -r line; do
  # Skip blank lines so we never request a URL without a session ID
  [[ -n "$line" ]] || continue

  # Download the page with the real download URL and the talk name
  if ! wget -q "https://developer.apple.com/videos/play/wwdc2017/$line/" -O webpage; then
    echo "Could not download the page for session $line" >&2
    rm -f webpage
    continue
  fi

  # Grab the title of the page (first match only), then clean it up
  talkName=$(grep "<title" webpage | head -n 1 \
    | sed -e 's/.*<title>//' -e 's/ - WWDC 2017.*//')
  # A "/" in the title would be read as a directory separator by wget -O
  talkName=${talkName//\//-}

  # Grepping "_hd_" brings up the download URL, then some cleanup.
  # For SD video, replace _hd_ by _sd_.
  dlURL=$(grep _hd_ webpage | head -n 1 \
    | sed -e 's/.*href=//' -e 's/>.*//' -e 's/"//g')
  # The dot is escaped so only a literal ".pdf" matches
  pdfURL=$(grep '\.pdf' webpage | grep devstreaming | head -n 1 \
    | sed -e 's/.*href=//' -e 's/>.*//' -e 's/"//g' -e 's/ .*$//')
  rm -f webpage

  # Is there a video URL?
  if [[ -n "$dlURL" ]]; then
    echo "Video $line ($talkName)"
    echo " url: $dlURL"
    # Great, we download the file
    wget -c "$dlURL" -O "../$line - $talkName.mp4"
  else
    echo
  fi

  # Is there a PDF URL?
  if [[ -n "$pdfURL" ]]; then
    echo "PDF $line ($talkName)"
    echo " url: $pdfURL"
    # Great, we download the file
    wget -c "$pdfURL" -O "../$line - $talkName.pdf"
  else
    echo
  fi
done < "../downloadData"

# Scratch files are removed by the EXIT trap installed above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@ulidev ±150GB