Created
December 6, 2022 13:44
-
-
Save davidaparicio/4f766dd250ead9608b6710ada5af3a39 to your computer and use it in GitHub Desktop.
Bash script to download all courses from a website like MIT OCW (MIT OpenCourseWare)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Date uploaded 4/30/2013, 2:21 PM
# Henrique CTRL+L (clear), cd - (prev)
# David CTRL+R (reverse search), CTRL+G (clear the search)
#
# Download every lecture of one course from the weblectures site:
#   1. log in as guest and store the session cookie in cookies.txt
#   2. fetch the course page and extract the lectureId values it mentions
#   3. for each lecture, save the aula.php and slideview.php pages, then
#      download every URL taken from the aula page lines containing "lampiao"
#
# Usage: <this script> [COURSE_ID]
#   COURSE_ID defaults to MC102-1s-2009 and is inserted into the URL as-is.
#
# Files written to the current directory: cookies.txt, the login response
# (saved by wget under its default name), COURSE_ID.id, url, <id>.aula,
# <id>.slide and whatever the media URLs download to.
#
# Reading a file line by line:
# http://en.kioskea.net/faq/1757-how-to-read-a-file-line-by-line
# The loops below use `while IFS= read -r`, so the global IFS is never
# modified and does not need to be saved or restored.

readonly BASE_URL='http://lampiao.ic.unicamp.br/weblectures'
readonly COOKIE_JAR='cookies.txt'

# Print an error message on stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a warning on stderr; the script keeps going (downloads are best-effort).
warn() {
  printf 'warning: %s\n' "$*" >&2
}

course=${1:-MC102-1s-2009}
course_page="${course}.tmp"
id_list="${course}.id"

# Clean the tmp file on every exit path. Only the file created here is
# removed, not every *.tmp that happens to be in the directory.
trap 'rm -f -- "$course_page"' EXIT

# Get index.php and the session cookie
wget --keep-session-cookies --save-cookies "$COOKIE_JAR" \
  --post-data 'login=guest&password=guest' \
  "${BASE_URL}/index.php" \
  || die "login request to ${BASE_URL}/index.php failed"

# --- Not enabled yet: build courses.txt and loop over every course ---------
# Get oldclasses
#wget --load-cookies cookies.txt http://lampiao.ic.unicamp.br/weblectures/old_courses.php
# Get all courseId
#cat index.php | grep "lectures.php?course=" | cut -d'=' -f3 | cut -d"'" -f1 | sort > index
#cat old_courses.php | grep "lectures.php?course=" | cut -d'=' -f3 | cut -d"'" -f1 | sort > old_courses
#cat index old_courses > courses.txt
# Clean the php pages and tmp files
#rm -f index.php index old_courses.php old_courses
#for line in $(cat courses.txt)
#do echo "$line"
#do #mkdir $line
# ... per-course body below ...
#done
# ----------------------------------------------------------------------------

# Fetch the course page; without it there is nothing to download.
wget --load-cookies "$COOKIE_JAR" \
  --output-document="$course_page" \
  "${BASE_URL}/lectures.php?course=${course}" \
  || die "could not fetch the lecture list for course '${course}'"

# Extract the lecture ids. awk drops repeated ids while keeping first-seen
# order, so a lecture linked several times on the page is downloaded once.
grep -o "lectureId=[0-9]\+" "$course_page" \
  | cut -d'=' -f2 \
  | awk '!seen[$0]++' > "$id_list"

while IFS= read -r lecture_id; do
  [[ -n "$lecture_id" ]] || continue
  #cd $line

  wget --load-cookies "$COOKIE_JAR" \
    --output-document="${lecture_id}.aula" \
    "${BASE_URL}/aula.php?lectureId=${lecture_id}" \
    || warn "could not fetch aula page for lecture ${lecture_id}"

  wget --load-cookies "$COOKIE_JAR" \
    --output-document="${lecture_id}.slide" \
    "${BASE_URL}/slideview.php?lectureId=${lecture_id}" \
    || warn "could not fetch slide page for lecture ${lecture_id}"

  # Keep the text between the first pair of single quotes on each line of the
  # aula page that mentions "lampiao"; these are treated as download URLs.
  grep "lampiao" "${lecture_id}.aula" | cut -d"'" -f2 > url

  while IFS= read -r media_url; do
    [[ -n "$media_url" ]] || continue
    # `--` keeps a value starting with '-' from being parsed as an option.
    wget --load-cookies "$COOKIE_JAR" -- "$media_url" \
      || warn "download failed: ${media_url}" #--output-document=$ligne.flv
  done < url

  #cd ..
done < "$id_list"

#FOR EACH COURSE
#wget --load-cookies cookies.txt http://lampiao.ic.unicamp.br/weblectures/lectures.php?course=MC202EF%20-%202012s2
#FOR EACH COURSE
#cat lectures.php\?course\=MC202EF\ -\ 2012s2 | grep -o "lectureId=[0-9]\+"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment