Created
March 4, 2021 10:11
-
-
Save salkin-mada/329fa35939819fcd8e47afee84f74a22 to your computer and use it in GitHub Desktop.
Download all volumes of the Computer Music Journal
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download every issue of the Computer Music Journal, volumes 23-43, from
# mitpressjournals.org.
#
# Layout produced under the current directory:
#   computer_music_journals/volume_<V>/<ISSUE>/
#     urls.txt               - PDF links scraped from the issue's TOC page
#     issue_<ISSUE>-<N>.pdf  - one file per link, N counted from 0
#     <issue info>           - empty marker file named after the page's
#                              "issueInfo" text (whitespace -> underscore)
#
# Requires: wget, rg (ripgrep with PCRE2 support, needed for -P).
# NOTE(review): the host name and the HTML patterns below are what this
# scraper relies on; confirm they still match the live site.

set -u

base_dir=computer_music_journals
site=https://www.mitpressjournals.org
download_count=0

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the directory/URL label for an issue.
# Volume 43 is special-cased: issue 2 is addressed as "2-3" and issue 3 is
# skipped (presumably a combined double issue).
# Arguments: $1 - volume number, $2 - issue number
# Outputs:   the label on stdout
# Returns:   0 if the issue should be fetched, 1 if it must be skipped
#######################################
issue_label() {
  local volume=$1 issue=$2
  if (( volume == 43 )); then
    case "$issue" in
      2) printf '%s\n' '2-3'; return 0 ;;
      3) return 1 ;;
    esac
  fi
  printf '%s\n' "$issue"
}

#######################################
# Download the table of contents and all linked PDFs of one issue.
# Leaves the working directory inside the issue directory; the caller is
# responsible for changing back.
# Globals:   site (read), download_count (incremented per saved PDF)
# Arguments: $1 - absolute issue directory, $2 - volume, $3 - issue label
# Returns:   0 on success, 1 if the directory or the TOC page is unavailable
#######################################
fetch_issue() {
  local issue_dir=$1 volume=$2 label=$3
  local link info pdf url_count=0

  mkdir -p -- "$issue_dir" || return 1
  cd -- "$issue_dir" || return 1

  # A leftover index.html from an aborted run would make wget save the fresh
  # page as index.html.1 and the stale copy would be parsed instead.
  rm -f -- index.html

  # The URL ends in "/", so wget stores the page as index.html.
  if ! wget -A.html "$site/toc/comj/$volume/$label/" || [[ ! -f index.html ]]; then
    printf 'warning: no table of contents for volume %s issue %s\n' \
      "$volume" "$label" >&2
    return 1
  fi

  # Extract the part of each link between "/doi/pdf/" and '">PDF', then turn
  # it back into an absolute URL.
  rg -ioP '(?<=/doi/pdf/).*?(?=">PDF)' index.html \
    | sed "s|^|$site/doi/pdf/|" > urls.txt

  while IFS= read -r link || [[ -n "$link" ]]; do
    printf '\n%s\n' "$link"
    pdf="issue_${label}-${url_count}.pdf"
    if wget -O "$pdf" "$link"; then
      # Count only files that were actually saved.
      download_count=$(( download_count + 1 ))
    else
      printf 'warning: failed to download %s\n' "$link" >&2
      # wget -O creates the output file even when the transfer fails.
      rm -f -- "$pdf"
    fi
    # The index advances per link so file names stay stable across reruns.
    url_count=$(( url_count + 1 ))
  done < urls.txt

  # Create an empty marker file per "issueInfo" span found on the page.
  while IFS= read -r info; do
    if [[ -n "$info" ]]; then
      touch -- "${info//[[:space:]]/_}"
    fi
  done < <(rg -oP '(?<=<span class="issueInfo">).*?(?=</span>)' index.html)

  rm -- index.html
}

# Walk volumes 23-43, issues 1-4, and report how many PDFs were saved.
main() {
  local start_dir root_dir volume issue label

  command -v wget > /dev/null || die 'wget is required'
  command -v rg > /dev/null || die 'rg (ripgrep) is required'

  start_dir=$PWD
  mkdir -p -- "$base_dir" || die "cannot create $base_dir"
  root_dir=$start_dir/$base_dir

  for volume in {23..43}; do
    for issue in {1..4}; do
      label=$(issue_label "$volume" "$issue") || continue
      # Best effort: a failing issue is reported and the run continues.
      fetch_issue "$root_dir/volume_$volume/$label" "$volume" "$label"
      cd -- "$start_dir" || die "cannot return to $start_dir"
    done
  done

  printf '\v\t in total %s pdf files were saved' "$download_count"
}

main "$@"
# copyleft (C<-) salkin mada 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment