Last active
April 7, 2020 22:16
-
-
Save theagoliveira/c00da4b1512ed31298018e00ed6e0a62 to your computer and use it in GitHub Desktop.
Bash script to list and/or download files from The Art Institute of Chicago website
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# List and/or download artwork images from The Art Institute of Chicago
# website (https://www.artic.edu/artworks/<id>/).
#
# Usage: pass exactly one of
#   -f  download the artwork ids listed in ./codes.txt
#   -d  download artwork ids 0..249999
#   -l  only list artwork ids 0..249999 (no image download)
#
# External tools used by the script: curl, pup, recode, wget, seq.

# Mode flags; the option parser switches exactly one of them to "true".
DownloadFlag=false
DownloadFromFileFlag=false
ListFlag=false

# Output locations: the list files go into ArtDir, images into the PD
# (credit "CC0 Public Domain Designation") or NPD (any other credit) subdirectory.
ArtDir="./ArtIC"
ArtDirPD="$ArtDir/PD"
ArtDirNPD="$ArtDir/NPD"

# Base names (without ".txt") of the artwork list and the downloaded-file list,
# and the whitespace-separated artwork ids to visit; all set by the option parser.
ListName=""
FileListName=""
Iterate=""
# Parse the mode option. Each mode sets its flag, the base names of the list
# files and the artwork ids to iterate over. The leading ':' in the optstring
# puts getopts in silent mode, so unknown options arrive here as '?'.
while getopts ":fdl" opt; do
  case "$opt" in
    f) # DOWNLOAD FROM FILE
      DownloadFromFileFlag=true
      ListName="download_from_file__list"
      FileListName="download_from_file__file_list"
      # Fail early instead of silently iterating over nothing.
      if [[ ! -r ./codes.txt ]]; then
        echo "Cannot read ./codes.txt." >&2
        exit 1
      fi
      Iterate=$(cat "./codes.txt")
      ;;
    d) # DOWNLOAD
      DownloadFlag=true
      ListName="download__list"
      FileListName="download__file_list"
      Iterate=$(seq 0 249999)
      ;;
    l) # LIST
      ListFlag=true
      ListName="full_list"
      FileListName=""
      Iterate=$(seq 0 249999)
      ;;
    \?)
      # Usage errors go to stderr and yield a non-zero status so callers
      # can detect them.
      echo "Unsupported option." >&2
      echo "Usage: [-f -d -l]" >&2
      exit 2
      ;;
  esac
done
# Reject any combination of two mode options. This is a usage error, so the
# message goes to stderr and the exit status is non-zero.
if [[ $DownloadFlag == true && $DownloadFromFileFlag == true ]] ||
  [[ $DownloadFlag == true && $ListFlag == true ]] ||
  [[ $DownloadFromFileFlag == true && $ListFlag == true ]]; then
  echo "You have to specify only one option." >&2
  exit 2
fi

# Reject the case where no mode option was given at all.
if [[ $DownloadFlag == false && $DownloadFromFileFlag == false && $ListFlag == false ]]; then
  echo "You have to specify one option." >&2
  exit 2
fi
# Create the output directories. -p keeps re-runs quiet when the directories
# already exist, and the quoting keeps paths containing spaces intact.
mkdir -p -- "$ArtDir"

# The image directories are only needed by the two download modes.
if [[ $ListFlag == false ]]; then
  mkdir -p -- "$ArtDirPD"
  mkdir -p -- "$ArtDirNPD"
fi
#######################################
# Record one image in the file list, download it, then pause briefly.
# Globals:   ArtDir, ArtDirPD, ArtDirNPD, FileListName (read)
# Arguments: $1 - target file name (no directory part)
#            $2 - image URL
#            $3 - credit line belonging to that image
#######################################
artic_save_image() {
  local file_name=$1 url=$2 credit=$3
  local target_dir=$ArtDirNPD
  if [[ $credit == "CC0 Public Domain Designation" ]]; then
    target_dir=$ArtDirPD
  fi
  echo "File: $file_name; URL: $url" >> "$ArtDir/$FileListName.txt"
  wget -q -O "$target_dir/$file_name" "$url"
  # Short pause between downloads.
  sleep 0.25s
}

# Visit every artwork id: fetch its page, extract title/author/credit, append
# an entry to the list file and, unless in list mode, download its images.
# $Iterate is a whitespace-separated id list, so word splitting is intended.
# shellcheck disable=SC2086
for i in $Iterate; do
  IMGLINK="https://www.artic.edu/artworks/${i}/"
  SRC=$(curl -fsL "$IMGLINK")
  if [[ -z $SRC ]]; then
    # Nothing was fetched (curl -f yields no output on HTTP errors):
    # print just the id and move on.
    echo "$i"
    continue
  fi

  TITLE=$(printf '%s\n' "$SRC" | pup 'dd[itemprop="name"] span text{}')
  AUTHOR=$(printf '%s\n' "$SRC" | pup 'dd[itemprop="creator"] a text{}')
  TITLE=${TITLE//—/--}
  # Several creators arrive one per line; join them with ", ".
  AUTHOR=${AUTHOR//$'\n'/, }
  AUTHOR=${AUTHOR//—/--}
  TITLE=$(printf '%s\n' "$TITLE" | recode HTML..LATIN1)
  AUTHOR=$(printf '%s\n' "$AUTHOR" | recode HTML..LATIN1)

  # Collect the download URLs and credit lines, one array element per
  # non-empty output line. Reading line by line (instead of word-splitting a
  # command substitution) keeps characters such as '*' or '?' from being
  # glob-expanded.
  DOWNLINK=()
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ -n $line ]]; then
      DOWNLINK+=("$line")
    fi
  done < <(printf '%s\n' "$SRC" | pup 'button attr{data-gallery-img-download-url}')
  PUBDOM=()
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ -n $line ]]; then
      PUBDOM+=("$line")
    fi
  done < <(printf '%s\n' "$SRC" | pup 'button attr{data-gallery-img-credit}')

  # The list entry and the file name use the first credit line.
  CREDIT=${PUBDOM[0]}
  ENTRY="No.: $i; Author: $AUTHOR; Title: $TITLE; Copyright: $CREDIT"
  echo "$ENTRY"
  echo "$ENTRY" >> "$ArtDir/$ListName.txt"

  if [[ $ListFlag == false ]]; then
    NUMDOWN=${#DOWNLINK[@]}
    # Make title and author safe for use inside a file name.
    TITLE=${TITLE//\//-}
    TITLE=${TITLE//[<>:\\|?*]/_}
    AUTHOR=${AUTHOR//\//-}
    AUTHOR=${AUTHOR//[<>:\\|?*]/_}
    # A '/' in the credit line would otherwise be taken as a directory
    # separator and make the download fail.
    FILENAME="Author_ ${AUTHOR:0:50}; Title_ ${TITLE:0:50}; No._ ${i}; Copyright_ ${CREDIT//\//-}"
    if ((NUMDOWN == 1)); then
      artic_save_image "$FILENAME.jpg" "${DOWNLINK[0]}" "${PUBDOM[0]}"
    else
      # Several images (or none): number the files and classify each image
      # by its own credit line.
      for ((j = 0; j < NUMDOWN; j++)); do
        artic_save_image "$FILENAME-${j}.jpg" "${DOWNLINK[j]}" "${PUBDOM[j]}"
      done
    fi
  fi
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 | |
9 | |
11 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment