Skip to content

Instantly share code, notes, and snippets.

@theagoliveira
Last active April 7, 2020 22:16
Show Gist options
  • Save theagoliveira/c00da4b1512ed31298018e00ed6e0a62 to your computer and use it in GitHub Desktop.
Save theagoliveira/c00da4b1512ed31298018e00ed6e0a62 to your computer and use it in GitHub Desktop.
Bash script to list and/or download files from The Art Institute of Chicago website
#!/usr/bin/env bash

# Mode switches. The option parser turns one of them on and the checks that
# follow it reject zero or several active modes.
ListFlag=false
DownloadFlag=false
DownloadFromFileFlag=false

# Output tree: list files are written to $ArtDir; images whose credit is
# "CC0 Public Domain Designation" go to PD, every other image goes to NPD.
ArtDir="./ArtIC"
ArtDirPD="${ArtDir}/PD"
ArtDirNPD="${ArtDir}/NPD"

# Filled in by the option parser: base names (without ".txt") of the list
# files, and the whitespace-separated artwork numbers to visit.
ListName=
FileListName=
Iterate=
# Parse the command-line switches and record the selected mode in globals.
#   -f  download the artwork numbers listed in ./codes.txt
#   -d  download artwork numbers 0..249999
#   -l  only list artwork numbers 0..249999 (no file list is written)
# Globals written: DownloadFromFileFlag, DownloadFlag, ListFlag, ListName,
#                  FileListName, Iterate
# Arguments:       the script's own arguments ("$@")
# Exits with status 1 (message on stderr) for an unsupported option or when
# -f is given and ./codes.txt cannot be read.
parse_options() {
  # A local OPTIND makes getopts scan this function's arguments from the start.
  local OPTIND=1 opt
  while getopts ":fdl" opt; do
    case "$opt" in
      f) # DOWNLOAD FROM FILE
        DownloadFromFileFlag=true
        ListName="download_from_file__list"
        FileListName="download_from_file__file_list"
        # Fail early: without the code list there is nothing to iterate over.
        if [[ ! -r ./codes.txt ]]; then
          echo "Cannot read ./codes.txt" >&2
          exit 1
        fi
        Iterate=$(cat "./codes.txt")
        ;;
      d) # DOWNLOAD
        DownloadFlag=true
        ListName="download__list"
        FileListName="download__file_list"
        Iterate=$(seq 0 249999)
        ;;
      l) # LIST
        ListFlag=true
        ListName="full_list"
        FileListName=""
        Iterate=$(seq 0 249999)
        ;;
      \?)
        # Misuse is an error: report on stderr and use a non-zero status.
        echo "Unsupported option." >&2
        echo "Usage: [-f -d -l]" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Check that exactly one of the three mode flags is active.
# Globals read: DownloadFlag, DownloadFromFileFlag, ListFlag
# Outputs:      an explanatory message on stderr when the selection is invalid
# Returns:      0 when the selection is acceptable, 1 otherwise
validate_mode_selection() {
  # Any pair of active flags means more than one mode was requested.
  if [[ $DownloadFlag == true && $DownloadFromFileFlag == true ]] ||
    [[ $DownloadFlag == true && $ListFlag == true ]] ||
    [[ $DownloadFromFileFlag == true && $ListFlag == true ]]; then
    echo "You have to specify only one option." >&2
    return 1
  fi

  # All three still at their default: no mode was requested at all.
  if [[ $DownloadFlag == false && $DownloadFromFileFlag == false && $ListFlag == false ]]; then
    echo "You have to specify one option." >&2
    return 1
  fi

  return 0
}

# A bad selection is a usage error, so stop with a non-zero status.
validate_mode_selection || exit 1
# Create the output directories. "-p" keeps re-runs quiet when they already
# exist, and the quotes keep a path with spaces in one piece. Stop right away
# if a directory cannot be created, because every later write targets it.
mkdir -p -- "$ArtDir" || exit 1
# The PD/NPD image directories are only needed by the two download modes.
if [[ $ListFlag == false ]]; then
  mkdir -p -- "$ArtDirPD" "$ArtDirNPD" || exit 1
fi
# Make a piece of text usable inside a file name: "/" becomes "-" and each of
# < > : \ | ? * becomes "_".
# Arguments: $1 - text to clean
# Outputs:   the cleaned text on stdout (no trailing newline)
sanitize_for_filename() {
  local text=$1
  text=${text//\//-}
  text=${text//[<>:\\|?*]/_}
  printf '%s' "$text"
}

# Print, one per line, the value of an attribute taken from the <button>
# elements of a page.
# Arguments: $1 - page HTML, $2 - attribute name
button_attr_values() {
  printf '%s\n' "$1" | pup "button attr{$2}"
}

# Record one image in the file list and download it.
# Globals read: ArtDir, ArtDirPD, ArtDirNPD, FileListName
# Arguments:    $1 - image URL
#               $2 - credit text of that image (selects the PD or NPD folder)
#               $3 - target file name, already sanitised
download_image() {
  local url=$1 credit=$2 file_name=$3
  local target_dir=$ArtDirNPD
  if [[ $credit == "CC0 Public Domain Designation" ]]; then
    target_dir=$ArtDirPD
  fi

  echo "File: $file_name; URL: $url" >> "$ArtDir/$FileListName.txt"
  if ! wget -q -O "$target_dir/$file_name" "$url"; then
    # wget -O creates the output file before the transfer, so a failed
    # download would otherwise leave an empty or partial image behind.
    echo "Download failed: $url" >&2
    rm -f -- "$target_dir/$file_name"
  fi
  # Short pause after every download, presumably to avoid hammering the site.
  sleep 0.25s
}

# Fetch one artwork page, log its metadata and, unless in list mode, download
# every image offered on it.
# Globals read: ArtDir, ListName, ListFlag (plus those of download_image)
# Arguments:    $1 - artwork number
# Outputs:      the metadata line on stdout, or just the number when the page
#               could not be fetched
process_artwork() {
  local id=$1
  local page_url="https://www.artic.edu/artworks/${id}/"
  local html title author record base_name line count j
  local -a download_urls=() credits=()

  html=$(curl -fsL "$page_url")
  if [[ -z $html ]]; then
    # Nothing came back (curl -f yields no body on HTTP errors): only echo
    # the number so the gap is visible in the output.
    echo "$id"
    return 0
  fi

  title=$(printf '%s\n' "$html" | pup 'dd[itemprop="name"] span text{}')
  author=$(printf '%s\n' "$html" | pup 'dd[itemprop="creator"] a text{}')
  # Em dashes become "--" before the conversion to LATIN1 below; several
  # creators (one per line) are joined with ", ".
  title=${title//—/--}
  author=${author//$'\n'/, }
  author=${author//—/--}
  title=$(printf '%s\n' "$title" | recode HTML..LATIN1)
  author=$(printf '%s\n' "$author" | recode HTML..LATIN1)

  # Collect the non-empty output lines as array elements. Reading line by
  # line avoids the glob expansion that an unquoted $(...) would apply to
  # URLs or credits containing ? or *.
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ -n $line ]]; then download_urls+=("$line"); fi
  done < <(button_attr_values "$html" data-gallery-img-download-url)
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ -n $line ]]; then credits+=("$line"); fi
  done < <(button_attr_values "$html" data-gallery-img-credit)

  # The log line and the file name both carry the credit of the first image.
  record="No.: $id; Author: $author; Title: $title; Copyright: ${credits[0]}"
  printf '%s\n' "$record"
  printf '%s\n' "$record" >> "$ArtDir/$ListName.txt"

  # List mode stops here; the two download modes continue.
  if [[ $ListFlag != false ]]; then
    return 0
  fi

  title=$(sanitize_for_filename "$title")
  author=$(sanitize_for_filename "$author")
  # The credit is sanitised as well: a "/" in it would otherwise turn the
  # file name into a path through a directory that does not exist.
  base_name="Author_ ${author:0:50}; Title_ ${title:0:50}; No._ ${id}; Copyright_ $(sanitize_for_filename "${credits[0]}")"

  count=${#download_urls[@]}
  if ((count == 1)); then
    download_image "${download_urls[0]}" "${credits[0]}" "${base_name}.jpg"
  else
    # Several images (or none): number the files and file each one under
    # its own credit.
    for ((j = 0; j < count; j++)); do
      download_image "${download_urls[j]}" "${credits[j]}" "${base_name}-${j}.jpg"
    done
  fi
}

# $Iterate is a whitespace-separated list of artwork numbers, so the unquoted
# expansion (word splitting) is intentional here.
for i in $Iterate; do
  process_artwork "$i"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment