Last active
August 29, 2015 14:02
-
-
Save Jolg42/caba465420e075f0988e to your computer and use it in GitHub Desktop.
Download Unsplash Images https://stackoverflow.com/questions/20701428/periodically-scrape-and-download-all-images-from-a-website-with-javascript-auto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Cron wrapper for unsplash.sh.
#
# Changes into the directory that holds unsplash.sh and the imgs directory,
# then runs the script, appending its stdout to OUT.log and its stderr to
# ERR.log (both relative to that directory). Each run is preceded by a
# separator line and a timestamp in OUT.log.
#
# PATH=... and YOUR_DIRECTORY below are placeholders that must be edited
# before the wrapper is installed.

export PATH=... # might not be needed, but sometimes the PATH is not set
                # correctly in cron-called scripts. Copy the PATH setting you
                # normally see under console.

# Stop right away if the directory is wrong: otherwise the log files would be
# created, and ./unsplash.sh looked up, in whatever directory cron started in.
cd YOUR_DIRECTORY || exit 1 # the directory where the script and imgs directory is located

{
  echo "========================"
  echo -n "run unsplash.sh from cron "
  date
  ./unsplash.sh
} >> OUT.log 2>> ERR.log

# Example crontab entry:
#0 3 * * * PATH_to_the/unsplash.cron
# This will run the script every day at 3:00 (minute 0, hour 3).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download every photo linked from the paginated unsplash.com index into
# ./imgs.
#
# Pages are requested as unsplash.com/page/1, /page/2, ... and the loop ends
# at the first page that cannot be fetched or that contains no line matching
# the image-link pattern. A photo whose target file already exists is
# skipped, so the script can be re-run periodically and only fetches what is
# new.
#
# Requires: wget, grep, sed, mktemp.

# -p: an imgs directory left by an earlier run is not an error.
mkdir -p imgs || exit 1

# Scratch files live in a private directory that is removed on exit, instead
# of being left behind in the working directory.
workdir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$workdir"' EXIT
page_html=$workdir/tmppage
page_links=$workdir/tmppage.imgs

page=1
while true; do # for all the pages
  # A failed fetch is treated like an empty page: stop rather than parse
  # whatever partial output wget left behind.
  wget -O "$page_html" -- "unsplash.com/page/$page" || break

  grep '<a href.*<img src.*title' "$page_html" > "$page_links"
  if [ ! -s "$page_links" ]; then # empty page - end the loop
    break
  fi

  echo "Reading page $page:"
  # Each matching line is reduced to "<href> <title>"; read puts the first
  # word into img and the remainder of the line into post.
  sed 's/^.*<a href="\([^"]*\)".*title="\([^"]*\)".*$/\1 \2/' "$page_links" |
    while read -r img post; do
      # for all the images on page
      # File name: the part of the title after "post/", with every "/"
      # replaced by "_" so the name cannot point outside imgs/.
      name=$(printf '%s\n' "$post" | sed -e 's|.*post/\(.*\)$|\1|' -e 's|/|_|g')
      if [ -z "$name" ]; then
        echo "skipping $img: no title to derive a file name from" >&2
        continue
      fi
      target=imgs/$name.jpg

      printf 'Photo %s: ' "$target"
      if [ -f "$target" ]; then # we already have this image
        echo "already have"
        continue
      fi
      echo "downloading"

      # wget -O creates the output file even when the transfer fails, so
      # download to a .part file and rename only on success; otherwise a
      # broken download would be reported as "already have" on later runs.
      if wget -O "$target.part" -- "$img"; then
        mv -- "$target.part" "$target"
      else
        echo "download failed: $img" >&2
        rm -f -- "$target.part"
      fi
    done

  page=$((page + 1))
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment