Last active
January 12, 2017 16:38
-
-
Save adrianshort/01ae1c3adccab86bf9ee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh
# Download PDF files for a planning application from the Sutton Council
# planning website.
#
# If you run this more than once it'll only download the new files uploaded
# for that application (wget --no-clobber skips files that already exist).
#
# Usage: get.sh <application number>, e.g. get.sh B2015/71962
#
# The files are saved in a directory named after the application number,
# created relative to the current directory. The list of URLs found is left
# in urls.txt inside that directory.
#
# Install curl and wget before use. Mac users can install them with Homebrew.
# Windows users: Try running this in Cygwin or install Linux in a virtual machine.
#
# Adrian Short 26 Feb 2016

set -u

URLS=urls.txt
BASEURL=https://fastweb.sutton.gov.uk/FASTWEB

# ERE has no lazy quantifiers, so bound the match with a negated character
# class instead: a URL ends at the first quote or angle bracket.
PDF_URL_PATTERN="https?://[^\"'<>]+\.(pdf|PDF)"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s <application number>, e.g. %s B2015/71962\n' \
    "${0##*/}" "${0##*/}" >&2
  exit 2
fi

for tool in curl wget; do
  command -v "$tool" >/dev/null 2>&1 \
    || die "$tool is required but was not found in PATH"
done

mkdir -p -- "$1" || die "Cannot create directory: $1"
cd -- "$1" || die "Cannot change to directory: $1"

# Absolute paths so that cleanup works whatever the current directory is
# when the script exits.
COOKIEJAR="$PWD/cookiejar.txt"
PAGE="$PWD/images-page.$$.html"

# Remove the temporary files on every exit path.
cleanup() {
  rm -f -- "$COOKIEJAR" "$PAGE"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

echo "Getting session cookies"
curl -sS -c "$COOKIEJAR" "$BASEURL/welcome.asp" > /dev/null \
  || die "Could not get session cookies from $BASEURL/welcome.asp"
echo "OK"
echo

echo "Getting list of PDF files. This could take several minutes if there are a large number of documents for this application."
# -b sends the session cookies obtained above; -c keeps the jar up to date.
# The page is saved to a file first so that a curl failure is not hidden
# behind the exit status of a pipeline.
curl -sS -b "$COOKIEJAR" -c "$COOKIEJAR" \
  --data "cbxCopyrightStatement=on" \
  --data-urlencode "ApplicationNumber=$1" \
  "$BASEURL/images.asp" > "$PAGE" \
  || die "Could not get the list of documents for application $1"

grep -E -o -- "$PDF_URL_PATTERN" "$PAGE" > "$URLS" \
  || die "No PDF links found for application $1"
echo "OK"

echo "Downloading PDFs"
# NOTE(review): --no-check-certificate disables TLS certificate verification.
# It is kept because the original script relied on it; remove it if the
# server's certificate validates.
wget --no-check-certificate --no-clobber -i "$URLS" \
  || echo "Warning: wget reported a problem; some files may not have been downloaded." >&2
echo "Downloading complete"

cleanup
ls -lht
cd - || exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment