Created
October 24, 2012 18:55
-
-
Save rnagle/3948066 to your computer and use it in GitHub Desktop.
Freezer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Create a local copy of a website, suitable
# for upload to S3 as a static archive.
#
# Usage:
#
#   ./freezer DOMAIN [WGET_OPTION...]
#
# Provide the domain name you wish to archive
# as the first argument.
#
# For example:
#
#   $ ./freezer triblocal.com
#
# By default, this script runs wget with -mp, but you
# can pass extra parameters by adding them after the
# domain name you want to archive:
#
# For example:
#
#   $ ./freezer triblocal.com --max-redirect 0
#
# The mirror is written to ./DOMAIN in the current directory and is then
# post-processed in place:
#   1. absolute "http://DOMAIN" links are rewritten to site-relative links;
#   2. query strings ("?...") are stripped from file names.
#
# Exit status: 0 on success, 1 on bad usage or when no mirror was produced.
#
# NOTE: "set -e" is deliberately not used. wget exits non-zero for partial
# failures (e.g. a single broken link), and the archive is still worth
# post-processing in that case. Critical steps are checked explicitly.

# Suffix given to the backups that "sed -i" writes. Supplying an explicit
# suffix attached to -i is the form accepted by both GNU and BSD sed; the
# name is unusual so that removing the backups cannot hit mirrored content.
readonly BACKUP_SUFFIX=".freezer-bak"

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a non-fatal diagnostic to stderr.
warn() {
  printf 'Warning: %s\n' "$*" >&2
}

#######################################
# Mirror a site with wget and post-process it for static hosting.
# Arguments:
#   $1    - domain to archive (hostname, optionally followed by :port)
#   $2... - extra options handed to wget verbatim
# Outputs:
#   progress messages on stdout, diagnostics on stderr
#######################################
main() {
  local domain=${1:-}
  if [[ -z "$domain" ]]; then
    die "Please specify the domain you would like to archive."
  fi
  shift # whatever remains in "$@" are the extra wget options

  # Anchored on both ends and limited to hostname characters: the value is
  # interpolated into a sed expression and a path below, so whitespace,
  # slashes and regex metacharacters other than "." must never get through.
  local domain_pattern='^[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+(:[0-9]+)?$'
  if [[ ! "$domain" =~ $domain_pattern ]]; then
    die "Please specify a valid domain name."
  fi

  command -v wget >/dev/null 2>&1 || die "wget is required but was not found in PATH."

  local mirror_dir="./${domain}"

  echo "Archiving ${domain} to local directory: ${PWD}/${domain}"
  echo "..."
  # Only one URL is passed: a bare domain argument is expected to be read by
  # wget as a second http:// URL for the same site.
  # "$@" keeps every extra option as its own word, even with spaces in it.
  wget -mp "$@" "http://${domain}/" \
    || warn "wget exited with status $?; the archive may be incomplete."

  [[ -d "$mirror_dir" ]] \
    || die "Nothing was downloaded: ${PWD}/${domain} does not exist."

  echo "Making all links relative."
  echo "..."
  # After validation, "." is the only regex metacharacter the domain can hold.
  local domain_regex=${domain//./\\.}
  # LC_ALL=C makes sed treat files as bytes, so images and other binary
  # files do not trigger "illegal byte sequence" errors.
  LC_ALL=C find "$mirror_dir" -type f \
    -exec sed -i"${BACKUP_SUFFIX}" -e "s|http://${domain_regex}||g" {} + \
    || warn "some files could not be rewritten."

  echo "Removing temporary files"
  echo "..."
  # Done before the rename step so a backup such as "page?x=1.freezer-bak"
  # is never renamed on top of the real "page".
  find "$mirror_dir" -type f -name "*${BACKUP_SUFFIX}" -exec rm -f -- {} +

  echo "Cleaning filenames, removing query params to prevent 404 errors."
  echo "..."
  local path dir base cleaned_base target
  # NUL-delimited so that names with spaces or newlines survive intact.
  while IFS= read -r -d '' path; do
    dir=${path%/*}
    base=${path##*/}
    # Strip only within the file name, never within a directory component.
    cleaned_base=${base%%\?*}
    target="${dir}/${cleaned_base}"
    if [[ -z "$cleaned_base" || -d "$target" ]]; then
      warn "cannot strip query string from ${path}; leaving it as is."
      continue
    fi
    # Several query variants of one page collapse to the same name;
    # the last one moved wins.
    mv -- "$path" "$target" || warn "could not rename ${path}."
  done < <(find "$mirror_dir" -type f -name '*[?]*' -print0)

  echo "Done. The contents of directory: "
  echo "${PWD}/${domain}"
  echo "are ready to be deployed to S3."
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment