Warm Varnish Cache
#!/bin/bash
# this script will
# a. crawl the designated site X levels deep to generate a list of urls
# b. completely purge all urls for the designated site on the designated varnish servers
# c. clear the nginx / mod_pagespeed cache on the designated app servers
# d. individually warm all the listed urls on each designated varnish server
# e. rewarm them in case pagespeed sent a purge request
# define some variables
WEBHOST="www.yoursite.com"
# make sure the server you run this script on has firewall permission to bypass a potential ELB and go straight to the varnish servers
VARNISHHOSTARRAY=("10.128.16.5:6081" "10.128.16.37:6081")
# make sure the server you run this on has firewall permission to access the app servers
APPHOSTARRAY=("10.128.16.10" "10.128.16.42")
USERAGENT="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.102 Safari/537.36"
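# optional sanity check (an addition, not in the original script): confirm each varnish
# host answers before we purge anything, so a firewalled-off host is noticed up front
# rather than silently skipped during warming
for VARNISHHOST in "${VARNISHHOSTARRAY[@]}"; do
curl -m 5 -s -o /dev/null -H "Host: $WEBHOST" "http://$VARNISHHOST/" || echo "warning: cannot reach $VARNISHHOST"
done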
# a. first we need to get a list of urls for the site.
# we need the list because wget can only spider in non-gzip mode, but we ultimately want to cache gzipped pages.
# we're only crawling 2 levels deep
# if the url list is less than 7 days old then leave it, otherwise refresh it
if test `find "${WEBHOST}-urls.txt" -mtime -7 2>/dev/null`
then
echo "urls file still new"
else
echo "generating urls"
wget -r -A htm -R pdf --spider -l 2 -nc -T 15 -e robots=off -U "$USERAGENT" http://$WEBHOST/ 2>&1 | grep -Eo 'http.+$' | sort | uniq > $WEBHOST-urls.txt
fi
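# hedged addition (not in the original): if the crawl produced nothing (site down, blocked,
# etc.) stop here, otherwise we would purge every varnish server below with no urls left to rewarm
if [ ! -s "${WEBHOST}-urls.txt" ]; then
echo "no urls found for $WEBHOST, aborting"
exit 1
fi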
# this is optional. use only if you run the google pagespeed module behind varnish
# now we need to ssh over to each app server and touch the pagespeed clear-cache flag
# make sure ssh keys exist to support this (a one-time setup sketch follows)
# be sure to adjust ports and/or the path to the cache below.
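# hedged example of that one-time key setup (assumes the user running this script may
# log in as www-data on the app servers; adjust users, key type and hosts to your environment):
#   ssh-keygen -t rsa                     # generate a key pair if you don't already have one
#   ssh-copy-id www-data@10.128.16.10     # install the public key on each app server
#   ssh-copy-id www-data@10.128.16.42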
for APPHOST in "${APPHOSTARRAY[@]}"; do
echo "clearing pagespeed on $APPHOST"
ssh -p 22 www-data@$APPHOST 'touch /var/ngx_pagespeed_cache/cache.flush'
done
# do a full cache warm for the first proxy, then continue to the second, etc...
for VARNISHHOST in "${VARNISHHOSTARRAY[@]}"; do
# now we need to ban everything on all proxies with gzip on/off so we can start fresh...
# banning "/" bans every url containing a "/" - which is every url
# note: you need to have set up a PURGE method handler in your VCL for this to work (see the sketch below)
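# a minimal sketch of such a handler, kept here as a comment; Varnish 3-style VCL is
# assumed, and the purge ACL network is a placeholder - adapt both to your own setup:
#   acl purgers { "10.128.16.0"/24; }
#   sub vcl_recv {
#     if (req.request == "PURGE") {
#       if (!client.ip ~ purgers) { error 405 "Not allowed"; }
#       ban("req.http.host == " + req.http.host + " && req.url ~ " + req.url);
#       error 200 "Banned";
#     }
#   }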
echo "purging $VARNISHHOST"
curl -X PURGE -A "$USERAGENT" -m 15 -s -H "Accept-encoding: gzip" -H "Host: $WEBHOST" http://$VARNISHHOST/ > /dev/null 2>&1
curl -X PURGE -A "$USERAGENT" -m 15 -s -H "Host: $WEBHOST" http://$VARNISHHOST/ > /dev/null 2>&1
# now start warming
while read u; do
# we just want the path, not the host etc
WEBPATH=`sed "s|http://[^/]*/||" <<< "${u}"`
echo "warming $WEBPATH on $VARNISHHOST"
curl -A "$USERAGENT" -m 15 -s -H "Accept-encoding: gzip" -H "Host: $WEBHOST" "http://$VARNISHHOST/$WEBPATH" > /dev/null 2>&1
sleep .5
done < $WEBHOST-urls.txt
# hit them a second time because (if configured to do so) nginx will purge most pages after the first hit,
# since it may not have optimized the whole page by the time it delivered it
while read u; do
WEBPATH=`sed "s|http://[^/]*/||" <<< "${u}"`
echo "rewarming $WEBPATH on $VARNISHHOST"
curl -A "$USERAGENT" -m 15 -s -H "Accept-encoding: gzip" -H "Host: $WEBHOST" "http://$VARNISHHOST/$WEBPATH" > /dev/null 2>&1
#sleep .5
done < $WEBHOST-urls.txt
done
# we used to run wget a second time with its url-list ingest feature (-i), but behind an ELB
# that doesn't guarantee pages get cached on both proxies
# so we just use the curl loop above to ensure each proxy is warmed directly
#wget -i $WEBHOST-urls.txt -nc -T 15 -e robots=off -U "$USERAGENT" --header="accept-encoding: gzip" --spider
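# usage note (an assumption, not part of the original gist): this is typically run from cron
# on a utility host that can reach both the varnish and app servers, e.g.
#   15 3 * * * /usr/local/bin/warm-varnish-cache.sh >> /var/log/warm-varnish-cache.log 2>&1
# the script and log paths above are hypothetical placeholders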