Spider-based Benchmarking with wget
#!/bin/bash
# Spiderme - quick and clean benchmarking for website performance on Pantheon.
#
# This script uses wget to "spider" your website to provide a good set of data
# on site performance. It will automatically bypass Pantheon's edge cache, and
# skip images, javascript, css, etc. It will also only spider links that are
# under the multidev environment you are spidering.
#
#
# USAGE
#
#   ./spiderme.sh https://envname-sitename.pantheonsites.io
#
#
# INSTALLATION
#
# Just download and make the script executable:
#
#   chmod +x spiderme.sh
#
# This script requires the common Linux utilities 'wget' and 'timeout'.
# You can install these on macOS via Homebrew:
#
#   $> brew install wget
#   $> brew install coreutils
#
#
# HOW TO BENCHMARK
#
# For best results you'll want to work on a fresh environment to avoid any
# confusion about what you are measuring. Step-by-step instructions:
#
# 1) Set up a fresh multidev environment to test your performance changes.
#
# 2) Ensure New Relic is enabled and capturing data for the environment
#    you intend to test.
#
# 3) Clear the cache for the environment.
#
# 4) Run the script to establish baseline performance:
#    ./spiderme.sh https://envname-sitename.pantheonsites.io
#
# 5) Push your performance change (e.g. enable PHP 7).
#
# 6) Clear the cache for the environment.
#
# 7) Run the script again.
#
# This should give you a solid "side by side" comparison of before and
# after performance. You'll have two sets of data, each consisting of a
# cold and a warm spider pass on the website, giving you a sense of how
# much benefit your performance improvements deliver in under 30 minutes.
# (A sketch of the full before/after workflow follows below.)
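#
# For illustration only, the steps above might look roughly like this in a
# terminal, assuming a hypothetical site "mysite" with a multidev named
# "perf-test", and that your Terminus version provides the env:clear-cache
# command (check 'terminus list' if unsure):
#
#   terminus env:clear-cache mysite.perf-test                   # step 3
#   ./spiderme.sh https://perf-test-mysite.pantheonsites.io     # step 4: baseline
#   # ...push your performance change (step 5)...
#   terminus env:clear-cache mysite.perf-test                   # step 6
#   ./spiderme.sh https://perf-test-mysite.pantheonsites.io     # step 7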
# Assumes that the script takes a URL to a multidev environment as an arg.
#
# e.g. https://lcache-outlandish-josh.pantheonsites.io
#
# Thanks to http://stackoverflow.com/questions/6174220/parse-url-in-shell-script
# Extract the protocol.
PROTO="$(echo $1 | grep :// | sed -e's,^\(.*://\).*,\1,g')"
# Remove the protocol.
URL="$(echo ${1/$PROTO/})"
# Extract the user (if any).
USER="$(echo $URL | grep @ | cut -d@ -f1)"
# Extract the host.
HOST="$(echo ${URL/$USER@/} | cut -d/ -f1)"
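# For example, with a hypothetical argument of
# https://perf-test-mysite.pantheonsites.io the variables above end up as:
#   PROTO = "https://"
#   URL   = "perf-test-mysite.pantheonsites.io"
#   USER  = ""   (empty, since there is no user@ component)
#   HOST  = "perf-test-mysite.pantheonsites.io"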
# Set proper timeout command.
unamestr=`uname`
if [[ "$unamestr" == 'Darwin' ]]; then
  TIMEOUT_COMMAND='gtimeout'
else
  TIMEOUT_COMMAND='timeout'
fi
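# Optional sanity check (not part of the original script): fail fast with a
# hint if the chosen timeout binary is missing, e.g. when coreutils has not
# been installed on macOS yet.
if ! command -v "$TIMEOUT_COMMAND" > /dev/null 2>&1; then
  echo "Error: '$TIMEOUT_COMMAND' not found. On macOS, run: brew install coreutils"
  exit 1
fi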
# Wget is more efficient when it doesn't waste time downloading static files.
# Add extensions here if you have other file types in play.
REJECT_FILES='jpg,png,pdf,css,js,eot,svg,gif,ico,xml,ttf,mp3,mov,mpg,mp4'
COOKIE='--header "Cookie: NO_CACHE=1"'
SPIDER_ME="$TIMEOUT_COMMAND 300s wget --reject $REJECT_FILES -e robots=off $COOKIE -r -l inf -D$HOST $PROTO$HOST"
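# For reference, with the hypothetical URL above the assembled command expands
# to roughly:
#   gtimeout 300s wget --reject jpg,png,... -e robots=off \
#     --header "Cookie: NO_CACHE=1" -r -l inf \
#     -Dperf-test-mysite.pantheonsites.io https://perf-test-mysite.pantheonsites.io
# -r -l inf follows links recursively with no depth limit, -D restricts the
# crawl to the environment's own hostname, and the NO_CACHE cookie is what
# bypasses Pantheon's edge cache so requests hit the application.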
# Main script. You shouldn't need to edit below this line.
echo "Starting first pass on $URL"
echo "Output is suppressed, just sit tight."
echo "This will go for a maximum of five minutes..."
echo
# Having trouble? The full wget command is echoed below so you can start
# picking it apart.
echo "$SPIDER_ME"
eval $SPIDER_ME
echo "First pass complete!"
rm -rf $HOST
echo "Pausing for two minutes to create a neat break in New Relic..."
echo
sleep 120
echo "Starting second pass on $URL..."
echo
eval $SPIDER_ME
echo "Complete!"
# Clean up.
rm -rf $HOST
gitressa commented: Thanks for sharing! This is probably obvious to most people, but I needed to add "bash" in front (i.e. run it as bash spiderme.sh <url>) to get it running on Linux.
joshkoenig replied: @gitressa That would be due to forgetting the chmod +x step under the Installation instructions above :)