Last active
August 1, 2024 15:51
-
-
Save AJMaxwell/d9e737277c8a4f42fc19f14e464a4d3f to your computer and use it in GitHub Desktop.
A quick bash script I wrote to prime the cache of all my websites. This script grabs the sitemap of the site you wish to warm, then greps the URLs out of it and fetches each one with wget so that it is cached on the server.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#####################################################################################################
# Cache Warmer
#
# Usage: cache-warmer.sh ...args
#
# This script grabs the sitemap of the site you wish to warm, then greps the URLs out of it and
# fetches each one with wget so that it is cached on the server. I'm sure there are better ways to
# do this, but this was a simple enough method for my needs. I didn't want to have to type in the
# URLs each time I warmed their cache, so I just made simple functions with short names to feed
# those URLs into the cache warming function. I also created an 'all' function to run all of the
# short-name functions, with pause breaks, when 'all' or no argument is provided.
#####################################################################################################
# User-Agent header sent with every request. The sites this script is used on
# disallow blank and wget User-Agent strings (among others), so a browser
# string (the one from my local machine) is the default. Export USERAGENT
# before running the script to send a different one.
USERAGENT="${USERAGENT:-Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36}"
#######################################
# Warm the cache of www.example.com.
# Arguments: none
# Outputs:   progress messages on stdout; an error summary on stderr when the
#            warm-up did not finish cleanly
# Returns:   the exit status of warmUp (0 on success)
#######################################
example () {
  local rc=0
  echo "Warming cache for example.com..."
  warmUp "www.example.com" || rc=$?
  # Only claim success when warmUp actually succeeded.
  if [ "$rc" -eq 0 ]; then
    echo "www.example.com cache warmed"
  else
    echo "www.example.com cache warming finished with errors (exit status $rc)" >&2
  fi
  return "$rc"
}
#######################################
# Warm the cache of shop.example.com.
# Arguments: none
# Outputs:   progress messages on stdout; an error summary on stderr when the
#            warm-up did not finish cleanly
# Returns:   the exit status of warmUp (0 on success)
#######################################
shop () {
  local rc=0
  echo "Warming cache for shop.example.com..."
  warmUp "shop.example.com" || rc=$?
  # Only claim success when warmUp actually succeeded.
  if [ "$rc" -eq 0 ]; then
    echo "shop.example.com cache warmed"
  else
    echo "shop.example.com cache warming finished with errors (exit status $rc)" >&2
  fi
  return "$rc"
}
#######################################
# Wait between sites so consecutive warm-ups do not hit connection limits.
# Arguments: $1 - optional number of seconds to wait (default: 30)
# Outputs:   a "Pausing..." / "Resuming..." notice on stdout
# Returns:   the exit status of the final echo
#######################################
pause () {
  local seconds=${1:-30}
  echo; echo
  echo "Pausing for ${seconds} seconds to avoid possible connection limits..."
  # Pass a bare number: the "30s" unit-suffix form is not accepted by every
  # sleep implementation.
  sleep "$seconds"
  echo "Resuming..."; echo
}
#######################################
# Warm every configured site in turn, pausing between sites.
# Arguments: none
# Outputs:   whatever the per-site functions and pause print
# Returns:   0 if every site function succeeded, otherwise the status of the
#            last site function that failed
#######################################
all () {
  local rc=0
  echo "Warming cache for all sites...."
  # Keep going after a failure, but remember it so the caller can see it.
  example || rc=$?
  pause
  shop || rc=$?
  return "$rc"
}
#######################################
# Warm the cache of one site by requesting every URL listed in its sitemap.
# Globals:   USERAGENT (read) - User-Agent header sent with every request
# Arguments: $1 - host name of the site without scheme, e.g. "www.example.com"
# Outputs:   wget's own reporting; diagnostics on stderr
# Returns:   2 if no host was given,
#            1 if the sitemap could not be fetched or lists no URLs for the host,
#            otherwise the exit status of the wget run that requests the URLs
#######################################
warmUp () {
  local host=$1
  if [[ -z "$host" ]]; then
    echo "warmUp: missing host argument" >&2
    return 2
  fi

  # Match the host literally. An unescaped "." in the regex would also accept
  # look-alike hosts such as "wwwXexample.com".
  local host_re
  host_re=${host//./[.]}

  # Fetch the sitemap first so a failed download is reported instead of
  # silently handing an empty URL list to the second wget.
  local sitemap
  if ! sitemap=$(wget --user-agent="$USERAGENT" -q "https://$host/sitemap.xml" -O -); then
    echo "warmUp: could not fetch https://$host/sitemap.xml" >&2
    return 1
  fi

  # If there are particular subdirectories in your sitemap that you do not wish to parse
  # (i.e. because they cannot be cached), switch grep to -oP and use the following regex:
  #   grep -oP "https?://$host_re/((?!subdirectory))[^<]*"
  # Sitemaps are XML, so "&" in a URL is written as "&amp;"; undo that before
  # requesting the URL.
  local urls
  urls=$(grep -oE "https?://$host_re/[^<]*" <<<"$sitemap" | sed 's/&amp;/\&/g')
  if [[ -z "$urls" ]]; then
    echo "warmUp: no URLs for $host found in its sitemap" >&2
    return 1
  fi

  # -i - reads the URL list from stdin, -O /dev/null discards the bodies,
  # -w 1 waits one second between requests.
  wget -nv --user-agent="$USERAGENT" -i - -O /dev/null -w 1 <<<"$urls"
}
#######################################
# Entry point: run the site functions named on the command line.
# With no arguments, runs `all`. With arguments, each one must be the name of
# a function defined in this script; they are run in order with a pause
# between consecutive sites (none after the last one).
# Arguments: $@ - zero or more site function names
# Outputs:   whatever the site functions print; an error on stderr for an
#            unknown name
# Returns:   2 if any argument is not a defined function (nothing is run),
#            otherwise 0 if every site succeeded or the status of the last
#            site function that failed
#######################################
main () {
  if [ "$#" -eq 0 ]; then
    all
    return
  fi

  local site
  # Validate everything up front so a typo does not abort a run half-way, and
  # so an argument can never be word-split and executed as an arbitrary command.
  for site in "$@"; do
    if ! declare -F -- "$site" >/dev/null; then
      echo "cache-warmer: unknown site '$site'" >&2
      return 2
    fi
  done

  local rc=0 first=1
  for site in "$@"; do
    # Pause only between sites; there is nothing to wait for after the last.
    if [ "$first" -eq 0 ]; then
      pause
    fi
    first=0
    "$site" || rc=$?
  done
  return "$rc"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Probably I didn't help you to understand. Txt file is not in address. Can I call it from a directory and not from url?