Script to crawl a Magento demo storefront two levels deep and warm its cache, intended to run on demo Pods from a Kubernetes CronJob spec. The script waits for the storefront to become ready, collects the .html links on the front page and on each page it links to, then requests every discovered URL once so those pages land in the cache.
#!/bin/bash
set -euo pipefail

FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

# Wait (up to 30 minutes) for the storefront to answer with HTTP 200.
echo "==> [$(date +%H:%M:%S)] waiting on readiness"
ELAPSED_SECONDS=0
while : ; do
  ELAPSED_SECONDS=$(( ELAPSED_SECONDS + 2 ))
  RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"
  if [ "${RESPONSE_CODE}" = "200" ] || [ "${ELAPSED_SECONDS}" -gt 1800 ]; then
    break
  fi
  printf "."
  sleep 2
done
echo

# Level one: collect the .html links on the front page.
echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^"]+"' \
  | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -u || true)"$'\n'

# Level two: visit each level-one URL and append the .html links it contains.
# ${URL_LIST} is expanded once when the loop starts, so URLs appended inside
# the loop are collected but not crawled further -- the crawl stops two deep.
for url in ${URL_LIST}; do
  echo "==> [$(date +%H:%M:%S)] ${url}"
  URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^"]+"' \
    | grep -Eo '(http|https)://[^#"]+' | grep '\.html$' || true)"$'\n'
done

# Deduplicate, then request every URL once to warm the cache.
URL_LIST="$(echo "${URL_LIST}" | sort -u)"
for url in ${URL_LIST}; do
  echo "==> [$(date +%H:%M:%S)] ${url}"
  curl -s "${url}" >/dev/null || true
done
echo "==> [$(date +%H:%M:%S)] crawl complete"
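The gist title mentions running this via a CronJob spec. The snippet below is a minimal sketch of how the script could be scheduled, not the spec from the original project: the ConfigMap name, namespace, image, mount path, and schedule are all assumptions and would need to match the real deployment.

# Hypothetical wiring: publish the script as a ConfigMap, then schedule it.
kubectl create configmap cache-warm-scripts \
  --from-file=cache-warm.sh --namespace magento-demo   # names are assumptions

kubectl apply -f - <<'EOF'
apiVersion: batch/v1
kind: CronJob
metadata:
  name: cache-warm                 # hypothetical name
  namespace: magento-demo          # hypothetical namespace
spec:
  schedule: "0 * * * *"            # hourly; adjust as needed
  concurrencyPolicy: Forbid        # never overlap two crawls
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: crawler
              # Any image that ships bash and curl will do; this is a placeholder.
              image: registry.example.com/tools/bash-curl:latest
              command: ["bash", "/scripts/cache-warm.sh"]
              env:
                - name: FRONT_URL
                  value: "https://app.exampleproject.test/"
              volumeMounts:
                - name: scripts
                  mountPath: /scripts
          volumes:
            - name: scripts
              configMap:
                name: cache-warm-scripts
EOF

Forbid is used for concurrencyPolicy so a slow crawl is never doubled up by the next scheduled run.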
A second variant performs the same readiness wait and two-level crawl, but keeps pagination (?p=) and color-filter (?color=) URL variants and additionally warms the product image cache by fetching every catalog/product/cache image referenced on each page:
#!/bin/bash
set -euo pipefail

FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

# Wait (up to 30 minutes) for the storefront to answer with HTTP 200.
echo "==> [$(date +%H:%M:%S)] waiting on readiness"
ELAPSED_SECONDS=0
while : ; do
  ELAPSED_SECONDS=$(( ELAPSED_SECONDS + 2 ))
  RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"
  if [ "${RESPONSE_CODE}" = "200" ] || [ "${ELAPSED_SECONDS}" -gt 1800 ]; then
    break
  fi
  printf "."
  sleep 2
done
echo

# Level one: collect the .html links on the front page.
echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^"]+"' \
  | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -u || true)"$'\n'

# Level two: append .html links from each level-one page, keeping plain URLs
# plus pagination (?p=) and color-filter (?color=) variants.
for url in ${URL_LIST}; do
  echo "==> [$(date +%H:%M:%S)] ${url}"
  URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^"]+"' \
    | grep -Eo '(http|https)://[^#"]+' | grep '\.html' \
    | grep -E '(\?p=|\?color=|^[^?]+$)' || true)"$'\n'
done

# Deduplicate, then fetch each page and warm the image cache by requesting
# every catalog/product/cache image it references, in parallel per page.
URL_LIST="$(echo "${URL_LIST}" | sort -u)"
for url in ${URL_LIST}; do
  echo "==> [$(date +%H:%M:%S)] ${url}"
  IMG_LIST="$(curl -s "${url}" | grep -Eo 'src="[^"]+"' \
    | grep -Eo '(http|https)://[^#"]+' | grep catalog/product/cache || true)"
  for img in $(echo "${IMG_LIST}" | sort -u); do
    echo "==> [$(date +%H:%M:%S)] ${img}"
    curl -s "${img}" >/dev/null &
  done
  wait
done
echo "==> [$(date +%H:%M:%S)] crawl complete"
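Either script can also be run by hand against any reachable storefront; FRONT_URL is the only input. A hypothetical local invocation (the file name is an assumption):

# Save the variant above as warm-cache-images.sh, then:
chmod +x warm-cache-images.sh
FRONT_URL="https://app.exampleproject.test/" ./warm-cache-images.sh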