Last active
April 29, 2021 08:31
-
-
Save davidalger/f6d2ffe440adc98ce3520d718c81ac4b to your computer and use it in GitHub Desktop.
Script to crawl the storefront and warm the cache two levels deep on Magento demo Pods, intended to be run via a Kubernetes CronJob spec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
set -euo pipefail

# Crawl FRONT_URL and every .html link found on it (two levels deep) to
# warm the page cache. Intended to run inside a Pod via a CronJob spec.
# FRONT_URL may be overridden via the environment.
FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

echo "==> [$(date +%H:%M:%S)] waiting on readiness"
ELAPSED_SECONDS=0
while : ; do
  ELAPSED_SECONDS=$((ELAPSED_SECONDS + 2))

  # curl exits non-zero while the app is still starting; without '|| true'
  # pipefail would propagate that into the assignment and 'set -e' would
  # kill the script during the very window this loop is meant to ride out.
  RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"

  # String comparison with a safe default: RESPONSE_CODE is empty until the
  # app answers, and '-eq' on an empty/non-numeric value is an error.
  if [ "${RESPONSE_CODE:-000}" = "200" ] || [ "${ELAPSED_SECONDS}" -gt 1800 ]; then
    break
  fi
  printf "."
  sleep 2
done
echo

# Level 1: collect all .html links on the front page ('\.' so the dot is
# literal; '|| true' in case the page yields no matches).
echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^\"]+"' \
    | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -n | uniq || true)"$'\n'

# Level 2: fetch each level-1 page and append the .html links it contains.
# The for-loop word list is expanded once, so appending inside the loop does
# not extend the iteration — that is what bounds the crawl to two levels.
for url in ${URL_LIST}; do
    echo "==> [$(date +%H:%M:%S)] ${url}"
    URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^\"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep '\.html$' || true)"$'\n'
done

# De-duplicate, then request every collected URL to warm the cache.
URL_LIST="$(echo "${URL_LIST}" | sort -n | uniq)"
for url in ${URL_LIST}; do
    echo "==> [$(date +%H:%M:%S)] ${url}"
    curl -s "${url}" >/dev/null || true
done
echo "==> [$(date +%H:%M:%S)] crawl complete"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
set -euo pipefail

# Crawl FRONT_URL two levels deep and additionally fetch every product
# cache image referenced on the crawled pages, warming both the page cache
# and the catalog image resize cache. Intended to run via a CronJob spec.
# FRONT_URL may be overridden via the environment.
FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

echo "==> [$(date +%H:%M:%S)] waiting on readiness"
ELAPSED_SECONDS=0
while : ; do
  ELAPSED_SECONDS=$((ELAPSED_SECONDS + 2))

  # curl exits non-zero while the app is still starting; without '|| true'
  # pipefail would propagate that into the assignment and 'set -e' would
  # kill the script during the very window this loop is meant to ride out.
  RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"

  # String comparison with a safe default: RESPONSE_CODE is empty until the
  # app answers, and '-eq' on an empty/non-numeric value is an error.
  if [ "${RESPONSE_CODE:-000}" = "200" ] || [ "${ELAPSED_SECONDS}" -gt 1800 ]; then
    break
  fi
  printf "."
  sleep 2
done
echo

# Level 1: collect all .html links on the front page.
echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^\"]+"' \
    | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -n | uniq || true)"$'\n'

# Level 2: append links from each level-1 page, keeping only plain pages
# plus '?p=' (pagination) and '?color=' (swatch filter) variants. The loop's
# word list was expanded once, so these appends do not extend the iteration.
for url in ${URL_LIST}; do
    echo "==> [$(date +%H:%M:%S)] ${url}"
    URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^\"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep '\.html' \
        | grep -E '(\?p=|\?color=|^[^\?]+$)' || true)"$'\n'
done

# De-duplicate, then warm each page and the product images it references.
URL_LIST="$(echo "${URL_LIST}" | sort -n | uniq)"
for url in ${URL_LIST}; do
    echo "==> [$(date +%H:%M:%S)] ${url}"
    # '|| true': pages with no cache images make grep exit non-zero.
    IMG_LIST="$(curl -s "${url}" | grep -Eo 'src="[^\"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep 'catalog/product/cache' || true)"$'\n'
    # Fetch this page's images in parallel; a distinct loop variable keeps
    # the outer page 'url' from being clobbered.
    for img in $(echo "${IMG_LIST}" | sort -n | uniq); do
        echo "==> [$(date +%H:%M:%S)] ${img}"
        curl -s "${img}" >/dev/null || true &
    done
    wait
done
echo "==> [$(date +%H:%M:%S)] crawl complete"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example ad-hoc usage: