Created
February 27, 2024 13:56
-
-
Save siepkes/8c9dd4907e9f28599a81f4b381bed2cf to your computer and use it in GitHub Desktop.
WAL-G full backup with healthchecks.io
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ksh93
#
# WAL-G full backup with optional healthchecks.io reporting.
#
# This script determines if it is running on the primary PostgreSQL node (by asking the local Patroni REST API) and
# then starts a full backup. It does nothing when running on PostgreSQL nodes that are replicas.
#
# Usage:
#   <script> [HEALTHCHECKS_IO_UUID]
#
# Arguments:
#   $1 - Optional UUID of the healthchecks.io check to report status to. When omitted nothing is reported.
#
# Exit status:
#   0      - Node is a replica (nothing to do) or the backup succeeded.
#   1      - Another instance is already running, or the node role could not be determined.
#   other  - Exit code of the wal-g backup process.

# Keep PID detection at the top of script to prevent it from breaking (getting wrong values).
OWN_PID=$$
PARENT_PID=$(ps -o ppid= -p "${OWN_PID}")
# ps may pad the PPID column with whitespace. Strip it, otherwise the string comparison in the loop below can never
# match and the parent shell would wrongly be reported as an already running instance.
PARENT_PID=${PARENT_PID//[[:space:]]/}

SHELL_NAME="ksh93"
SCRIPT_NAME=$0

# Optional UUID of the healthchecks.io check to report status to.
HEALTHCHECKS_IO_UUID=${1:-}
HEALTHCHECKS_IO_URL="https://hc-ping.com"

WALG_BIN="/opt/walg/wal-g"
WALG_CONF_DIR="/etc/walg"

POSTGRESQL_DATA_DIR="/var/lib/patroni/postgres"

# Sends a ping to healthchecks.io when a check UUID was provided; does nothing otherwise.
# Arguments: $1 - last path segment of the ping URL ("start" or an exit code).
ping_healthchecks() {
  if [[ -n "${HEALTHCHECKS_IO_UUID}" ]]; then
    curl -fsS -m 10 --retry 5 -o /dev/null "${HEALTHCHECKS_IO_URL}/${HEALTHCHECKS_IO_UUID}/$1"
  fi
}

# We put the shell name in front of the script name so pgrep doesn't find the parent shell.
for RUNNING_PID in $(pgrep -f "${SHELL_NAME} ${SCRIPT_NAME}"); do
  if [ "${RUNNING_PID}" != "${OWN_PID}" ] && [ "${RUNNING_PID}" != "${PARENT_PID}" ]; then
    echo "ERROR: Process already running: ${RUNNING_PID}" >&2
    exit 1
  fi
done

# Get the current role of the node from the local Patroni REST API.
ROLE=$(curl --connect-timeout 20 -s http://127.0.0.1:8009/primary | jq --raw-output .role)

# We only want to run on the primary node. Not on replicas.
case "${ROLE}" in
  master | primary)
    # Older Patroni versions report "master"; newer ones are expected to report "primary" (verify against the
    # Patroni version in use). Simply continue.
    ;;
  replica)
    # Nothing to do. Only the primary must make the full backup.
    exit 0
    ;;
  *)
    # Also reached when curl or jq failed, in which case ROLE is empty or "null".
    echo "ERROR: Server is in an unknown role ${ROLE} (not 'master' or 'replica')." >&2
    exit 1
    ;;
esac

# If a healthchecks.io check UUID is provided signal that the job has started.
ping_healthchecks "start"

# Start the actual backup.
"${WALG_BIN}" --config "${WALG_CONF_DIR}/walg.json" backup-push "${POSTGRESQL_DATA_DIR}"
BACKUP_EXIT_CODE=$?

# If a healthchecks.io check UUID is provided report the exit code of the backup process.
ping_healthchecks "${BACKUP_EXIT_CODE}"

# Exit with the code of the backup process so cron also knows if the process failed or succeeded.
exit "${BACKUP_EXIT_CODE}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment