Skip to content

Instantly share code, notes, and snippets.

@Snuffy2
Last active August 21, 2025 02:34
Show Gist options
  • Save Snuffy2/010b4a476aa571a5c8ec4f84ccbec861 to your computer and use it in GitHub Desktop.
Save Snuffy2/010b4a476aa571a5c8ec4f84ccbec861 to your computer and use it in GitHub Desktop.
#!/bin/bash
# ------------------------------------------------------------------------------
# vpn-uptime-restart.sh
#
# Monitors the VPN container in a Docker Compose stack. If the VPN container is
# not running, brings the stack up and polls until the VPN reports as up; then
# restarts dependent containers (network mode "container:*") whose uptime
# exceeds the VPN's uptime. Sends notifications via Pushover.
#
# Main variables to set/change:
# COMPOSE_FILE - Path to your docker-compose.yml file
# LOG_FILE - Path to log file for script output
# ENV_FILE - Path to your .env file for Docker Compose (optional)
# GLUETUN_SERVERS_FILE - Path to Gluetun VPN servers.json file (optional)
# PUSHOVER - Path to pushover.sh notification script (optional)
# ------------------------------------------------------------------------------
# --- Required paths ----------------------------------------------------------
# Compose file describing the stack, and the file this script appends its log to.
COMPOSE_FILE="/home/pi/docker/compose/compose.yml"
LOG_FILE="/home/pi/logs/vpn-uptime-restart.log"

# --- Optional paths (remove an assignment to disable that feature) ------------
ENV_FILE="/home/pi/docker/compose/.env"
GLUETUN_SERVERS_FILE="/mnt/cfs/discovery-docker/appdata/gluetun/config/servers.json"
PUSHOVER="/home/pi/scripts/pushover.sh"

# --- VPN recovery polling -----------------------------------------------------
# Number of status checks after a stack restart, and the pause (seconds) before
# each check.
MAX_RETRIES=5
RETRY_DELAY=30

# Guarantee the optional settings are at least defined (empty string) so the
# later `-n` tests behave the same whether or not they were assigned above.
: "${ENV_FILE:=}" "${GLUETUN_SERVERS_FILE:=}" "${PUSHOVER:=}"
#######################################
# Print a timestamped message, append it to the log file, and optionally
# forward it to the Pushover notification script.
# Globals:
#   LOG_FILE (read) - file the formatted line is appended to
#   PUSHOVER (read) - notification script; skipped when empty or not executable
# Arguments:
#   $1 - message text
#   $2 - "true" to also send the message via Pushover (default: false)
#   $3 - value passed to the script's --priority option (default: -2)
# Outputs:
#   "<MM/DD HH:MM:SS> : <message>" on stdout and appended to LOG_FILE
#######################################
log_message() {
  local message=$1
  local send_to_pushover=${2:-false}
  local pushover_priority=${3:--2}
  local now
  now=$(date +'%m/%d %H:%M:%S')

  printf '%s : %s\n' "$now" "$message" | tee -a "$LOG_FILE"

  # Compare the flag as a string: executing it (`if $flag`) would run whatever
  # the caller passed as a command.
  if [[ "$send_to_pushover" == "true" && -n "$PUSHOVER" && -x "$PUSHOVER" ]]; then
    "$PUSHOVER" --priority "$pushover_priority" "${message} @ ${now}"
  fi
}
#######################################
# Convert a Docker status string such as "Up 10 minutes (healthy)" into an
# approximate uptime in seconds.
# Arguments:
#   $1 - status text as reported by `docker compose ps`
# Outputs:
#   Uptime in seconds on stdout; 0 when the status is not an "Up ..." duration
#   this function understands (e.g. "Up Less than a second", "Exited ...").
#######################################
status_to_seconds() {
  local status=$1
  local count_re='Up[[:space:]]([0-9]+)[[:space:]](second|minute|hour|day|week|month|year)s?'
  local about_re='Up[[:space:]]About[[:space:]](a[[:space:]]minute|an[[:space:]]hour)'
  local value unit

  if [[ "$status" =~ $count_re ]]; then
    # Force base 10 so a value with a leading zero is not read as octal.
    value=$((10#${BASH_REMATCH[1]}))
    unit=${BASH_REMATCH[2]}
    case "$unit" in
      second) echo "$value" ;;
      minute) echo $((value * 60)) ;;
      hour)   echo $((value * 3600)) ;;
      day)    echo $((value * 86400)) ;;
      # Long-running containers are reported in weeks, months and years;
      # without these arms they would count as 0 seconds of uptime.
      week)   echo $((value * 604800)) ;;
      month)  echo $((value * 2592000)) ;;   # 30-day month
      year)   echo $((value * 31536000)) ;;  # 365-day year
      *)      echo 0 ;;
    esac
  elif [[ "$status" =~ $about_re ]]; then
    # "Up About a minute" / "Up About an hour"
    case "${BASH_REMATCH[1]}" in
      *minute) echo 60 ;;
      *hour)   echo 3600 ;;
    esac
  else
    echo 0
  fi
}
log_message "Starting"
log_message "Getting container data"

# Snapshot the stack's containers as JSON; --env-file is passed only when an
# env file is configured.
if [[ -z "$ENV_FILE" ]]; then
  CONTAINER_DATA=$(docker compose -f "$COMPOSE_FILE" ps --format json)
  ps_status=$?
else
  CONTAINER_DATA=$(docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" ps --format json)
  ps_status=$?
fi

# Nothing further can be checked without the listing: notify and stop.
if (( ps_status != 0 )) || [[ -z "$CONTAINER_DATA" ]]; then
  log_message "Failed to get container data or data is empty" true 0
  exit 1
fi
# Extract the status text of the container named "vpn" from the snapshot.
# A jq failure is reported via Pushover and aborts the run.
if ! VPN_STATUS=$(jq -r '. | select(.Name == "vpn") | .Status' <<<"$CONTAINER_DATA"); then
  log_message "Failed to parse VPN status from container data" true 0
  exit 1
fi

# Same lookup for the container's state field (compared against "running" later).
if ! VPN_RUNNING=$(jq -r '. | select(.Name == "vpn") | .State' <<<"$CONTAINER_DATA"); then
  log_message "Failed to parse VPN running state from container data" true 0
  exit 1
fi
# VPN recovery: when the VPN container is missing or not running, bring the
# stack up and poll (MAX_RETRIES times, RETRY_DELAY seconds apart) until the
# VPN reports as up. Exits 1 if it never does.
if [[ -z "$VPN_STATUS" || "$VPN_RUNNING" != "running" ]]; then
  log_message "VPN is not running. Attempting full stack restart" true

  # Build the compose invocation once; --env-file only when configured.
  COMPOSE_CMD=(docker compose -f "$COMPOSE_FILE")
  if [[ -n "$ENV_FILE" ]]; then
    COMPOSE_CMD+=(--env-file "$ENV_FILE")
  fi

  # Record a failed `up -d` instead of ignoring it; the polling loop below
  # still decides whether the run succeeds.
  if ! "${COMPOSE_CMD[@]}" up -d; then
    log_message "docker compose up -d failed while restarting the stack"
  fi

  ATTEMPT=1
  VPN_HEALTHY=false
  while (( ATTEMPT <= MAX_RETRIES )); do
    sleep "$RETRY_DELAY"

    if ! CONTAINER_DATA=$("${COMPOSE_CMD[@]}" ps --format json) || [[ -z "$CONTAINER_DATA" ]]; then
      log_message "Failed to get container data during retry $ATTEMPT" true 0
      exit 1
    fi

    if ! VPN_STATUS=$(jq -r '. | select(.Name == "vpn") | .Status' <<<"$CONTAINER_DATA"); then
      log_message "Failed to parse VPN status during retry $ATTEMPT" true 0
      exit 1
    fi

    if [[ "$VPN_STATUS" == *"(unhealthy)"* ]]; then
      log_message "Retry $ATTEMPT: VPN is still unhealthy: $VPN_STATUS"
    elif [[ "$VPN_STATUS" == *"(health: starting)"* ]]; then
      # The health check has not passed yet; "Up ..." alone does not mean the
      # VPN is healthy, so keep polling.
      log_message "Retry $ATTEMPT: VPN is not up yet: $VPN_STATUS"
    elif [[ "$VPN_STATUS" == Up* ]]; then
      log_message "VPN is now healthy: $VPN_STATUS"
      VPN_HEALTHY=true
      break
    else
      log_message "Retry $ATTEMPT: VPN is not up yet: $VPN_STATUS"
    fi
    ATTEMPT=$((ATTEMPT + 1))
  done

  if [[ "$VPN_HEALTHY" != "true" ]]; then
    log_message "VPN failed to start after $MAX_RETRIES attempts" true 0

    # Remove the Gluetun servers file when it is older than 5 minutes and
    # smaller than 1 KiB.
    # NOTE(review): presumably a tiny servers.json is a truncated/corrupt file
    # that keeps Gluetun from starting - confirm.
    if [[ -n "$GLUETUN_SERVERS_FILE" && -f "$GLUETUN_SERVERS_FILE" ]]; then
      if [[ -n "$(find "$GLUETUN_SERVERS_FILE" -mmin +5)" ]]; then
        # Only compare the size when stat actually produced one.
        if servers_size=$(stat -c%s "$GLUETUN_SERVERS_FILE") && (( servers_size < 1024 )); then
          log_message "Deleting $GLUETUN_SERVERS_FILE (older than 5 min and <1KB)"
          sudo rm -- "$GLUETUN_SERVERS_FILE"
        fi
      fi
    fi
    exit 1
  fi
fi
# Restart every running container that shares another container's network
# namespace (network mode "container:*") and has been up longer than the VPN.
# Uptimes come from Docker's rounded status text, so the comparison is
# approximate.
VPN_UPTIME_SECONDS=$(status_to_seconds "$VPN_STATUS")
log_message "VPN uptime: ${VPN_UPTIME_SECONDS}s [${VPN_STATUS}]"
RESTART_COUNT=0
RESTARTED_CONTAINERS=()
log_message "Checking other containers"
if [[ -n "$CONTAINER_DATA" ]]; then
  # Build the compose invocation once; --env-file only when configured.
  COMPOSE_CMD=(docker compose -f "$COMPOSE_FILE")
  if [[ -n "$ENV_FILE" ]]; then
    COMPOSE_CMD+=(--env-file "$ENV_FILE")
  fi

  while IFS= read -r container_json; do
    # `// empty` turns a missing field into an empty string rather than the
    # literal text "null", so the -z check below can catch it.
    if ! CONTAINER_NAME=$(jq -r '.Name // empty' <<<"$container_json") || [[ -z "$CONTAINER_NAME" ]]; then
      log_message "Failed to parse container name or name is empty" true 0
      continue
    fi

    if ! CONTAINER_STATUS=$(jq -r '.Status' <<<"$container_json"); then
      log_message "Failed to parse container status for $CONTAINER_NAME" true 0
      continue
    fi

    # `docker compose restart` addresses services, not containers; the two
    # differ when the compose file sets a custom container_name. Fall back to
    # the container name if the service field is unavailable.
    CONTAINER_SERVICE=$(jq -r '.Service // empty' <<<"$container_json")
    if [[ -z "$CONTAINER_SERVICE" ]]; then
      CONTAINER_SERVICE=$CONTAINER_NAME
    fi

    CONTAINER_UPTIME_SECONDS=$(status_to_seconds "$CONTAINER_STATUS")

    if ! NETWORK_MODE=$(docker inspect -f '{{.HostConfig.NetworkMode}}' "$CONTAINER_NAME" 2>/dev/null); then
      log_message "Failed to inspect network mode for $CONTAINER_NAME" true 0
      continue
    fi

    if [[ "$NETWORK_MODE" == container:* ]] \
      && (( CONTAINER_UPTIME_SECONDS > VPN_UPTIME_SECONDS )); then
      log_message "$CONTAINER_NAME (network_mode: container:*) uptime: ${CONTAINER_UPTIME_SECONDS}s > VPN uptime: ${VPN_UPTIME_SECONDS}s, restarting"
      # stdin is redirected so the command cannot consume the container list
      # that feeds this loop.
      if "${COMPOSE_CMD[@]}" restart "$CONTAINER_SERVICE" </dev/null; then
        RESTART_COUNT=$((RESTART_COUNT + 1))
        RESTARTED_CONTAINERS+=("$CONTAINER_NAME")
      else
        log_message "Failed to restart $CONTAINER_NAME" true 0
      fi
    fi
  done < <(jq -c '. | select(.State == "running")' <<<"$CONTAINER_DATA")
else
  log_message "No container data to process for other containers" true 0
fi
# Count running containers before the final `up -d`, so the summary can report
# how many that command started.
ORIGINAL_RUNNING=$(jq -s '[.[] | select(.State == "running")] | length' <<<"$CONTAINER_DATA")

# Bring up anything that is not running, then take a fresh snapshot.
summary_compose=(docker compose -f "$COMPOSE_FILE")
if [[ -n "$ENV_FILE" ]]; then
  summary_compose+=(--env-file "$ENV_FILE")
fi
"${summary_compose[@]}" up -d
CONTAINER_DATA=$("${summary_compose[@]}" ps --format json)

TOTAL_RUNNING=$(jq -s '[.[] | select(.State == "running")] | length' <<<"$CONTAINER_DATA")

# Difference between the two counts, never reported as negative.
NUM_STARTED=$(( TOTAL_RUNNING > ORIGINAL_RUNNING ? TOTAL_RUNNING - ORIGINAL_RUNNING : 0 ))
printf '%s\n' "Num Started: $NUM_STARTED"

# Only push a notification when something was actually restarted.
if (( RESTART_COUNT > 0 )); then
  RESTARTED_LIST="${RESTARTED_CONTAINERS[*]}"
  RESTARTED_LIST=${RESTARTED_LIST// /, }
  log_message "Total running containers: $TOTAL_RUNNING. Started: $NUM_STARTED. Restarted: $RESTART_COUNT ($RESTARTED_LIST)" true
else
  log_message "Total running containers: $TOTAL_RUNNING. Started: $NUM_STARTED. Restarted: 0"
fi
# Keep only the newest 1000 lines of the log. The trimmed copy is written to a
# sibling .tmp file and moved over the original only if tail succeeded.
if [[ ! -f "$LOG_FILE" ]]; then
  log_message "Log file $LOG_FILE does not exist, skipping trim"
else
  tail -n 1000 "$LOG_FILE" > "${LOG_FILE}.tmp" && mv "${LOG_FILE}.tmp" "$LOG_FILE"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment