Created
November 23, 2025 09:56
-
-
Save jonnyreeves/91ae115fae5797a897c77e3ef3b1ae5f to your computer and use it in GitHub Desktop.
TrueNAS Scale scripts to (a) handle waking a backup server from sleep, (b) shut down the backup server after a grace period has passed AND all replication / scrub activity is complete. Install `wake_backup_server_for_replication.sh` as a cron job on the main server, triggered once 30m before your replication task is scheduled. Install `shutdown_…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# -----------------------------------------------
# shutdown_after_replication.sh – Overview
#
# Powers off this server once nightly ZFS replication has completed (or has
# not begun within a configured time window). Intended to be run repeatedly
# via cron on a backup server that was woken for replication.
#
# Workflow:
#   1. Runs only if the FLAG file is present. Without it the boot is assumed
#      to be manual and the script exits without touching anything.
#
#   2. Exits if LOCK_FILE exists (another run is inside its grace period).
#
#   3. The first run that sees FLAG records the current epoch time in
#      START_TIME_FILE and exits.
#
#   4. Until WAIT_WINDOW seconds have elapsed since that first run, logs
#      progress and exits. The host therefore stays up for at least
#      WAIT_WINDOW seconds after the flag is first noticed.
#
#   5. After the window, checks for activity:
#        - any loginctl session owned by UID 0 (presumably the replication
#          SSH login as root; NOTE: an interactive root login also counts)
#        - `zpool status $POOL` reporting "scrub in progress"
#      If active, exits and tries again on the next cron run.
#
#   6. Otherwise takes the lock, sleeps GRACE_PERIOD seconds and re-checks
#      in case a job started in the meantime.
#
#   7. If still idle, removes FLAG and START_TIME_FILE and powers off.
#
# -----------------------------------------------
# SCRIPT CONFIGURATION
WAIT_WINDOW=3600    # seconds to wait for replication to begin (1 hour)
GRACE_PERIOD=300    # seconds to wait before the final re-check (5 minutes)
POOL="tank"         # pool whose scrub status blocks shutdown
FLAG="/tmp/replication_mode"
LOG="/var/log/shutdown-after-replication.log"
LOCK_FILE="/tmp/shutdown.lock"
START_TIME_FILE="/tmp/replication_wait_start"
# -----------------------------------------------

set -u

# Write a timestamped message to stdout and append it to LOG.
log() {
  printf '%s - %s\n' "$(date)" "$*" | tee -a "$LOG"
}

# Succeeds (returns 0) when a UID-0 login session exists or POOL is scrubbing.
activity_in_progress() {
  local pool_status

  # Column 2 of `loginctl list-sessions` is the session owner's UID.
  if loginctl list-sessions --no-legend \
    | awk '$2 == "0" { found = 1 } END { exit !found }'; then
    return 0
  fi

  # Surface zpool failures (e.g. wrong pool name) instead of silently
  # reading them as "no scrub"; the idle verdict itself is unchanged.
  if ! pool_status=$(zpool status "$POOL" 2>&1); then
    log "WARNING: 'zpool status $POOL' failed: $pool_status"
    return 1
  fi

  grep -q 'scrub in progress' <<<"$pool_status"
}

# Not logged to file: this path runs on every cron tick after a manual boot.
if [[ ! -f "$FLAG" ]]; then
  echo "$(date) - No flag present, manual boot assumed. Exiting."
  exit 0
fi

# Prevent multiple simultaneous executions
if [[ -f "$LOCK_FILE" ]]; then
  log "Shutdown lock active, exiting."
  exit 0
fi

# Record first time script sees the flag
if [[ ! -f "$START_TIME_FILE" ]]; then
  date +%s > "$START_TIME_FILE"
  log "Waiting for replication to begin..."
  exit 0
fi

start_time=$(<"$START_TIME_FILE")

# An empty or corrupt timestamp would break the arithmetic below; restart
# the wait window rather than erroring out on every subsequent cron run.
if [[ ! "$start_time" =~ ^[0-9]+$ ]]; then
  date +%s > "$START_TIME_FILE"
  log "Invalid start time in $START_TIME_FILE, restarting wait window."
  exit 0
fi

now=$(date +%s)
elapsed=$((now - start_time))

# Hold until the wait window has fully elapsed
if ((elapsed < WAIT_WINDOW)); then
  log "Waiting ($elapsed / $WAIT_WINDOW seconds) for replication to start..."
  exit 0
fi

# Defer to the next cron run while a root session or scrub is active
if activity_in_progress; then
  log "Replication or Scrub in progress..."
  exit 0
fi

# If flag exists and replication is finished or never started, safe shutdown
log "Replication window expired or completed. Shutting down after 5m..."

# noclobber makes creation fail if the file already exists, so two runs
# cannot both enter the grace period.
if ! (set -o noclobber; : > "$LOCK_FILE") 2>/dev/null; then
  log "Shutdown lock active, exiting."
  exit 0
fi
# Release the lock on every exit path so a failed run cannot leave it stale.
trap 'rm -f -- "$LOCK_FILE"' EXIT

sleep "$GRACE_PERIOD"

# final check
if activity_in_progress; then
  log "Replication or scrub restarted during grace period!"
  exit 0
fi

# all good, cul8r
log "TTFN..."
rm -f -- "$FLAG" "$START_TIME_FILE"
if ! shutdown -P now; then
  log "ERROR: shutdown command failed, host remains powered on." >&2
  exit 1
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# -----------------------------------------------
# wake_backup_server_for_replication.sh – Overview
#
# Wakes the backup server with a Wake-on-LAN magic packet, waits until it
# answers ping, then creates /tmp/replication_mode on it over SSH.
#
# Exit status: 0 once the flag has been created; 1 if the host does not
# answer ping within PING_TIMEOUT or the flag cannot be created over SSH.
#
# -----------------------------------------------
# SCRIPT CONFIGURATION
BACKUP_MAC="d0:xx:xx:xx:xx:fc"   # <-- Backup Server NIC MAC address TO WAKE
HOST_WOL_IFACE="enp2s0"          # <-- Host network interface to send magic packet FROM
BACKUP_HOST="192.168.1.11"       # <-- hostname or IP address of backup server
SSH_USER="root"
LOG="/var/log/wake_backup_server_for_replication.log"
ETHERWAKE="/root/sbin/etherwake"
PING_INTERVAL=30     # seconds between ping attempts
PING_TIMEOUT=900     # give up waiting for ping after this many seconds
SSH_SETTLE=30        # seconds to wait after first ping before trying SSH
SSH_ATTEMPTS=5       # number of SSH attempts before giving up
SSH_RETRY_DELAY=15   # seconds between SSH attempts
# -----------------------------------------------

set -u

# Write a timestamped message to stdout and append it to LOG.
log() {
  printf '%s - %s\n' "$(date)" "$*" | tee -a "$LOG"
}

log "Starting replication wake script"

# 1) Wake Backup Server using etherwake
log "Sending Wake-on-LAN magic packet to $BACKUP_MAC"
if ! "$ETHERWAKE" -i "$HOST_WOL_IFACE" "$BACKUP_MAC"; then
  # Not fatal: the host may already be awake. The ping wait below decides.
  log "WARNING: $ETHERWAKE failed to send magic packet" >&2
fi

# 2) Wait until Backup Host responds to ping (bounded, so a host that never
#    wakes cannot leave this cron job running forever)
printf '%s - Waiting for %s to respond ' "$(date)" "$BACKUP_HOST" | tee -a "$LOG"
deadline=$((SECONDS + PING_TIMEOUT))
until ping -c1 -W1 "$BACKUP_HOST" >/dev/null 2>&1; do
  if ((SECONDS >= deadline)); then
    printf ' TIMEOUT\n' | tee -a "$LOG"
    log "ERROR: $BACKUP_HOST did not respond within ${PING_TIMEOUT}s" >&2
    exit 1
  fi
  printf '.' | tee -a "$LOG"
  sleep "$PING_INTERVAL"
done
printf ' UP\n' | tee -a "$LOG"

# 3) Allow time for SSH to become ready
log "Waiting for Backup Host SSH readiness..."
sleep "$SSH_SETTLE"

# 4) Create flag file indicating auto-shutdown is allowed
log "Setting replication_mode flag on $BACKUP_HOST"
flag_set=0
for ((attempt = 1; attempt <= SSH_ATTEMPTS; attempt++)); do
  # BatchMode stops ssh from blocking on a password prompt under cron;
  # ConnectTimeout bounds each attempt if sshd is not listening yet.
  if ssh -o BatchMode=yes -o ConnectTimeout=10 \
    "${SSH_USER}@${BACKUP_HOST}" 'date > /tmp/replication_mode'; then
    flag_set=1
    break
  fi
  if ((attempt < SSH_ATTEMPTS)); then
    log "SSH attempt $attempt/$SSH_ATTEMPTS failed, retrying in ${SSH_RETRY_DELAY}s"
    sleep "$SSH_RETRY_DELAY"
  fi
done

if ((flag_set == 0)); then
  log "ERROR: Failed to set replication_mode flag on $BACKUP_HOST" >&2
  exit 1
fi
log "replication_mode flag created successfully"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment