Skip to content

Instantly share code, notes, and snippets.

@haarp
Last active August 10, 2024 14:48
Show Gist options
  • Save haarp/0c13614038de1570046cc255e7478a41 to your computer and use it in GitHub Desktop.
Save haarp/0c13614038de1570046cc255e7478a41 to your computer and use it in GitHub Desktop.
backup-zfs
#!/bin/bash
# ZFS-send backups v1.9.0 :D
#
# $1: target device (e.g. `/dev/sdg`) - required; the udev rule below passes udev's $DEVNAME here
# $2: optional - if existing, assume we're called by udev -> obey inhibit, log to /tmp/backup-zfs_*.log instead of stdout
# create /tmp/backup-zfs.inhibit to disable automatic udev backups
# needs udev rule like:
# `ACTION=="add", KERNEL=="sd*[!0-9]", ENV{ID_SERIAL}!="?*", SUBSYSTEMS=="usb", RUN+="/usr/bin/setsid --fork /usr/local/bin/backup-zfs $env{DEVNAME} udev"`
#
# status                            | beep | HP led       | Fit-statUSB LED
# idle                              | -    | -            | -
# backup in progress                | 1    | blue perm    | blue blink
# backup successful                 | 1    | magenta perm | green flash/perm (LED switches off after 4d or on disk removal)
# backup disabled/non-backup disk   | 2    | orange flash | yellow flash
# backup failed/problems occurred   | 5    | orange perm  | red perm (LED switches off after 4d, investigate before removing disk!!)
#
# WARNING: Do *not* remove the disk while ZFS is imported, you *will* rape the kernel driver!
# https://github.com/openzfs/zfs/issues/3461
# https://github.com/openzfs/zfs/pull/11082#issuecomment-716284367
#
# NOTE: This will delete fs not being backed up and all snapshots but the latest on target devices!
# TODO: Check target health. periodic scrubs?
# ---- configuration ----

# Pool that gets backed up.
SOURCE='data3'

# Name prefixes:
#   SNAP_PREFIX - snapshots are created with, and matched against, this prefix
#   POOL_PREFIX - a pool is only accepted as backup target if its name starts with this
SNAP_PREFIX='bzfs'
POOL_PREFIX='backup'

# Datasets left out of the backup. Entries are compared by exact name,
# i.e. children of an excluded dataset are NOT excluded automatically.
EXCLUDES=(
  "${SOURCE}/Downloads"
  "${SOURCE}/MP3"
  "${SOURCE}/Stuff/Low"
  "${SOURCE}/tmp"
)

# Number of most recent snapshots to keep on the source.
KEEP=10
# Copy extra, non-ZFS data (currently only /etc/backuppc) onto the target
# pool's root dataset with rsync.
# Globals:  target (read) - name of the target pool
#           uhoh (written) - incremented once when anything here fails
# Outputs:  progress messages on stdout
# Returns:  0 on success, 1 if the dataset could not be mounted, has no
#           usable mountpoint, or the rsync transfer failed
auxiliary() {
  echo -e "\e[43mDoing auxiliary transfers\e[49m"

  # The root dataset has to be mounted before we can write files into it.
  [[ $(/sbin/zfs get -H -o value mounted "$target") == "yes" ]] \
    || /sbin/zfs mount "$target" \
    || { ((uhoh++)); return 1; }

  # Only accept an absolute path: an empty value or "none"/"legacy" would make
  # rsync --delete operate on some unrelated (relative) directory.
  local mountpoint
  mountpoint=$(/sbin/zfs get -H -o value mountpoint "$target")
  if [[ "$mountpoint" != /* ]]; then
    echo "No usable mountpoint for $target, skipping auxiliary transfers!"
    ((uhoh++))
    return 1
  fi

  echo "/etc/backuppc"
  # -R recreates the full source path below the mountpoint.
  # A failed transfer counts as a problem, just like a failed mount does.
  rsync -aHAX -h -R --delete "/etc/backuppc" "$mountpoint" || { ((uhoh++)); return 1; }
}
# Write BEL characters to /dev/tty31, pausing 0.35s after each one.
# Arguments: $1 - number of beeps (default: 1)
beep() {
  local count="${1:-1}"
  local i # keep the loop counter out of the caller's scope

  for (( i = 1; i <= count; i++ )); do
    echo -ne "\a" > /dev/tty31
    sleep 0.35
  done
}
# Set the HP health LED through the sysfs GPIO interface.
# Arguments: $1 - <off|blue|orange|magenta>
#            or no args to check if leds exist at all
# Returns:   1 if /sys/class/gpio/gpiochip256/base cannot be read (no LED),
#            0 for the existence check or an unknown colour,
#            otherwise the status of the last sysfs write
# https://github.com/stupidpupil/hp_n36-40-54l_health_led_drivers
# (re)build on new kernel with build_all_debs.sh && sudo dpkg -i dist/*.deb
# rmmod i2c_piix4 sp5100_tco; modprobe i2c-piix4; sleep 5; modprobe gpio-sb8xx
hp_led() {
  # everything is local so repeated calls never leak state into the script
  local blue orange base_addr blue_addr orange_addr

  { base_addr=$(< /sys/class/gpio/gpiochip256/base); } 2>/dev/null
  [[ $base_addr ]] || return 1

  case "$1" in
    off)     blue=0 orange=0 ;;
    blue)    blue=1 orange=0 ;;
    orange)  blue=0 orange=1 ;;
    magenta) blue=1 orange=1 ;;
    *)       return 0 ;;
  esac

  # GPIO numbers of the two LED lines, relative to the chip base
  blue_addr=$(( base_addr + 188 ))
  orange_addr=$(( base_addr + 187 ))

  # export the lines to sysfs if that has not happened yet
  [[ -e "/sys/class/gpio/gpio$blue_addr" ]] || echo "$blue_addr" > /sys/class/gpio/export
  [[ -e "/sys/class/gpio/gpio$orange_addr" ]] || echo "$orange_addr" > /sys/class/gpio/export

  # invert: the value written is the opposite of the requested state
  # (presumably the LEDs are wired active-low - confirm against the driver docs)
  echo "$(( 1 - blue ))" > "/sys/class/gpio/gpio$blue_addr/value"
  echo "$(( 1 - orange ))" > "/sys/class/gpio/gpio$orange_addr/value"
}
# Set the colour of a Compulab fit-statUSB LED via its serial device.
# Arguments: $1 - <off|red|green|blue|yellow>
#            $2 - optional cycle time in ms; if given, the LED alternates
#                 between the colour and off
#            or no args to check if leds exist at all
# Returns:   1 if no writable device is found, 0 for the existence check or
#            an unknown colour, otherwise the status of the final write
fit_led() {
  local string path
  # Let the shell glob directly instead of going through $(echo ...); if several
  # devices match, use the first one rather than a space-joined, unusable path.
  local -a matches=( /dev/serial/by-id/usb-Compulab_LTD_fit_StatUSB* )
  path="${matches[0]}"
  # Without a match the pattern stays literal (or empty with nullglob) -> not writable.
  [[ -w $path ]] || return 1

  case "$1" in
    off)    string="#000000" ;;
    red)    string="#200000" ;;
    green)  string="#002000" ;;
    blue)   string="#000020" ;;
    yellow) string="#200800" ;;
    *)      return 0 ;;
  esac

  # blink sequence: colour for $2 ms, then off for $2 ms
  if [[ $2 ]]; then
    string="B$string-$2#000000-$2"
  fi

  stty -F "$path" 9600 raw -echo -echoe -echok -echoctl -echoke
  echo "$string" > "$path"
}
# Announce a backup state via wall, beeps and both LEDs.
# Arguments: $1 - <inprogress|successful|nobackup|disabled|failed>
# Globals:   target, device, SOURCE, uhoh (all read only)
# All signalling runs in background jobs so the caller is never blocked.
status() {
  # if no target set yet, show device instead
  [[ "$target" ]] || local target="${device##*/}"
  local tag="${0##*/}_$target"

  case "$1" in
    inprogress)
      wall "$tag: Starting backup from $SOURCE"
      beep 1 &
      hp_led blue &
      fit_led blue 750 &
      ;;

    successful)
      wall "$tag: Backup finished"
      beep 1 &
      hp_led magenta &
      # flash for a while to indicate SMR possibly still being written
      fit_led green 1500 &
      # Switch off led after device is removed or after timeout
      # TODO: have new instances of this script kill this
      hp_led && { timeout 4d inotifywait -q -e delete_self "$device"; hp_led off; } &
      fit_led && {
        timeout 20m inotifywait -q -e delete_self "$device"
        if [[ $? == 124 ]]; then
          # timeout hit, disk still attached: go from flashing to permanent green
          fit_led green
          timeout 4d inotifywait -q -e delete_self "$device"
          fit_led off
        else
          fit_led off
        fi
      } &
      ;;

    nobackup | disabled)
      # both states are signalled identically, only the message differs
      if [[ "$1" == nobackup ]]; then
        wall "$tag: Not doing backups to this disk"
      else
        wall "$tag: Backups disabled"
      fi
      beep 2 &
      { hp_led orange; sleep 1; hp_led off; } &
      { fit_led yellow; sleep 1; fit_led off; } &
      ;;

    failed)
      # numeric comparison; [[ a > b ]] would compare strings lexicographically
      if (( uhoh > 0 )); then
        wall "$tag: Backup finished with $uhoh problem(s)!"
      else
        wall "$tag: Backup failed!"
      fi
      beep 5 &
      hp_led orange &
      fit_led red &
      hp_led && { timeout 4d inotifywait -q -e delete_self "$device"; hp_led off; } &
      fit_led && { timeout 4d inotifywait -q -e delete_self "$device"; fit_led off; } &
      ;;
  esac
}
# Run one complete backup of $SOURCE onto the backup pool found on $device:
# locate (and if necessary import) the target pool, snapshot all non-excluded
# datasets, send them raw to the target, prune old snapshots/datasets and
# export the pool again.
# Globals read:    device, SOURCE, EXCLUDES, SNAP_PREFIX, POOL_PREFIX, KEEP
# Globals written: target, uhoh, LOCKFILE and a number of working variables
# Exit status:     2 if the pool on $device is the source pool, 1 if another
#                  instance is running / source missing / import failed,
#                  otherwise 0 - also when problems were counted in $uhoh
#                  (those are reported through `status failed`).
main() {
  local device_real n_target
  local -a import_args

  ### findtarget
  # Check for ZFS on device
  # FIXME: won't work, empty output. udev hasn't filled this in yet?
  ## if ! lsblk -n -o FSTYPE "$device" | grep -q 'zfs_member'; then
  ## echo "No zpool found on $device, exiting."
  ## status nobackup
  ## exit 0
  ## fi

  # Find pool on device (assumes one pool on device)
  device_real=$(readlink -f "$device") # loop-invariant, resolve only once
  while [[ ! "$target" ]]; do # fake loop
    # look at imported pools
    # FIXME: fails when pool is already imported but on different device
    while read -r d junk; do
      if [[ ! "$junk" ]]; then
        # it's a list header, let's look at this pool
        pool="$d"
      elif [[ "$device_real" && "$(readlink -f "$d")" == "$device_real"* ]]; then
        # literal prefix match (the disk itself or one of its partitions);
        # an empty $device_real must not match every vdev
        target="$pool" # previously found pool belongs to our device!
        break 2
      fi
    done <<< "$(/sbin/zpool list -H -P -o name -v)"

    # look at importable pools
    import_args=()
    for d in "$device"*; do import_args+=(-d "$d"); done # scan all partitions on $device
    target=$(/sbin/zpool import "${import_args[@]}" 2>/dev/null | awk '/^ *pool/{print $2}')
    break
  done

  if [[ "$target" == "$SOURCE" ]]; then
    # did we just boot or import the source pool? don't clobber it!
    echo "Source and target zpools are the same, exiting."
    exit 2
  elif [[ "$target" =~ ^"$POOL_PREFIX" ]]; then
    # success
    :
  elif [[ "$target" ]]; then
    echo "Target zpool $target doesn't sound like a backup, exiting."
    status nobackup
    exit 0 # don't exit >0 if random-ass zfs drive got attached
  else
    echo "Couldn't find pool to go with $device!"
    status nobackup
    exit 0 # don't exit >0 if random-ass non-zfs drive got attached
  fi

  ### main-main
  # Ensure single instance: the lock file holds the PID of the running instance
  LOCKFILE="/run/${0##*/}_$target.lock"
  if { kill -0 "$(< "$LOCKFILE")"; } 2>/dev/null; then
    echo "Already running, aborting!"
    exit 1
  fi
  echo $$ > "$LOCKFILE"

  # Sanity check: Would be nice for the source to exist
  if ! /sbin/zpool list -o name "$SOURCE" &>/dev/null; then
    echo "Source pool $SOURCE not found, aborting!"
    status failed
    rm -f -- "$LOCKFILE" # don't leave a stale lock behind
    exit 1
  fi

  echo -e "\e[43mStarting backup\e[49m from \e[45m$SOURCE\e[49m to \e[45m$target\e[49m"
  status inprogress

  # Actually do the importing if necessary
  if ! /sbin/zpool list -o name "$target" &>/dev/null; then
    echo "Importing pool $target..."
    /sbin/zpool import -R /mnt "$target" || {
      status failed
      rm -f -- "$LOCKFILE" # don't leave a stale lock behind
      exit 1
    }
  fi
  /sbin/zpool status "$target"
  sleep 3

  uhoh=0 # number of problems encountered; >0 disables all destroy steps
  auxiliary

  # Collect the datasets to back up
  fslist=()
  while read -r fs; do
    [[ "$fs" ]] || continue # here-string yields one empty line on empty output
    # Skip excluded fs
    for e in "${EXCLUDES[@]}"; do
      if [[ "$fs" == "$e" ]]; then
        ##echo -e "\e[44mSkipping $fs\e[49m (excluded)"
        continue 2
      fi
    done
    fslist+=("$fs")
  done <<< "$(/sbin/zfs list -H -o name -t filesystem,volume -r "$SOURCE")"

  newsnap="${SNAP_PREFIX}_$(date '+%Y-%m-%d')"
  echo -e "\e[43mCreating snapshots\e[49m $SOURCE@\e[45m$newsnap\e[49m"
  # concatenate snapshot to array elements so we can still use ${foo[@]} quoting magic with spaces inside fs names
  /sbin/zfs snapshot "${fslist[@]/%/@$newsnap}" ##|| ((uhoh++)) # don't uhoh, snapshots might exist already

  destroy=()
  echo -e "\e[43mStarting transfers\e[49m"
  for fs in "${fslist[@]}"; do
    # Terminal title
    [[ -t 1 ]] && echo -ne "\033]2;$target/$fs [${0##*/}]\007"

    # Snapshots present for this fs on source
    snaps_source=()
    while IFS='@' read -r junk snap; do
      [[ "$snap" =~ ^"${SNAP_PREFIX}"_[0-9-]+$ ]] && snaps_source+=("$snap")
    done <<< "$(/sbin/zfs list -H -o name -t snapshot -s creation "$fs" 2>/dev/null)"

    # Remember snapshots to destroy on source later, keeping $KEEP most recent (reverse order because faster)
    destroy_snaps=""
    for (( i = ${#snaps_source[@]} - 1 - KEEP; i >= 0; i-- )); do
      destroy_snaps+="${snaps_source[$i]}," # only snapshot names may be comma-separated
    done
    [[ "$destroy_snaps" ]] && destroy+=("$fs@$destroy_snaps")

    # Snapshots present for this fs on target
    snaps_target=()
    while IFS='@' read -r junk snap; do
      [[ "$snap" =~ ^"${SNAP_PREFIX}"_[0-9-]+$ ]] && snaps_target+=("$snap")
    done <<< "$(/sbin/zfs list -H -o name -t snapshot -s creation "$target/$fs" 2>/dev/null)"

    # Remember snapshots to destroy on target, but not the one we're about to transfer, if it already exists
    n_target=${#snaps_target[@]}
    if (( n_target > 0 )) && [[ "${snaps_target[n_target-1]}" == "$newsnap" ]]; then
      (( n_target > 1 )) && destroy+=("$target/$fs@%${snaps_target[n_target-2]}") # % operator denotes range
    elif (( n_target > 0 )); then
      destroy+=("$target/$fs@%${snaps_target[n_target-1]}")
    fi

    # Find newest common snapshot
    commonsnap=""
    for (( i = n_target - 1; i >= 0; i-- )); do
      # skip fs if planned snapshot already exists
      if [[ "${snaps_target[$i]}" == "$newsnap" ]]; then
        echo -e "\e[44mSkipping $fs\e[49m (@\e[45m$newsnap\e[49m already exists on $target)"
        continue 2
      fi
      for snap in "${snaps_source[@]}"; do
        if [[ "${snaps_target[$i]}" == "$snap" ]]; then
          commonsnap="${snaps_target[$i]}"
          break 2
        fi
      done
    done

    if [[ $commonsnap ]]; then
      echo -e "\e[42mSending $fs\e[49m (incr from @\e[45m$commonsnap\e[49m to @\e[45m$newsnap\e[49m)"
      incrparam=("-i" "$fs@$commonsnap")
    else
      # gotta destroy it first :/
      # https://www.reddit.com/r/zfs/comments/dkldqr/having_trouble_using_sendreceive_on_encrypted/
      # https://www.reddit.com/r/zfs/comments/ja0f5r/backing_up_encrypted_stream/
      # https://www.reddit.com/r/zfs/comments/u6d3p4/rfc_notes_on_encrypted_zfs_backup_creation_and/i5h082c/
      /sbin/zfs list "$target/$fs" &>/dev/null && {
        /sbin/zfs destroy -v -r "$target/$fs"
        echo "waiting a bit for space to free up..."
        sleep 120 # this can take *much* longer tho
      }
      echo -e "\e[42mSending $fs\e[49m (full @\e[45m$newsnap\e[49m)"
      incrparam=()
    fi

    # `zfs send -P` can mysteriously fail with `warning: cannot send 'foo': Unknown error 1030`, so handle it gracefully
    size=$(set -o pipefail; /sbin/zfs send -n -P "${incrparam[@]}" "$fs@$newsnap" 2>/dev/null | awk '/^size/{print $2}') || size=0
    # no "size" line at all would leave this empty and break `pv -s` below
    [[ "$size" =~ ^[0-9]+$ ]] || size=0
    if [[ "$size" -gt "$(/sbin/zpool list -H -p -o free "$target")" ]]; then
      echo "Not enough free space, skipping!"
      ((uhoh++))
      continue
    fi

    # Transfer!
    # always use -L: https://github.com/openzfs/zfs/issues/6224
    # pipefail (scoped to this subshell) so a failing `zfs send` is counted
    # too, not just a failing `zfs receive`
    (
      set -o pipefail
      /sbin/zfs send -L -w -p "${incrparam[@]}" "$fs@$newsnap" \
        | pv -s "$size" -D 0.1 -i 10 \
        | /sbin/zfs receive -F -u "$target/$fs"
    ) || ((uhoh++))
  done

  # Execute deletion
  if [[ $uhoh == 0 ]] && (( ${#destroy[@]} > 0 )); then
    echo -e "\e[43mDestroying old snapshots on source and target\e[49m"
    for d in "${destroy[@]}"; do
      /sbin/zfs destroy -v "$d"
    done
  fi

  # Delete old/excluded fs on target
  destroy=()
  while read -r tfs; do
    # skip datasets not in target/source/ tree
    # (exact dataset or a real child - a plain prefix match would also hit
    # siblings such as "$target/${SOURCE}old")
    [[ "$tfs" == "$target/$SOURCE" || "$tfs" == "$target/$SOURCE/"* ]] || continue
    # skip datasets subject to backup (quoted: names may contain spaces)
    for sfs in "${fslist[@]}"; do
      if [[ "$tfs" == "$target/$sfs" ]]; then
        continue 2
      fi
    done
    destroy+=("$tfs")
  done <<< "$(/sbin/zfs list -H -o name -t filesystem,volume -r "$target")"

  # Execute deletion
  if [[ $uhoh == 0 ]] && (( ${#destroy[@]} > 0 )); then
    echo -e "\e[43mDestroying old/excluded fs on target\e[49m"
    for d in "${destroy[@]}"; do
      /sbin/zfs destroy -v -r "$d"
    done
  fi

  echo -e "\e[43mFinishing backup\e[49m"
  sleep 15 # so `used` below is sorta accurate
  echo "Elapsed time: ${SECONDS} seconds, $(/sbin/zpool list -H -o alloc "$target")B of $(/sbin/zpool list -H -o size "$target")B in use on $target."

  [[ -t 1 ]] && {
    echo -n "Exporting...press Ctrl+c to cancel! "
    for i in {5..1}; do echo -n "${i}.."; sleep 1; done; echo ""
  }
  if /sbin/zpool export "$target"; then
    sync
    sleep 5
    echo "Unmounted and exported."
    # TODO: have new instances of this script kill this
    # TODO: query drive for SMR first? nope, Seagate Expansion lies about it... (/sys/block/sdX/device/scsi_disk/*/zoned_cap=none)
    # Give SMR drives 30min to finish writing from CMR zone, then shut down spindle
    # { for i in {1..30}; do [[ -b "$device" ]] || exit; sleep 60; done; /sbin/hdparm -Y "$device"; } &
  else
    echo "Unmount/Export failed!"
    ((uhoh++))
  fi

  if [[ $uhoh == 0 ]]; then
    status successful
  else
    echo -e "\e[41mThere were $uhoh problem(s)! destroy skipped.\e[49m"
    status failed
  fi

  rm "$LOCKFILE"
  exit 0
}
# ---- entry point ----
# $1 has to be an existing block device; anything else is refused right away.
if [[ ! -b "$1" ]]; then
  if [[ "$1" ]]; then
    echo "Don't know what to do with $1!"
  else
    echo "Don't know what to do!"
  fi
  exit 1
fi
device="$1"

if [[ -z "$2" ]]; then
  # interactive call: run in the foreground, coloured output goes to stdout
  main
else
  # called by udev

  # bail out if still booting (current runlevel must be one of 2-5)
  runlevel=$(runlevel)
  [[ ${runlevel#* } =~ [2-5] ]] || exit 2

  # obey inhibit
  if [[ -e "/tmp/backup-zfs.inhibit" ]]; then
    status disabled
    exit 0
  fi

  # log everything to a per-device file, with the colour escapes stripped
  main 2>&1 \
    | sed --unbuffered -E "s/[[:cntrl:]]\[[0-9]{1,3}m//g" \
      > "/tmp/backup-zfs_${device##*/}.log"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment