#!/bin/sh
# zpool_recovery.sh - shell script for ZFS zpool recovery using zdb
#
# usage: zpool_recovery.sh [-p pool]
# [-r dest_pool/recovery_path | -d dumpdir]
# [-t txg_id] [-i index_file] [-z xz_or_pixz_args ]
# [-x][-n][-b][-w][-q]
#
# default pool: tank
# default dumpdir: $PWD/dump (streams under $PWD/dump/each)
# default txg_id: Will be determined with zdb
#
# other args are described below
#
# recommended usage:
#
# # zfs set mountpoint=none dest_pool/dest_dataset
# # zpool_recovery.sh -p pool -r dest_pool/dest_dataset
#
# This assumes that the dest_pool will have sufficient
# storage space for the contents of all or most top-level
# filesystem and volume datasets from the recovered pool,
# excluding earlier snapshots.
#
# this script will try to use zdb -B to produce a zfs-send
# stream for each filesystem or volume in the pool denoted
# with '-p'
#
# If a dataset cannot be recovered from the top-level
# volume or filesystem dataset, this script will try
# to locate the most recent snapshot that can be recovered
# for the original dataset. Other snapshots will not be
# recovered. If a dataset does not have any earlier
# snapshots, this script will continue to the next
# recoverable dataset.
#
#
# Caveats:
#
# it's assumed that the origin pool has not changed
# across subsequent calls to this script
#
# for any dataset ID in which ${DUMPDIR}/${ID}.SKIP exists,
# the dataset will not be recovered, whether using the
# dumpdir or recovery_pool destination. The ID value would
# denote the object set ID for the filesystem, as stored
# in the index file.
#
# Recovery for encrypted datasets is only partly supported
# here, mainly via the zfs-send(8)-style args for zdb -B
#
# if the original pool uses encryption, this script can
# be called with the -x arg to prevent the zdb -B call
# from using the -e arg like zfs-send, and in which case
# the -w arg will be used for zfs-send, assuming similar
# encryption for the receiving pool. (This has not been
# tested at this time)
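#
# As a hypothetical example (the pool and dataset names here
# are placeholders), recovery from an encrypted pool onto a
# similarly encrypted receiving pool might be run as:
#
# # zpool_recovery.sh -p tank -r backup/recovered -x
#
# With -x, the zdb -B call would then be provided with -w
# rather than -e, as with 'zfs send -w' for raw sends.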
#
# further documentation is provided in the following
set -e
## use set -x to echo script commands (debugging)
# set -x
# args
#
# [ -p POOLNAME ] : use POOLNAME as the origin pool,
# default is 'tank'
#
# [ -t TXG ] : assume TXG is the best txg value for recovery.
# default is determined with zdb, then stored
# in the file <DUMPDIR>/TXG
#
# [ -i FILENAME ] : filename for object set info from zdb, compressed
# with xz or pixz. default is <DUMPDIR>/INDEX.xz
#
# [ -x ] : assume pool is encrypted; use -w and not -e as with zfs send
#
# [ -n ] : do not use compression as with zfs send -c
#
# [ -b ] : do not use large block support as with zfs send -L
#
# [ -w ] : use xz, not pixz, even if pixz is installed
#
# [ -z ARGSTR ] : arg string (quoted) for xz / pixz
#
# [ -q ] : produce less output on stdout
#
#
# Persistence
#
# [ -d DUMPDIR ] : use DUMPDIR for persistent data. default: ${PWD}/dump
#
# This dumpdir will be used for some persistent data, even when a
# receiving pool is provided in args.
#
# Though not recommended, if no receiving pool is provided in args
# then the dumpdir will be used for storing all recovered object set
# streams, each compressed further with xz or pixz. The pathname for
# these stream files: <DUMPDIR>/each/<ID>.xz
#
# These can then be restored using xzcat and zfs receive onto some new
# pool.
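#
# For example, with a hypothetical object set ID 54 and a
# hypothetical receiving pool 'newpool':
#
# # xzcat dump/each/54.xz | zfs receive -dF newpool/recovered
#
# The original dataset name for each recovered ID is recorded
# by this script in <DUMPDIR>/each/<ID>.name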
#
# [ -r RECV_POOL/PATH ] : receiving pool and dataset path
#
# It's assumed by default that this receiving pool can receive
# streams as from 'zfs send -Lec'. Each of these send-style args
# can be disabled with individual args to this script, described
# above, if required for the receiving pool.
#
# It's recommended to set 'mountpoint=none' on the receiving dataset,
# until the recovery process may be considered complete
#
# In all cases, additional files used by this script:
#
# % <DUMPDIR>/TXG : If exists, the file must contain only the
# transaction group ID (TXG) to use when dumping from the original
# pool.
#
# If this file does not exist, the TXG file will be created from the
# numerically largest usable TXG available for purposes of recovery
# with ZDB.
#
# If a -t TXG arg is provided, this value will override any value
# stored in <DUMPDIR>/TXG
#
# % <DUMPDIR>/index.xz : If exists, the file should contain object set
# information as produced with zdb, compressed using pixz or xz.
#
# If the index file does not exist, the file will be created after
# parsing debug data from zdb
#
# % <DUMPDIR>/each/<ID>.SKIP
#
# For any file that exists with this syntax, the object set (i.e.
# filesystem or volume) for the matching ID will not be dumped,
# whether to a receiving pool or to the "each" dir under the DUMPDIR.
#
# The corresponding dataset for each ID can be determined once the
# index.xz file is created, or by using a zdb command similar to that
# used when creating the index file in this script, e.g
#
# # zdb -d -AAAXe -t ${TXG} ${POOL} | less
#
# The contents of the each/<ID>.SKIP file will not be analyzed by this
# script. The file can be created generally with touch(1) with a path
# corresponding to the object set ID for any dataset that should not
# be recovered with this script.
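#
# e.g to skip the dataset with a hypothetical object set ID 54,
# when using the default dumpdir:
#
# # touch dump/each/54.SKIP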
#
# % Only when recovering to DUMPDIR: <DUMPDIR>/each/<ID>.xz
#
# When using the DUMPDIR recovery method, the existence of this
# file will be assumed to indicate that the corresponding ID
# has already been recovered. If the script is interrupted before
# the stream file is completely written, the partially recovered
# stream file should be manually deleted before any subsequent
# call to this script. As such, the output from this script
# should generally be captured in this case, such as with
# 'script -F', in order to determine which stream files may
# have been created before any interruption in the send (e.g
# an interruption due to a corrupted dataset, or due to
# poweroff or system error)
#
# For partial sends using the -r RECVPOOL/PATH storage method,
# any dataset created on receipt of the partial send should be
# automatically deleted by ZFS, after the send is interrupted
#
# (this script does not use resumable sends)
#
# % Additional Notes and Known Limitations
#
# - this script provides limited handling for signals, e.g interrupt.
#
# this signal handling will generally be affected by the shell
# interpreter under which the script is run, e.g /bin/sh or GNU BASH
#
# - limited support for partial sends
#
# - partial sends should generally not persist in any receiving pool
#
# - if recovering to a dumpdir, partially sent files will not be
# automatically deleted and may result in incomplete recovery, if
# each incomplete dump file is not removed before any subsequent
# script run
#
# - The dumpdir support represents an earlier storage method used
# when developing this script. It's generally recommended to recover
# to a new zpool, rather than to store the individual send/receive
# streams to a dumpdir.
#
# - Compression support and large block support should generally
# not be disabled when running this shell script, unless the
# receiving pool does not support them
#
# - During zfs receive, a @--HEAD-- snapshot will be created for every
# successfully received dataset - except in the case of a dataset
# recovered from some earlier snapshot. These snapshots will then be
# used by this script, to determine whether each dataset was
# completely sent. These individual snapshots should not be deleted
# from the receiving pool until the recovery process may be considered
# complete.
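#
# e.g these snapshots may be reviewed on a hypothetical
# receiving dataset dest_pool/recovered with:
#
# # zfs list -t snapshot -r -oname dest_pool/recovered | grep -e '@--HEAD--'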
#
# - This script may not be able to restore any non-inherited mount points
# or other ZFS properties that were in use for datasets in the original
# pool
#
# it's generally recommended to set mountpoint=none on the receiving
# ZFS dataset, at least until such time as when the original mountpoints
# can be restored to the recovered datasets
#
# - this script may not detect all incomplete sends, such as when a
# dataset could not be sent due to corrupted storage data
#
# one type of error message that may occur on stderr, during send:
#
# dump_backup: dmu_send_obj: Input/output error
#
# this error message may indicate a corrupted object set.
#
# if any snapshot exists for the corresponding dataset within the original
# pool, then any of those snapshots may still be usable for purpose of
# recovery.
#
# after detecting an incomplete send as such, this script will try to
# recover the corresponding dataset from the newest available snapshot
# for the same dataset
#
# additional information presented on stderr for this script may
# include lines such as the following, corresponding with an error
# condition such as above:
#
# #-- <scriptname> Failed: receive for <dataset_name> [<object_set_id>]
#
# - TXG values may be listed on a per-vdev basis with the command
#
# # zdb -ul -AAAXe /path/to/vdev | awk '$1 == "txg" { print $3 }'
#
# TXG IDs produced by ZDB may generally be listed in chronological
# order. This order can be reversed, for displaying any more recent TXG
# ID first, such as by piping the output of the above command to GNU
# tac (gtac, from GNU coreutils) or to `tail -r` on FreeBSD systems.
# This assumes that a numerically larger TXG ID may represent a more
# recent transaction group.
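#
# e.g on FreeBSD, with a hypothetical vdev path, listing any
# more recent TXG ID first:
#
# # zdb -ul -AAAXe /dev/da0p3 | awk '$1 == "txg" { print $3 }' | tail -r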
#
# Copyright (c) 2025 Sean Champ. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
## try to detect the script name, which may not be available
## if this script is sourced via sh(1) or BASH
if THIS=$(readlink -f "$0" 2>/dev/null); then
  THIS=$(basename "${THIS}")
else
  THIS="zpool_recovery.sh"
fi
msg() {
echo "#-- ${THIS}: $@" 1>&2
}
fail() {
# 'exit' within a subshell iterator may not exit the script as expected
msg "$@"
exit 1
}
#
# args handling
#
POOL=tank
RECVDEST=""
DUMPDIR=dump
TXG=""
ENCRYPTED=""
COMPRESSION="c"
LARGEBLOCKS="L"
XZ=""
XZARGS=""
QUIET=""
while getopts ":p:t:d:i:xnbwz:qr:" ARG; do
  case ${ARG} in
    p)
      POOL="${OPTARG}"
      ;;
    t)
      TXG="${OPTARG}"
      ;;
    d)
      DUMPDIR="${OPTARG}"
      ;;
    i)
      INDEX="${OPTARG}"
      ;;
    x)
      ENCRYPTED="y"
      ;;
    n)
      COMPRESSION=""
      ;;
    b)
      LARGEBLOCKS=""
      ;;
    w)
      XZ="xz"
      ;;
    z)
      XZARGS="${XZARGS}${XZARGS:+ }${OPTARG}"
      ;;
    q)
      QUIET="y"
      ;;
    r)
      RECVDEST="${OPTARG}"
      ;;
    :)
      fail "Missing argument for option: -${OPTARG}"
      ;;
    *)
      fail "Unknown option in args: -${OPTARG:-}"
      ;;
  esac
done
#
# further arg checks
#
shift $((OPTIND - 1))
if [ "$#" -ne 0 ]; then
  fail "Unknown options in args: $*"
fi
## not a necessary check, when using '-F' on receive
# case ${RECVPOOL} in
# ""|*/*)
# ;;
# *)
# fail "RECVPOOL must not denote the receiving pool name: ${RECVPOOL}"
# ;;
# esac
#
# deferred defaults for args
#
if [ "x${INDEX:-}" = "x" ]; then
INDEX="${DUMPDIR}/index.xz"
fi
if [ "x${XZ}" = "x" ]; then
  if ! XZ=$(which pixz 2>/dev/null); then
    XZ=$(which xz 2>/dev/null) || fail "xz not found"
  fi
fi
SEND_ARGS=""
if [ "x${LARGEBLOCKS}" != "x" ]; then
SEND_ARGS="-L"
fi
if [ "x${COMPRESSION}" != "x" ]; then
SEND_ARGS="${SEND_ARGS}${SEND_ARGS:+ }-c"
fi
if [ "${ENCRYPTED}" = "y" ]; then
SEND_ARGS="${SEND_ARGS}${SEND_ARGS:+ }-w"
else
SEND_ARGS="${SEND_ARGS}${SEND_ARGS:+ }-e"
fi
EACHDIR="${DUMPDIR}/each"
# environment option: TAC
#
# If not set:
# - use GNU tac if available
# - else, assume FreeBSD or similar: use tail -r
#
# ${TAC} is used to reverse the order of zdb
# debugging info, whether chronologically or
# for the structure of the recovered pool
#
if [ "x${TAC:-}" = "x" ]; then
  # NB: the assignments must not be wrapped in a ( ... ) subshell
  # here, or the value would not persist for the rest of the script
  if ! TAC=$(which gtac 2>/dev/null); then
    if ! TAC=$(which tac 2>/dev/null); then
      TAC="tail -r"
    fi
  fi
fi
case "${QUIET}" in
y)
VERBOSE=""
;;
*)
VERBOSE="-v"
;;
esac
#
# main script
#
info() {
if [ "${QUIET}" != "y" ]; then
msg "$@"
fi
}
ensure_txg() {
local VDEV COUNT LTXG TMPV
# when TXG was provided in top-level script args
if [ "x${TXG}" != "x" ]; then return; fi
# when TXG was already stored under DUMPDIR
if [ -e "${DUMPDIR}/TXG" ]; then
TXG=$(cat "${DUMPDIR}/TXG")
info "Found existing TXG ${TXG}"
return
fi
# try finding a usable TXG using zdb -d for each usable vdev,
# with each vdev as determined from the zdb pool config output
# first loop is to determine vdevs for the pool
zdb -AAAXe ${POOL} |
awk '$1 == "path:" { gsub("['\'']", "", $2); print $2 }' |
while read VDEV; do
info "trying vdev ${VDEV}";
# the main driver for this loop is the following zdb call,
# producing TXG ID and TXG index (count) values from each
# vdev
zdb -ul -AAAXe ${VDEV} |
awk -v "COUNT=-1" '$1 ~ "^Uberblock" {
sub("Uberblock\\[", "", $1);
sub("\\]", "", $1);
COUNT=$1;
next;
}
COUNT > 0 && $1 == "txg" {
print COUNT " " $3
}' | ${TAC} | while read COUNT LTXG ; do
# the next loop tries to determine the first usable TXG
info "Trying ${VDEV} uberblock nr. ${COUNT} txg ${LTXG}"
TMPV=$(zdb -d -t ${LTXG} -AAAXe ${POOL} | head -n1)
if [ "x${TMPV}" != "x" ]; then
info "Found txg [${COUNT}] ${LTXG}"
echo "${LTXG}" > "${DUMPDIR}/TXG"
TXG=${LTXG}
# NB: this 'return' exits the subshell created
# for the 'while read' pipeline, not this
# function. The TXG file written above is
# re-read after the loop, below.
fi
done
done
# store the top-level TXG if now available
if [ -e "${DUMPDIR}/TXG" ]; then
TXG=$(head -n1 "${DUMPDIR}/TXG")
return
fi
# try a fallback ..
info "Using fallback detection for TXG"
TXG=$(zdb -AAAXe ${POOL} |
awk '/best uberblock/ { print $12 }' |
tail -n1)
}
ensure_index() {
if [ -e "${INDEX}" ]; then
info "Using existing index: ${INDEX}"
else
info "Generating index: ${INDEX}"
zdb -d -AAAXe -t ${TXG} ${POOL} |
awk -v "FS=[ ,]" '/^Dataset/ && $3 == "[ZPL]" { print $0 }' |
${TAC} | ${XZ} ${XZARGS} > "${INDEX}"
fi
}
parse_index_ds() {
# parse the existing index, filtering out any snapshots at this point
ensure_index
xzcat ${INDEX} | awk -v "FS=[ ,]" '/^Dataset/ && $2 !~ "@" && $3 == "[ZPL]" {
print $2 " " $6 }'
}
parse_index_snap() {
# second-level recovery support for intermediate snapshots
#
# this parses the object set index, separate from parse_index_ds
#
# when this is called, the index can be assumed to already exist
#
# optional second arg is a snapshot to skip, such as when
# the snapshot could not be completely sent
#
local DNAME="$1";
shift || fail "script error: parse_index_snap called without dataset name"
local SKIP="${1:-}"
if [ "x${SKIP}" = "x" ]; then
# print the first (chronologically newest) snapshot
# for this dataset
xzcat ${INDEX} |
awk -v "FS=[ ,]" -v "DS=${DNAME}" \
'$1 == "Dataset" && $2 ~ DS "@" && $3 == "[ZPL]" { print $2 " " $6; exit }'
else
# parse the reverse-ordered output from the zdb index,
# printing the first snapshot name in this ordering. This
# is assumed to be chronologically previous to the skipped
# snapshot
xzcat ${INDEX} |
awk -v "FS=[ ,]" -v "DS=${DNAME}" -v "SKIP=${SKIP}" \
'BEGIN { SKIPPING = 1 }
$1 == "Dataset" && $2 == DS "@" SKIP && $3 == "[ZPL]" {
SKIPPING = 0; next;
}
$1 == "Dataset" && $2 ~ DS "@" && !SKIPPING && $3 == "[ZPL]" {
print $2 " " $6; exit }'
fi
}
handle_stream() {
local NAME=${1}
shift || fail "script error: handle_stream called without NAME arg"
local ID=${1}
shift || fail "script error: handle_stream called without ID arg"
local SNAP="${1:-}"
if [ -e "${EACHDIR}/${ID}.SKIP" ]; then
info "skip: ${ID}"
return 0
fi
if [ "x${RECVDEST}" = "x" ]; then
if [ -e "${EACHDIR}/${ID}.xz" ]; then
info "Already recovered: ${ID}"
return 0
fi
else
# detect any earlier recovery when ${RECVDEST}
#
# this handles the recvname in a syntax
# as with 'zfs receive -d'
#
local RECVNAME
if [ "${NAME}" = "${POOL}" ]; then
RECVNAME=${RECVDEST}
else
RECVNAME=${RECVDEST}/${NAME#*/}
fi
local RECV_OK=$(zfs list -t snapshot -pH -oname "${RECVNAME}" 2>/dev/null)
if [ "x${RECV_OK}" != "x" ]; then
info "Already recovered: ${ID} ${RECVNAME}"
return
fi
fi
zdb -eAAAX -t ${TXG} -B ${POOL}/${ID} ${SEND_ARGS} |
if [ "x${RECVDEST}" = "x" ]; then
info "recovering ${NAME} [${ID}] to ${EACHDIR}/${ID}.xz"
echo "${NAME}" > ${EACHDIR}/${ID}.name
if ! ${XZ} ${XZARGS} > "${EACHDIR}/${ID}.xz"; then
# when storing to a DUMPDIR, this may detect
# some errors in filesystem I/O and may not
# detect any ZFS error with the sent stream
msg "Failed: receive for ${NAME} [${ID}]"
return 1
fi
else
local RECV_HOW=""
## prototype, for if not using -d in zfs receive
# if [ "${NAME}" = "${POOL}" ]; then
# RECV_HOW=-e
# else
# RECV_HOW=-d
# fi
info "recovering ${NAME} [${ID}] onto ${RECVDEST}"
if ! zfs receive -dF ${RECV_HOW} ${VERBOSE} "${RECVDEST}"; then
# call recursively from here. values
# set here might not be reflected in
# the top-level scope of this function
handle_snap ${NAME} ${ID} ${SNAP}
else
## to remove the individual dump file
# rm -f ${EACHDIR}/${ID}.xz
## else
true
fi
fi
}
handle_snap() {
  local NAME=${1}
  shift || fail "script error: handle_snap called without NAME arg"
  local ID=${1}
  shift || fail "script error: handle_snap called without ID arg"
  local SNAP="${1:-}"
  local NEXT NEXTSNAP NEXTID
  # parse_index_snap prints at most one "dataset@snapshot id" line.
  # Capturing the output avoids a 'while read' pipeline subshell,
  # in which 'return' would not exit this function
  NEXT=$(parse_index_snap "${NAME}" "${SNAP}")
  if [ "x${NEXT}" = "x" ]; then
    info "No further snapshots: ${NAME}"
  else
    NEXTSNAP="${NEXT% *}"
    NEXTID="${NEXT##* }"
    # pass only the text after '@', as parse_index_snap
    # expects a bare snapshot name for any later skip
    handle_stream "${NAME}" "${NEXTID}" "${NEXTSNAP#*@}"
  fi
}
#
# main()
#
## try to not call through to this when debugging via 'source' with BASH
if [ "x${BASH_SOURCE:-}" = "x" ] || [ "${BASH_SOURCE:-}" = "${0}" ]; then
mkdir -p "${DUMPDIR}"
mkdir -p "${EACHDIR}"
ensure_txg
info "using TXG: ${TXG}"
ensure_index
parse_index_ds |
while read NAME ID; do
handle_stream ${NAME} ${ID}
## try to make it interruptible (no signal handling here)
sleep 1
done
fi # BASH_SOURCE test