Skip to content

Instantly share code, notes, and snippets.

@NickCrews
Created April 20, 2026 22:38
Show Gist options
  • Select an option

  • Save NickCrews/1a0a7592eb6731ae94c5dee7df731f16 to your computer and use it in GitHub Desktop.

Select an option

Save NickCrews/1a0a7592eb6731ae94c5dee7df731f16 to your computer and use it in GitHub Desktop.
Monitors the progress of a running pg_restore process, reporting total bytes read and throughput per thread, as well as timing info and ETA.
#!/usr/bin/env bash
# This script monitors the progress of a running pg_restore process.
#
# Use it by starting a `pg_restore`:
# pg_restore --dbname=mydb --jobs=8 /path/to/dumpfile.dump
# Then, in another terminal, run this script:
# ./watch_pg_restore.sh
#
# The script will look for all running `pg_restore` processes,
# identify the dump file being read by each worker (by looking for open file descriptors on FD 4),
# and use the `proc_pid_rusage` C function (from macOS libproc) to get the total bytes each process has read from disk.
# It will then aggregate this information to show the overall progress of the restore operation,
# including total bytes read, read rate, and estimated time remaining based on the dump size.
#
# I'm not positive this is totally accurate, but it does at least provide a rough estimate of the progress,
# and it lets you see which processes are doing the most work.
#
# This requires either `python3` or `cc` to be available on the system to query the read bytes from the processes.
# It also requires several common unix utilities like `lsof`, `pgrep`, `ps`, and `stat`.
# If this doesn't work for you, or you want to customize the output, your favorite coding
# agent should be pretty capable of modifying it. This is how I wrote the script initially.
#
# The output looks something like this (updating every second).
#
# Dump file: /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump
# Worker processes: 7
# Dump size: 80.69 GiB
# Total bytes read (all pg_restore workers): 71.21 GiB
# Progress vs dump size: 88.25%
# Current total read rate: 32.00 MiB/s
# Average read rate since start: 14.60 MiB/s
# Started: Fri Apr 10 11:58:25 2026
# Elapsed: 01:23:15
# Estimated remaining: 00:05:03
# PID FD PATH BYTES READ READ RATE
# -------- ---- -------------------------------------------------------- -------------- --------------
# 7709 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 0 B 0 B/s
# 7724 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 23.94 GiB 32.00 MiB/s
# 7725 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 15.94 GiB 0 B/s
# 7726 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 11.16 GiB 0 B/s
# 7727 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 8.45 GiB 0 B/s
# 7728 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 6.61 GiB 0 B/s
# 7729 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 5.11 GiB 0 B/s
# Strict mode: exit on an unhandled command failure (-e), treat expansion of
# an unset variable as an error (-u), and make a pipeline fail when any of its
# stages fails (pipefail).
set -euo pipefail
# Ensure an external command is available before the script relies on it.
# Arguments: $1 - name of the command to look up with `command -v`
# Outputs:   an error message on stderr when the command cannot be found
# Returns:   0 when the command exists; otherwise exits the shell with 1
require_cmd() {
  local cmd_name="$1"
  command -v "$cmd_name" >/dev/null 2>&1 && return 0
  echo "Missing required command: $cmd_name" >&2
  exit 1
}
# Render a byte count with a binary (1024-based) unit suffix.
# Arguments: $1 - number of bytes
# Outputs:   the formatted value on stdout without a trailing newline;
#            plain bytes are printed as an integer ("512 B"), every larger
#            unit with two decimals ("1.50 KiB"). PiB is the largest unit.
format_bytes() {
  local byte_count="$1"
  awk -v n="$byte_count" 'BEGIN {
    count = split("B KiB MiB GiB TiB PiB", suffix, " ")
    # Divide down until the value fits the unit or the last unit is reached.
    for (idx = 1; n >= 1024 && idx < count; idx++) {
      n /= 1024
    }
    fmt = (idx == 1) ? "%d %s" : "%.2f %s"
    printf fmt, n, suffix[idx]
  }'
}
# Format a number of seconds as HH:MM:SS.
# Arguments: $1 - duration in whole seconds; negative values are shown as 0
# Outputs:   the formatted duration on stdout without a trailing newline;
#            the hour field grows beyond two digits when needed
format_duration() {
  local secs_left="$1"
  local hh mm ss
  (( secs_left >= 0 )) || secs_left=0
  hh=$(( secs_left / 3600 ))
  mm=$(( secs_left % 3600 / 60 ))
  ss=$(( secs_left % 60 ))
  printf '%02d:%02d:%02d' "$hh" "$mm" "$ss"
}
# Query the disk-read byte counter of each given PID through python3/ctypes.
# Arguments: PIDs to query; arguments that are not integers are skipped
# Outputs:   one "<pid>\t<bytes read>" line per PID on stdout; the byte count
#            is 0 when proc_pid_rusage reports a failure for that PID
get_read_bytes_batch_python() {
  python3 - "$@" <<'PY'
import ctypes
import sys

RUSAGE_INFO_V2 = 2

# The 64-bit counters that follow the UUID in the structure, in order.
_COUNTER_FIELDS = (
    "ri_user_time",
    "ri_system_time",
    "ri_pkg_idle_wkups",
    "ri_interrupt_wkups",
    "ri_pageins",
    "ri_wired_size",
    "ri_resident_size",
    "ri_phys_footprint",
    "ri_proc_start_abstime",
    "ri_proc_exit_abstime",
    "ri_child_user_time",
    "ri_child_system_time",
    "ri_child_pkg_idle_wkups",
    "ri_child_interrupt_wkups",
    "ri_child_pageins",
    "ri_child_elapsed_abstime",
    "ri_diskio_bytesread",
    "ri_diskio_byteswritten",
)


class RUsageInfoV2(ctypes.Structure):
    _fields_ = [("ri_uuid", ctypes.c_uint8 * 16)] + [
        (field_name, ctypes.c_uint64) for field_name in _COUNTER_FIELDS
    ]


libproc = ctypes.CDLL("libproc.dylib")
proc_pid_rusage = libproc.proc_pid_rusage
proc_pid_rusage.argtypes = [
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(RUsageInfoV2),
]
proc_pid_rusage.restype = ctypes.c_int

for raw_pid in sys.argv[1:]:
    try:
        pid = int(raw_pid)
    except ValueError:
        continue
    usage = RUsageInfoV2()
    status = proc_pid_rusage(pid, RUSAGE_INFO_V2, ctypes.byref(usage))
    bytes_read = int(usage.ri_diskio_bytesread) if status == 0 else 0
    print(f"{pid}\t{bytes_read}")
PY
}
# Compile a small C helper that prints "<pid>\t<disk bytes read>" for every
# PID given on its command line (0 when proc_pid_rusage fails for a PID).
# Globals:   RUSAGE_PROBE_BIN (written) - path of the compiled helper; reset
#            to an empty string when the build fails
# Outputs:   compiler diagnostics, if any, on stderr
# Returns:   0 on success; 1 when a temp file cannot be created or cc fails.
#            On failure no temporary file is left behind.
build_rusage_probe() {
  local src
  RUSAGE_PROBE_BIN="$(mktemp -t watch_pg_restore_rusage.XXXXXX)" || {
    RUSAGE_PROBE_BIN=''
    return 1
  }
  src="$(mktemp -t watch_pg_restore_src.XXXXXX)" || {
    rm -f "$RUSAGE_PROBE_BIN"
    RUSAGE_PROBE_BIN=''
    return 1
  }
  cat > "$src" <<'C'
#include <stdio.h>
#include <stdlib.h>
#include <libproc.h>
#include <sys/resource.h>

int main(int argc, char **argv) {
  int i;
  for (i = 1; i < argc; i++) {
    int pid = atoi(argv[i]);
    rusage_info_current ri;
    int rc = proc_pid_rusage(pid, RUSAGE_INFO_CURRENT, (rusage_info_t *)&ri);
    if (rc == 0) {
      printf("%d\t%llu\n", pid, ri.ri_diskio_bytesread);
    } else {
      printf("%d\t0\n", pid);
    }
  }
  return 0;
}
C
  # The temp source has no .c suffix, so the language is given with -x c.
  # Check the compiler explicitly so both temp files are removed on failure
  # instead of being leaked when the script aborts.
  if ! cc -x c -O2 -Wall -Wextra -o "$RUSAGE_PROBE_BIN" "$src"; then
    rm -f "$src" "$RUSAGE_PROBE_BIN"
    RUSAGE_PROBE_BIN=''
    return 1
  fi
  rm -f "$src"
}
# Query disk-read byte counts by running the compiled helper binary.
# Globals:   RUSAGE_PROBE_BIN (read) - path of the helper executable
# Arguments: PIDs to query, passed through unchanged
# Outputs:   whatever the helper prints on stdout
# Returns:   the helper's exit status
get_read_bytes_batch_cc() {
  local probe_bin="$RUSAGE_PROBE_BIN"
  "$probe_bin" "$@"
}
# Dispatch a batch read-bytes query to the provider selected at startup.
# Globals:   READ_BYTES_PROVIDER (read) - "python3" selects the Python
#            implementation; any other value selects the compiled helper
# Arguments: PIDs to query, passed through unchanged
# Outputs:   the selected provider's output on stdout
get_read_bytes_batch() {
  case "$READ_BYTES_PROVIDER" in
    python3)
      get_read_bytes_batch_python "$@"
      ;;
    *)
      get_read_bytes_batch_cc "$@"
      ;;
  esac
}
# Choose how per-process read counters are obtained: python3 when it is
# usable, otherwise a helper compiled with cc.
# Globals:   READ_BYTES_PROVIDER (written) - set to "python3" or "cc"
# Outputs:   an error message on stderr when no provider is available
# Returns:   0 once a provider is selected; exits the shell with 1 when
#            neither python3 nor cc can be used
init_read_bytes_provider() {
  # Finding python3 on PATH does not prove it can run the query: the
  # interpreter may be non-functional or unable to load libproc. Probe it
  # once so a broken python3 falls back to cc instead of being selected and
  # then failing on every frame.
  if command -v python3 >/dev/null 2>&1 \
    && python3 -c 'import ctypes; ctypes.CDLL("libproc.dylib").proc_pid_rusage' \
      >/dev/null 2>&1; then
    READ_BYTES_PROVIDER="python3"
    return
  fi
  if command -v cc >/dev/null 2>&1; then
    build_rusage_probe
    READ_BYTES_PROVIDER="cc"
    return
  fi
  echo "Missing required command: need either python3 or cc" >&2
  exit 1
}
# Exit handler: restore the terminal and delete the compiled helper, if any.
# Globals:   LAST_FRAME_LINES (read) - lines drawn by the most recent frame
#            RUSAGE_PROBE_BIN (read) - helper binary path; may be unset/empty
# Outputs:   one newline on stdout when at least one frame has been drawn
cleanup() {
  (( LAST_FRAME_LINES <= 0 )) || printf '\n'
  # Make the cursor visible again; ignore terminals without that capability.
  tput cnorm 2>/dev/null || true
  local probe_bin="${RUSAGE_PROBE_BIN:-}"
  if [[ -n "$probe_bin" ]]; then
    rm -f "$probe_bin"
  fi
}
# Look up the byte count recorded for a PID in the previous frame.
# Globals:   PREV_PIDS (read), PREV_READ_BYTES (read) - parallel arrays
# Arguments: $1 - PID to look up
# Outputs:   the recorded byte count of the first matching PID on stdout,
#            or 0 when the PID was not part of the previous frame
get_prev_read_bytes() {
  local wanted_pid="$1"
  local idx found=0
  for idx in "${!PREV_PIDS[@]}"; do
    [[ "${PREV_PIDS[$idx]}" == "$wanted_pid" ]] || continue
    found="${PREV_READ_BYTES[$idx]}"
    break
  done
  echo "$found"
}
# Collect per-worker read statistics and redraw the status frame in place.
# Globals:
#   PREV_FRAME_TS    (read, written) epoch seconds of the previous frame; 0
#                    means no frame has been drawn yet
#   PREV_PIDS        (written) PIDs shown in this frame
#   PREV_READ_BYTES  (written) bytes read per PID, parallel to PREV_PIDS
#   LAST_FRAME_LINES (read, written) number of lines the last frame occupied
# Arguments:
#   $1 - PID whose start time is reported as the start of the restore
# Outputs:
#   The frame on stdout: ten summary lines, a blank line, a two-line table
#   header and one row per worker. Once a frame has been drawn, the cursor is
#   first moved back up so the new frame overwrites the old one.
# Returns:
#   0 after drawing a frame; 1 (after printing a message) when no pg_restore
#   process is running or none of them has FD 4 open.
render_frame() {
  local first_pid="$1"
  local start_time start_epoch now_epoch elapsed frame_ts delta_secs
  local dump_path dump_size total_read_bytes progress eta_seconds
  local avg_rate_bytes total_rate_bytes eta_rate worker_count
  local -a pids table_lines current_pids current_reads
  local pid line fd path read_bytes has_rows prev_read delta_read per_rate
  local read_map read_pid read_value
  local frame_lines

  # Read the PID list with a plain read loop rather than `mapfile`, which
  # only exists in bash 4 and later.
  pids=()
  while IFS= read -r pid; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done < <(pgrep -x pg_restore | sort -n)
  if [[ "${#pids[@]}" -eq 0 ]]; then
    printf 'No running pg_restore processes found.\n'
    return 1
  fi

  start_time="$(ps -o lstart= -p "$first_pid" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')" || true
  now_epoch="$(date '+%s')"
  # If the start time is missing or cannot be parsed, fall back to "now" so
  # the arithmetic below stays valid (elapsed is then reported as 0). stderr
  # is silenced on purpose: an error message would corrupt the frame layout.
  start_epoch="$(date -j -f '%a %b %e %T %Y' "$start_time" '+%s' 2>/dev/null)" || start_epoch=''
  if [[ ! "$start_epoch" =~ ^[0-9]+$ ]]; then
    start_epoch="$now_epoch"
  fi
  elapsed=$((now_epoch - start_epoch))

  # Seconds since the previous frame; stays 0 on the first frame, which makes
  # every rate 0, and is never below 1 afterwards.
  frame_ts="$now_epoch"
  delta_secs=0
  if (( PREV_FRAME_TS > 0 )); then
    delta_secs=$((frame_ts - PREV_FRAME_TS))
    if (( delta_secs < 1 )); then
      delta_secs=1
    fi
  fi

  dump_path=''
  total_read_bytes=0
  total_rate_bytes=0
  worker_count=0
  table_lines=()
  current_pids=()
  current_reads=()
  has_rows=0

  # One batched query for all PIDs; lines are "<pid>\t<bytes read>".
  read_map="$(get_read_bytes_batch "${pids[@]}")"

  for pid in "${pids[@]}"; do
    # Ask lsof for the file behind FD 4 of this process; -Fn output carries
    # the descriptor on an "f" line and the path on an "n" line.
    line="$(lsof -a -p "$pid" -d 4 -Fn 2>/dev/null | awk '
      /^f/ { fd = substr($0, 2) }
      /^n/ { path = substr($0, 2) }
      END {
        if (fd != "" && path != "") {
          printf "%s\t%s", fd, path
        }
      }
    ')"
    # Processes without FD 4 are left out of the frame.
    if [[ -z "$line" ]]; then
      continue
    fi

    read_bytes=0
    while IFS=$'\t' read -r read_pid read_value; do
      if [[ "$read_pid" == "$pid" ]]; then
        read_bytes="$read_value"
        break
      fi
    done <<< "$read_map"
    if [[ ! "$read_bytes" =~ ^[0-9]+$ ]]; then
      read_bytes=0
    fi

    prev_read="$(get_prev_read_bytes "$pid")"
    delta_read=$((read_bytes - prev_read))
    if (( delta_read < 0 )); then
      delta_read=0
    fi
    per_rate=0
    if (( delta_secs > 0 )); then
      per_rate=$((delta_read / delta_secs))
    fi

    fd="${line%%$'\t'*}"
    path="${line#*$'\t'}"
    # The first path found is reported as the dump file.
    if [[ -z "$dump_path" ]]; then
      dump_path="$path"
    fi

    total_read_bytes=$((total_read_bytes + read_bytes))
    total_rate_bytes=$((total_rate_bytes + per_rate))
    worker_count=$((worker_count + 1))
    table_lines+=("$pid"$'\t'"$fd"$'\t'"$path"$'\t'"$read_bytes"$'\t'"$per_rate")
    current_pids+=("$pid")
    current_reads+=("$read_bytes")
    has_rows=1
  done

  if [[ "$has_rows" -eq 0 || -z "$dump_path" ]]; then
    printf 'No pg_restore dump-file descriptors found on FD 4.\n'
    return 1
  fi

  # Remember this frame's counters so the next frame can compute rates.
  PREV_PIDS=("${current_pids[@]}")
  PREV_READ_BYTES=("${current_reads[@]}")
  PREV_FRAME_TS="$frame_ts"

  dump_size="$(stat -f '%z' "$dump_path")"
  avg_rate_bytes=0
  if (( elapsed > 0 )); then
    avg_rate_bytes=$((total_read_bytes / elapsed))
  fi

  progress="0.00"
  eta_seconds=0
  if (( dump_size > 0 )); then
    # Percentage of the dump size, rounded to four decimals, clamped to
    # 0..100 and printed with two decimals.
    progress="$(awk -v total="$total_read_bytes" -v size="$dump_size" 'BEGIN {
      p = sprintf("%.4f", (total / size) * 100) + 0
      if (p < 0) p = 0
      if (p > 100) p = 100
      printf "%.2f", p
    }')"
    # Prefer the current rate for the ETA; use the average while it is 0.
    eta_rate="$total_rate_bytes"
    if (( eta_rate < 1 )); then
      eta_rate="$avg_rate_bytes"
    fi
    if (( eta_rate > 0 && total_read_bytes < dump_size )); then
      eta_seconds=$(((dump_size - total_read_bytes) / eta_rate))
    fi
  fi

  # 13 fixed lines (10 summary, 1 blank, 2 table header) plus one per worker.
  frame_lines=$((13 + worker_count))
  if (( LAST_FRAME_LINES > 0 )); then
    # Move up over the previous frame and clear to the end of the screen,
    # using raw escape sequences when tput fails.
    tput cuu "$LAST_FRAME_LINES" 2>/dev/null || printf '\033[%dA' "$LAST_FRAME_LINES"
    tput ed 2>/dev/null || printf '\033[J'
  fi

  printf 'Dump file: %s\n' "$dump_path"
  printf 'Worker processes: %s\n' "$worker_count"
  printf 'Dump size: %s\n' "$(format_bytes "$dump_size")"
  printf 'Total bytes read (all pg_restore workers): %s\n' "$(format_bytes "$total_read_bytes")"
  printf 'Progress vs dump size: %s%%\n' "$progress"
  printf 'Current total read rate: %s/s\n' "$(format_bytes "$total_rate_bytes")"
  printf 'Average read rate since start: %s/s\n' "$(format_bytes "$avg_rate_bytes")"
  printf 'Started: %s\n' "$start_time"
  printf 'Elapsed: %s\n' "$(format_duration "$elapsed")"
  printf 'Estimated remaining: %s\n' "$(format_duration "$eta_seconds")"
  printf '\n'
  printf '%8s %4s %-56s %14s %14s\n' 'PID' 'FD' 'PATH' 'BYTES READ' 'READ RATE'
  printf '%8s %4s %-56s %14s %14s\n' '--------' '----' '--------------------------------------------------------' '--------------' '--------------'

  # Each stored row is "pid\tfd\tpath\tbytes\trate"; peel the fields off
  # from the left, the rate being whatever follows the last tab.
  for line in "${table_lines[@]}"; do
    pid="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    fd="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    path="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    read_bytes="${line%%$'\t'*}"
    per_rate="${line##*$'\t'}"
    printf '%8s %4s %-56.56s %14s %14s\n' "$pid" "$fd" "$path" "$(format_bytes "$read_bytes")" "$(format_bytes "$per_rate")/s"
  done

  LAST_FRAME_LINES="$frame_lines"
}
# Verify the external tools used while rendering frames are installed.
require_cmd date
require_cmd lsof
require_cmd pgrep
require_cmd ps
require_cmd stat

# State shared between frames and with the exit handler.
READ_BYTES_PROVIDER=''
PREV_FRAME_TS=0
PREV_PIDS=()
PREV_READ_BYTES=()
LAST_FRAME_LINES=0

# Install the exit handler before selecting the provider: selecting "cc"
# compiles a helper into a temp file, and the handler must already be in
# place so that file is removed on every exit path after it is created.
trap cleanup EXIT
init_read_bytes_provider

# Hide the cursor while frames are redrawn; cleanup shows it again.
tput civis 2>/dev/null || true

# The lowest PID among the pg_restore processes supplies the start time.
first_pid="$(pgrep -x pg_restore | sort -n | head -n1 || true)"
if [[ -z "$first_pid" ]]; then
  echo "No running pg_restore processes found." >&2
  exit 1
fi

# Redraw once per second until render_frame reports nothing left to show.
while true; do
  render_frame "$first_pid" || exit 0
  sleep 1
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment