Created
April 20, 2026 22:38
-
-
Save NickCrews/1a0a7592eb6731ae94c5dee7df731f16 to your computer and use it in GitHub Desktop.
Monitors the progress of a running pg_restore process, reporting total bytes read and throughput per thread, as well as timing info and ETA.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # This script monitors the progress of a running pg_restore process. | |
| # | |
| # Use it by starting a `pg_restore`: | |
| # pg_restore --dbname=mydb --jobs=8 /path/to/dumpfile.dump | |
| # Then, in another terminal, run this script: | |
| # ./watch_pg_restore.sh | |
| # | |
| # The script will look for all running `pg_restore` processes, | |
| # identify the dump file being read by each worker (by looking for open file descriptors on FD 4), | |
| # and use the `proc_pid_rusage` C function to get the total bytes read from the dump file by each process. | |
| # It will then aggregate this information to show the overall progress of the restore operation, | |
| # including total bytes read, read rate, and estimated time remaining based on the dump size. | |
| # | |
| # I'm not positive this is totally accurate, but it does at least provide a rough estimate of the progress, | |
| # and lets you see which processes are doing the most work. | |
| # | |
| # This requires either `python3` or `cc` to be available on the system to query the read bytes from the processes. | |
| # It also requires several common unix utilities like `lsof`, `pgrep`, `ps`, and `stat`. | |
| # If this doesn't work for you, or you want to customize the output, your favorite coding | |
| # agent should be pretty capable of modifying it. This is how I wrote the script initially. | |
| # | |
| # The output looks something like this (updating every second). | |
| # | |
| # Dump file: /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump | |
| # Worker processes: 7 | |
| # Dump size: 80.69 GiB | |
| # Total bytes read (all pg_restore workers): 71.21 GiB | |
| # Progress vs dump size: 88.25% | |
| # Current total read rate: 32.00 MiB/s | |
| # Average read rate since start: 14.60 MiB/s | |
| # Started: Fri Apr 10 11:58:25 2026 | |
| # Elapsed: 01:23:15 | |
| # Estimated remaining: 00:05:03 | |
| # PID FD PATH BYTES READ READ RATE | |
| # -------- ---- -------------------------------------------------------- -------------- -------------- | |
| # 7709 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 0 B 0 B/s | |
| # 7724 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 23.94 GiB 32.00 MiB/s | |
| # 7725 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 15.94 GiB 0 B/s | |
| # 7726 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 11.16 GiB 0 B/s | |
| # 7727 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 8.45 GiB 0 B/s | |
| # 7728 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 6.61 GiB 0 B/s | |
| # 7729 4 /Users/nc/code/dumpduck/.scratch/fec_fitem_sched_a.dump 5.11 GiB 0 B/s | |
# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails (not only the last one).
set -o errexit -o nounset -o pipefail
# require_cmd NAME
#
# Verify that NAME resolves to something the shell can run. When it does
# not, print a diagnostic on stderr and terminate the script with status 1.
require_cmd() {
  local wanted="$1"

  # Fast path: the command exists, nothing else to do.
  command -v "$wanted" >/dev/null 2>&1 && return 0

  printf '%s\n' "Missing required command: $wanted" >&2
  exit 1
}
# format_bytes COUNT
#
# Print COUNT (a byte count) using binary units, without a trailing newline.
# Plain bytes are printed as an integer ("512 B"); anything scaled to KiB or
# above is printed with two decimals ("1.50 KiB"). PiB is the largest unit,
# so larger values stay in PiB.
format_bytes() {
  local raw_bytes="$1"

  awk -v amount="$raw_bytes" '
    BEGIN {
      unit_count = split("B KiB MiB GiB TiB PiB", suffix, " ")

      # Divide down until the value fits the unit or we run out of units.
      for (idx = 1; amount >= 1024 && idx < unit_count; idx++) {
        amount /= 1024
      }

      fmt = (idx == 1) ? "%d %s" : "%.2f %s"
      printf fmt, amount, suffix[idx]
    }'
}
# format_duration SECONDS
#
# Print SECONDS as HH:MM:SS (no trailing newline). Negative inputs are
# clamped to zero; the hour field grows beyond two digits when needed.
format_duration() {
  local secs_in="$1"

  # Clamp negative durations to zero.
  (( secs_in >= 0 )) || secs_in=0

  printf '%02d:%02d:%02d' \
    "$(( secs_in / 3600 ))" \
    "$(( (secs_in % 3600) / 60 ))" \
    "$(( secs_in % 60 ))"
}
# get_read_bytes_batch_python PID...
#
# For every PID argument, print one "<pid>\t<bytes>" line, where <bytes> is
# the ri_diskio_bytesread counter reported by proc_pid_rusage() from
# libproc.dylib. Arguments that are not integers are skipped; PIDs
# for which the call fails are reported with 0 bytes.
get_read_bytes_batch_python() {
  python3 - "$@" <<'PY'
import ctypes
import sys

RUSAGE_INFO_V2 = 2

# Every member after ri_uuid is a 64-bit counter; the order must match the
# C struct because ctypes lays the fields out sequentially.
_U64_FIELDS = (
    "ri_user_time",
    "ri_system_time",
    "ri_pkg_idle_wkups",
    "ri_interrupt_wkups",
    "ri_pageins",
    "ri_wired_size",
    "ri_resident_size",
    "ri_phys_footprint",
    "ri_proc_start_abstime",
    "ri_proc_exit_abstime",
    "ri_child_user_time",
    "ri_child_system_time",
    "ri_child_pkg_idle_wkups",
    "ri_child_interrupt_wkups",
    "ri_child_pageins",
    "ri_child_elapsed_abstime",
    "ri_diskio_bytesread",
    "ri_diskio_byteswritten",
)


class RUsageInfoV2(ctypes.Structure):
    _fields_ = [("ri_uuid", ctypes.c_uint8 * 16)] + [
        (name, ctypes.c_uint64) for name in _U64_FIELDS
    ]


libproc = ctypes.CDLL("libproc.dylib")
proc_pid_rusage = libproc.proc_pid_rusage
proc_pid_rusage.argtypes = [
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(RUsageInfoV2),
]
proc_pid_rusage.restype = ctypes.c_int

for raw_arg in sys.argv[1:]:
    try:
        pid = int(raw_arg)
    except ValueError:
        # Not a PID; emit nothing for it.
        continue
    usage = RUsageInfoV2()
    status = proc_pid_rusage(pid, RUSAGE_INFO_V2, ctypes.byref(usage))
    bytes_read = int(usage.ri_diskio_bytesread) if status == 0 else 0
    print(f"{pid}\t{bytes_read}")
PY
}
# build_rusage_probe
#
# Compile a tiny C helper that prints "<pid>\t<bytes>" for each PID argument
# using proc_pid_rusage(), and store the path of the resulting executable in
# the global RUSAGE_PROBE_BIN.
#
# Returns non-zero (after removing every temporary file it created and
# resetting RUSAGE_PROBE_BIN to the empty string) when a temporary file
# cannot be created or the compiler fails, so nothing is left behind in the
# temp directory on failure.
build_rusage_probe() {
  local src

  RUSAGE_PROBE_BIN="$(mktemp -t watch_pg_restore_rusage.XXXXXX)"
  if ! src="$(mktemp -t watch_pg_restore_src.XXXXXX)"; then
    rm -f "$RUSAGE_PROBE_BIN"
    RUSAGE_PROBE_BIN=''
    return 1
  fi

  cat > "$src" <<'C'
#include <stdio.h>
#include <stdlib.h>
#include <libproc.h>
#include <sys/resource.h>

int main(int argc, char **argv) {
    int i;
    for (i = 1; i < argc; i++) {
        int pid = atoi(argv[i]);
        rusage_info_current ri;
        int rc = proc_pid_rusage(pid, RUSAGE_INFO_CURRENT, (rusage_info_t *)&ri);
        if (rc == 0) {
            printf("%d\t%llu\n", pid, ri.ri_diskio_bytesread);
        } else {
            printf("%d\t0\n", pid);
        }
    }
    return 0;
}
C

  # The temp source has no .c suffix, hence the explicit `-x c`.
  if ! cc -x c -O2 -Wall -Wextra -o "$RUSAGE_PROBE_BIN" "$src"; then
    rm -f "$src" "$RUSAGE_PROBE_BIN"
    RUSAGE_PROBE_BIN=''
    echo "Failed to compile the rusage probe with cc" >&2
    return 1
  fi

  rm -f "$src"
}
# get_read_bytes_batch_cc PID...
#
# Run the executable whose path is stored in RUSAGE_PROBE_BIN, forwarding
# all PID arguments; its stdout is this function's output.
get_read_bytes_batch_cc() {
  local probe="$RUSAGE_PROBE_BIN"
  "$probe" "$@"
}
# get_read_bytes_batch PID...
#
# Dispatch to the backend selected in READ_BYTES_PROVIDER: the Python
# implementation when it is "python3", the compiled probe for any other value.
get_read_bytes_batch() {
  case "$READ_BYTES_PROVIDER" in
    python3) get_read_bytes_batch_python "$@" ;;
    *)       get_read_bytes_batch_cc "$@" ;;
  esac
}
# init_read_bytes_provider
#
# Choose how per-process read counters are obtained and record the choice in
# READ_BYTES_PROVIDER. python3 is preferred; otherwise, if a C compiler is
# available, the probe binary is built and "cc" is selected. With neither
# tool present the script exits with status 1.
init_read_bytes_provider() {
  if command -v python3 >/dev/null 2>&1; then
    READ_BYTES_PROVIDER="python3"
  elif command -v cc >/dev/null 2>&1; then
    build_rusage_probe
    READ_BYTES_PROVIDER="cc"
  else
    echo "Missing required command: need either python3 or cc" >&2
    exit 1
  fi
}
# cleanup
#
# Exit handler: emit a newline if at least one frame was drawn
# (LAST_FRAME_LINES > 0), make the cursor visible again, and delete the
# compiled probe binary when RUSAGE_PROBE_BIN names one.
cleanup() {
  (( LAST_FRAME_LINES > 0 )) && printf '\n'

  # Best effort: tput may be missing or the terminal may lack the capability.
  tput cnorm 2>/dev/null || true

  [[ -z "${RUSAGE_PROBE_BIN:-}" ]] || rm -f "$RUSAGE_PROBE_BIN"
}
# get_prev_read_bytes PID
#
# Print the byte count stored for PID in the parallel global arrays
# PREV_PIDS / PREV_READ_BYTES, or 0 when PID is not present in PREV_PIDS.
get_prev_read_bytes() {
  local wanted_pid="$1"
  local slot

  for slot in "${!PREV_PIDS[@]}"; do
    [[ "${PREV_PIDS[$slot]}" == "$wanted_pid" ]] || continue
    printf '%s\n' "${PREV_READ_BYTES[$slot]}"
    return
  done

  printf '%s\n' 0
}
# render_frame FIRST_PID
#
# Sample every running pg_restore process once and redraw the status frame.
#
# Arguments:
#   FIRST_PID  PID whose start time (from `ps -o lstart=`) is used as the
#              start of the restore for the elapsed/average calculations.
#
# Globals read:    PREV_FRAME_TS, PREV_PIDS, PREV_READ_BYTES (via
#                  get_prev_read_bytes), LAST_FRAME_LINES
# Globals written: PREV_FRAME_TS, PREV_PIDS, PREV_READ_BYTES, LAST_FRAME_LINES
#
# Returns 1 (after printing a one-line message) when no pg_restore process
# is running or none of them has a file open on FD 4; returns 0 otherwise.
#
# NOTE: the per-process byte counts come from get_read_bytes_batch, and the
# first file found on FD 4 is treated as the dump file for the size/ETA math.
render_frame() {
  local first_pid="$1"
  local start_time start_epoch now_epoch elapsed frame_ts delta_secs
  local dump_path dump_size total_read_bytes progress eta_seconds
  local avg_rate_bytes total_rate_bytes eta_rate worker_count
  local -a pids table_lines current_pids current_reads
  local pid line fd path read_bytes has_rows prev_read delta_read per_rate
  local read_map read_pid read_value
  local progress_num frame_lines

  # Collect PIDs with a plain read loop instead of `mapfile`: mapfile only
  # exists in bash >= 4, while the bash shipped with macOS is 3.2.
  pids=()
  while IFS= read -r pid; do
    pids+=("$pid")
  done < <(pgrep -x pg_restore | sort -n)
  if [[ "${#pids[@]}" -eq 0 ]]; then
    printf 'No running pg_restore processes found.\n'
    return 1
  fi

  # Start time of the reference process; either command may fail if that
  # process has already exited, so failures are tolerated here.
  start_time="$(ps -o lstart= -p "$first_pid" 2>/dev/null | sed 's/^[[:space:]]*//; s/[[:space:]]*$//' || true)"
  start_epoch="$(date -j -f '%a %b %e %T %Y' "$start_time" '+%s' 2>/dev/null || true)"
  now_epoch="$(date '+%s')"
  # Only do the subtraction with a parsed start time; an empty operand would
  # be an arithmetic syntax error.
  elapsed=0
  if [[ "$start_epoch" =~ ^[0-9]+$ ]]; then
    elapsed=$((now_epoch - start_epoch))
  fi

  # Seconds since the previous frame; stays 0 on the very first frame so no
  # instantaneous rate is computed from a missing baseline.
  frame_ts="$now_epoch"
  delta_secs=0
  if (( PREV_FRAME_TS > 0 )); then
    delta_secs=$((frame_ts - PREV_FRAME_TS))
    if (( delta_secs < 1 )); then
      delta_secs=1
    fi
  fi

  dump_path=''
  total_read_bytes=0
  total_rate_bytes=0
  worker_count=0
  table_lines=()
  current_pids=()
  current_reads=()
  has_rows=0

  # One batched query for all PIDs; output is "<pid>\t<bytes>" per line.
  read_map="$(get_read_bytes_batch "${pids[@]}")"

  for pid in "${pids[@]}"; do
    # Ask lsof for FD 4 of this process in field format and condense it to
    # "<fd>\t<path>"; empty output means the process has no FD 4.
    line="$(lsof -a -p "$pid" -d 4 -Fn 2>/dev/null | awk '
      /^f/ { fd = substr($0, 2) }
      /^n/ { path = substr($0, 2) }
      END {
        if (fd != "" && path != "") {
          printf "%s\t%s", fd, path
        }
      }
    ')"
    if [[ -z "$line" ]]; then
      continue
    fi

    # Look up this PID's counter in the batched result.
    read_bytes=0
    while IFS=$'\t' read -r read_pid read_value; do
      if [[ "$read_pid" == "$pid" ]]; then
        read_bytes="$read_value"
        break
      fi
    done <<< "$read_map"
    if [[ ! "$read_bytes" =~ ^[0-9]+$ ]]; then
      read_bytes=0
    fi

    prev_read="$(get_prev_read_bytes "$pid")"
    delta_read=$((read_bytes - prev_read))
    if (( delta_read < 0 )); then
      delta_read=0
    fi
    per_rate=0
    if (( delta_secs > 0 )); then
      per_rate=$((delta_read / delta_secs))
    fi

    fd="${line%%$'\t'*}"
    path="${line#*$'\t'}"
    if [[ -z "$dump_path" ]]; then
      dump_path="$path"
    fi

    total_read_bytes=$((total_read_bytes + read_bytes))
    total_rate_bytes=$((total_rate_bytes + per_rate))
    worker_count=$((worker_count + 1))
    table_lines+=("$pid"$'\t'"$fd"$'\t'"$path"$'\t'"$read_bytes"$'\t'"$per_rate")
    current_pids+=("$pid")
    current_reads+=("$read_bytes")
    has_rows=1
  done

  if [[ "$has_rows" -eq 0 || -z "$dump_path" ]]; then
    printf 'No pg_restore dump-file descriptors found on FD 4.\n'
    return 1
  fi

  # Remember this sample as the baseline for the next frame's rates.
  PREV_PIDS=("${current_pids[@]}")
  PREV_READ_BYTES=("${current_reads[@]}")
  PREV_FRAME_TS="$frame_ts"

  # A failed or non-numeric stat result is treated as "size unknown" (0)
  # rather than being fed into arithmetic.
  dump_size="$(stat -f '%z' "$dump_path" 2>/dev/null || true)"
  if [[ ! "$dump_size" =~ ^[0-9]+$ ]]; then
    dump_size=0
  fi

  avg_rate_bytes=0
  if (( elapsed > 0 )); then
    avg_rate_bytes=$((total_read_bytes / elapsed))
  fi

  progress="0.00"
  eta_seconds=0
  if (( dump_size > 0 )); then
    progress_num="$(awk -v total="$total_read_bytes" -v size="$dump_size" 'BEGIN { printf "%.4f", (total / size) * 100 }')"
    progress="$(awk -v p="$progress_num" 'BEGIN { if (p < 0) p = 0; if (p > 100) p = 100; printf "%.2f", p }')"
    # Prefer the instantaneous rate for the ETA; fall back to the average
    # since start when nothing was read during the last interval.
    eta_rate="$total_rate_bytes"
    if (( eta_rate < 1 )); then
      eta_rate="$avg_rate_bytes"
    fi
    if (( eta_rate > 0 && total_read_bytes < dump_size )); then
      eta_seconds=$(((dump_size - total_read_bytes) / eta_rate))
    fi
  fi

  # 13 fixed lines (10 summary + blank + 2 table header) plus one per worker.
  frame_lines=$((13 + worker_count))

  # Move the cursor back to the top of the previous frame and clear it.
  if (( LAST_FRAME_LINES > 0 )); then
    tput cuu "$LAST_FRAME_LINES" 2>/dev/null || printf '\033[%dA' "$LAST_FRAME_LINES"
    tput ed 2>/dev/null || printf '\033[J'
  fi

  printf 'Dump file: %s\n' "$dump_path"
  printf 'Worker processes: %s\n' "$worker_count"
  printf 'Dump size: %s\n' "$(format_bytes "$dump_size")"
  printf 'Total bytes read (all pg_restore workers): %s\n' "$(format_bytes "$total_read_bytes")"
  printf 'Progress vs dump size: %s%%\n' "$progress"
  printf 'Current total read rate: %s/s\n' "$(format_bytes "$total_rate_bytes")"
  printf 'Average read rate since start: %s/s\n' "$(format_bytes "$avg_rate_bytes")"
  printf 'Started: %s\n' "$start_time"
  printf 'Elapsed: %s\n' "$(format_duration "$elapsed")"
  printf 'Estimated remaining: %s\n' "$(format_duration "$eta_seconds")"
  printf '\n'
  printf '%8s %4s %-56s %14s %14s\n' 'PID' 'FD' 'PATH' 'BYTES READ' 'READ RATE'
  printf '%8s %4s %-56s %14s %14s\n' '--------' '----' '--------------------------------------------------------' '--------------' '--------------'

  # Unpack each "<pid>\t<fd>\t<path>\t<bytes>\t<rate>" record and print a row.
  for line in "${table_lines[@]}"; do
    pid="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    fd="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    path="${line%%$'\t'*}"
    line="${line#*$'\t'}"
    read_bytes="${line%%$'\t'*}"
    per_rate="${line##*$'\t'}"
    printf '%8s %4s %-56.56s %14s %14s\n' "$pid" "$fd" "$path" "$(format_bytes "$read_bytes")" "$(format_bytes "$per_rate")/s"
  done

  LAST_FRAME_LINES="$frame_lines"
}
# ---------------------------------------------------------------------------
# Main script
# ---------------------------------------------------------------------------

# Fail early if any external tool used by the functions above is missing.
require_cmd awk
require_cmd date
require_cmd head
require_cmd lsof
require_cmd pgrep
require_cmd ps
require_cmd sed
require_cmd sort
require_cmd stat

# Global state shared between frames and with the exit handler.
READ_BYTES_PROVIDER=''
RUSAGE_PROBE_BIN=''
PREV_FRAME_TS=0
PREV_PIDS=()
PREV_READ_BYTES=()
LAST_FRAME_LINES=0

# Install the exit handler BEFORE selecting the provider: selecting it may
# compile the probe into a temp file, which must be removed even if the
# script fails or is interrupted right after.
trap cleanup EXIT
init_read_bytes_provider

# Hide the cursor while frames are being redrawn (restored by cleanup).
tput civis 2>/dev/null || true

# The lowest-numbered pg_restore PID is used as the reference for the
# restore's start time.
first_pid="$(pgrep -x pg_restore | sort -n | head -n1 || true)"
if [[ -z "$first_pid" ]]; then
  echo "No running pg_restore processes found." >&2
  exit 1
fi

# Redraw once per second until render_frame reports there is nothing left
# to show.
while true; do
  render_frame "$first_pid" || exit 0
  sleep 1
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment