Skip to content

Instantly share code, notes, and snippets.

@mcornea
Created May 12, 2026 12:28
Show Gist options
  • Select an option

  • Save mcornea/095d99b0d5b383d3dcd0f9cf858189c2 to your computer and use it in GitHub Desktop.

Select an option

Save mcornea/095d99b0d5b383d3dcd0f9cf858189c2 to your computer and use it in GitHub Desktop.
run_karpenter_pprof_analysis
#!/bin/bash
# =============================================================================
# Karpenter / kube-apiserver / etcd — pprof Continuous Capture
# =============================================================================
# Usage: ./run_karpenter_pprof_analysis.sh [OPTIONS]
#
# Captures CPU and heap pprof profiles from karpenter, kube-apiserver, and etcd
# every interval (default 60s) until stopped with Ctrl+C or until the optional
# --max-captures limit is reached.
# On exit, produces a summary analysis of all captured data.
#
# This is for ROSA HCP clusters where karpenter, etcd, and kube-apiserver all
# run in the management cluster's HCP namespace, and kube-apiserver is accessed
# via the hosted cluster kubeconfig.
#
# Options:
# --output-dir DIR Output directory (default: ./pprof-analysis-YYYYMMDD-HHMMSS)
# --interval SECS Capture interval in seconds (default: 60)
# --cpu-duration SECS CPU profile duration in seconds (default: 30)
# --max-captures N Stop after N capture rounds and run the analysis (default: 0 = no limit)
# --mc-kubeconfig PATH Management cluster kubeconfig
# --hc-kubeconfig PATH Hosted cluster kubeconfig
#
# Requirements:
# - oc
# - go (optional, for pprof analysis at the end)
# =============================================================================
set -uo pipefail
# --------------- Configuration ---------------
# Defaults. Every value can be overridden by the matching command-line option;
# the two kubeconfig paths additionally honour a pre-set environment variable
# of the same name before falling back to the built-in path.
OUTPUT_DIR=""
CAPTURE_INTERVAL=60
CPU_DURATION=30
MAX_CAPTURES=0
MC_KUBECONFIG="${MC_KUBECONFIG:-/home/marius/rosa-create/autonode/mc_kubeconfig}"
HC_KUBECONFIG="${HC_KUBECONFIG:-/home/marius/rosa-create/autonode/hc_kubeconfig}"

# Exit with an error unless $2 is a non-negative decimal integer.
#   $1 - option name (used in the error message)
#   $2 - value supplied by the user
_require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "ERROR: $1 expects a non-negative integer, got '$2'" >&2
    exit 1
  fi
}

# Parse the command-line options into the configuration globals above.
#   $@ - the script's arguments
# Exits 1 (message on stderr) on an unknown option, a missing value, or a
# non-numeric value for a numeric option.
parse_args() {
  local opt val
  while (( $# > 0 )); do
    opt="$1"
    case "$opt" in
      --output-dir|--interval|--cpu-duration|--max-captures|--mc-kubeconfig|--hc-kubeconfig)
        # Check explicitly: under `set -u` a bare "$2" would otherwise abort
        # with an unhelpful "unbound variable" message.
        if (( $# < 2 )); then
          echo "ERROR: option $opt requires a value" >&2
          exit 1
        fi
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    val="$2"
    case "$opt" in
      --output-dir) OUTPUT_DIR="$val" ;;
      --mc-kubeconfig) MC_KUBECONFIG="$val" ;;
      --hc-kubeconfig) HC_KUBECONFIG="$val" ;;
      # 10# forces base 10 so values such as "08" are not parsed as octal.
      --interval) _require_uint "$opt" "$val"; CAPTURE_INTERVAL=$((10#$val)) ;;
      --cpu-duration) _require_uint "$opt" "$val"; CPU_DURATION=$((10#$val)) ;;
      --max-captures) _require_uint "$opt" "$val"; MAX_CAPTURES=$((10#$val)) ;;
    esac
    shift 2
  done
}

parse_args "$@"
OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)/pprof-analysis-$(date +%Y%m%d-%H%M%S)}"
# Thin wrappers around `oc`: each one pins the kubeconfig for one cluster and
# forwards every remaining argument untouched.

# oc against the management cluster (kubeconfig taken from MC_KUBECONFIG).
mc_oc() {
  local kubeconfig_flag="--kubeconfig=${MC_KUBECONFIG}"
  oc "${kubeconfig_flag}" "$@"
}

# oc against the hosted cluster (kubeconfig taken from HC_KUBECONFIG).
hc_oc() {
  local kubeconfig_flag="--kubeconfig=${HC_KUBECONFIG}"
  oc "${kubeconfig_flag}" "$@"
}
# --------------- Preflight checks ---------------
# Print the banner, make sure `oc` is installed, and confirm that both
# kubeconfigs authenticate. Any failure aborts the script with exit status 1.
echo "=============================================="
echo " Karpenter/KAS/etcd pprof Continuous Capture"
echo "=============================================="
echo ""
if ! command -v oc &>/dev/null; then
  echo "ERROR: 'oc' not found in PATH" >&2
  exit 1
fi
echo "[*] Verifying kubeconfigs..."
# A single `whoami` per cluster both validates the credentials and captures
# the user name, instead of issuing the same request twice.
if ! MC_USER=$(mc_oc whoami 2>/dev/null); then
  echo "ERROR: MC kubeconfig failed (${MC_KUBECONFIG})" >&2
  exit 1
fi
echo " MC: logged in as $MC_USER"
if ! HC_USER=$(hc_oc whoami 2>/dev/null); then
  echo "ERROR: HC kubeconfig failed (${HC_KUBECONFIG})" >&2
  exit 1
fi
echo " HC: logged in as $HC_USER"
# --------------- Discover HCP namespace ---------------
printf '\n%s\n' "[*] Discovering HCP namespace..."
# Search every namespace of the management cluster for pods labelled
# app=karpenter and keep the namespace of the first hit.
HCP_NS=$(
  mc_oc get pods -A -l app=karpenter --no-headers \
    -o custom-columns=NS:.metadata.namespace 2>/dev/null \
    | head -1
)
[[ -n "$HCP_NS" ]] || {
  echo "ERROR: Could not find karpenter pod / HCP namespace" >&2
  exit 1
}
echo " HCP namespace: $HCP_NS"

# --------------- Discover pods ---------------
printf '\n%s\n' "[*] Discovering pods..."

# Print the names (one per line) of the pods in $HCP_NS carrying label app=$1.
# Errors from oc are discarded, so a failed lookup looks like "no pods".
_pods_with_app() {
  mc_oc get pods -n "$HCP_NS" -l "app=$1" --no-headers \
    -o custom-columns=NAME:.metadata.name 2>/dev/null
}

# Karpenter: only the first listed pod is profiled.
KARPENTER_POD=$(_pods_with_app karpenter | head -1)
[[ -n "$KARPENTER_POD" ]] || {
  echo "ERROR: No karpenter pod found" >&2
  exit 1
}
echo " Karpenter: $KARPENTER_POD"

# etcd: all pods, sorted by name so array indexes are stable across runs.
mapfile -t ETCD_PODS < <(_pods_with_app etcd | sort)
(( ${#ETCD_PODS[@]} > 0 )) || {
  echo "ERROR: No etcd pods found" >&2
  exit 1
}
echo " etcd: ${ETCD_PODS[*]}"

# kube-apiserver: all pods, sorted by name.
mapfile -t KAS_PODS < <(_pods_with_app kube-apiserver | sort)
(( ${#KAS_PODS[@]} > 0 )) || {
  echo "ERROR: No kube-apiserver pods found" >&2
  exit 1
}
echo " kube-apiserver: ${KAS_PODS[*]}"
# --------------- etcd TLS cert paths ---------------
# Client TLS material handed to the curl that runs inside the etcd container
# (via `oc exec`), so these paths are resolved in that container's filesystem.
ETCD_CACERT="/etc/etcd/tls/etcd-ca/ca.crt"
ETCD_CERT="/etc/etcd/tls/client/etcd-client.crt"
ETCD_KEY="/etc/etcd/tls/client/etcd-client.key"

# --------------- Verify pprof endpoints ---------------
printf '\n%s\n' "[*] Verifying pprof endpoints..."

# etcd: probe the heap endpoint of the first etcd pod and keep only the HTTP
# status code (the body is sent to /dev/null inside the container).
FIRST_ETCD="${ETCD_PODS[0]}"
ETCD_HTTP=$(
  mc_oc exec -n "$HCP_NS" "$FIRST_ETCD" -c etcd -- \
    curl -s -o /dev/null -w '%{http_code}' \
    --cacert "$ETCD_CACERT" --cert "$ETCD_CERT" --key "$ETCD_KEY" \
    https://localhost:2379/debug/pprof/heap 2>&1
)
case "$ETCD_HTTP" in
  200)
    echo " etcd pprof: OK (HTTP 200)"
    ;;
  *)
    echo "ERROR: etcd pprof returned HTTP $ETCD_HTTP" >&2
    exit 1
    ;;
esac

# kube-apiserver: the profile is binary, so only its size is captured
# (command substitution cannot hold NUL bytes). Fewer than 100 bytes is
# treated as "endpoint not accessible".
KAS_SIZE=$(hc_oc get --raw /debug/pprof/heap 2>/dev/null | wc -c)
if (( KAS_SIZE < 100 )); then
  echo "ERROR: kube-apiserver pprof not accessible (got $KAS_SIZE bytes)" >&2
  exit 1
fi
echo " kube-apiserver pprof: OK ($KAS_SIZE bytes)"
# --------------- Enable karpenter profiling ---------------
# Ensure the karpenter deployment runs with --enable-profiling (patching it if
# necessary), then probe its pprof endpoint through a temporary port-forward.
# Karpenter profiling is treated as best-effort: every failure in this section
# is reported as a WARNING and the script carries on with etcd/KAS captures.
echo ""
echo "[*] Checking karpenter profiling..."
KARPENTER_ARGS=$(mc_oc get deployment karpenter -n "$HCP_NS" -o jsonpath='{.spec.template.spec.containers[0].args}' 2>/dev/null)
# Local port used for every karpenter port-forward (container port is 8080).
PPROF_LOCAL_PORT=16060
if grep -q -- '--enable-profiling' <<<"$KARPENTER_ARGS"; then
  echo " --enable-profiling already set"
else
  echo " Patching karpenter deployment to add --enable-profiling..."
  if ! mc_oc patch deployment karpenter -n "$HCP_NS" --type=json \
    -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--enable-profiling"}]'; then
    echo "WARNING: failed to patch karpenter deployment; profiling may stay disabled" >&2
  fi
  echo " Waiting for rollout..."
  if ! mc_oc rollout status deployment/karpenter -n "$HCP_NS" --timeout=120s; then
    echo "WARNING: karpenter rollout did not complete within 120s" >&2
  fi
  # Re-discover the pod name after the rollout. Sort by creation time and take
  # the newest pod so a still-terminating pod of the old ReplicaSet is never
  # picked; keep the previous name if the lookup returns nothing.
  NEW_KARPENTER_POD=$(mc_oc get pods -n "$HCP_NS" -l app=karpenter --no-headers \
    --sort-by=.metadata.creationTimestamp \
    -o custom-columns=NAME:.metadata.name 2>/dev/null | tail -1)
  if [[ -n "$NEW_KARPENTER_POD" ]]; then
    KARPENTER_POD="$NEW_KARPENTER_POD"
    echo " New karpenter pod: $KARPENTER_POD"
  else
    echo "WARNING: could not re-discover karpenter pod; still using $KARPENTER_POD" >&2
  fi
fi
# Verify karpenter pprof via port-forward (container may not have curl)
mc_oc port-forward -n "$HCP_NS" "pod/$KARPENTER_POD" "${PPROF_LOCAL_PORT}:8080" &>/dev/null &
VALIDATE_PF_PID=$!
# Give the port-forward a moment to start listening before probing it.
sleep 3
KARP_HTTP=$(curl -s -o /dev/null -w '%{http_code}' "http://localhost:${PPROF_LOCAL_PORT}/debug/pprof/heap" 2>&1)
kill "$VALIDATE_PF_PID" 2>/dev/null
wait "$VALIDATE_PF_PID" 2>/dev/null
if [[ "$KARP_HTTP" != "200" ]]; then
  echo "WARNING: karpenter pprof returned HTTP $KARP_HTTP (profiling may not be enabled yet)" >&2
else
  echo " karpenter pprof: OK (HTTP 200)"
fi
# --------------- Create output directories ---------------
printf '\n%s\n' "[*] Output directory: $OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR/profiles"

# Save metadata: one "Key: value" line per setting, describing this run.
{
  printf '%s\n' "Date: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  printf '%s\n' "HCP Namespace: $HCP_NS"
  printf '%s\n' "MC Kubeconfig: $MC_KUBECONFIG"
  printf '%s\n' "HC Kubeconfig: $HC_KUBECONFIG"
  printf '%s\n' "Karpenter Pod: $KARPENTER_POD"
  printf '%s\n' "etcd Pods: ${ETCD_PODS[*]}"
  printf '%s\n' "KAS Pods: ${KAS_PODS[*]}"
  printf '%s\n' "Capture Interval: ${CAPTURE_INTERVAL}s"
  printf '%s\n' "CPU Profile Duration: ${CPU_DURATION}s"
  printf '%s\n' "Script: $0"
} > "$OUTPUT_DIR/metadata.txt"
# =============================================================================
# Capture functions
# =============================================================================
# Capture one heap profile and one CPU profile from the karpenter pod.
#   $1 - directory the profiles are written to
#   $2 - capture label embedded in the file names
# Reads HCP_NS, KARPENTER_POD, PPROF_LOCAL_PORT and CPU_DURATION.
# Writes karpenter_heap_<label>.pb.gz and karpenter_cpu_<label>.pb.gz.
# Download failures are deliberately ignored (best-effort capture).
# NOTE(review): if the shell running this function is terminated mid-capture,
# nothing here reaps the port-forward it started — confirm the caller does.
capture_karpenter() {
  local dest_dir="$1" tag="$2"
  local pprof_base="http://localhost:${PPROF_LOCAL_PORT}/debug/pprof"
  local cpu_deadline=$((CPU_DURATION + 10))
  local tunnel_pid

  # The profile is fetched through a short-lived port-forward because the
  # container may not have curl.
  mc_oc port-forward -n "$HCP_NS" "pod/$KARPENTER_POD" "${PPROF_LOCAL_PORT}:8080" &>/dev/null &
  tunnel_pid=$!
  sleep 2

  echo " [karpenter] heap..."
  curl -s "${pprof_base}/heap" \
    > "${dest_dir}/karpenter_heap_${tag}.pb.gz" 2>/dev/null || true

  # Allow 10s beyond the sampling window before curl gives up.
  echo " [karpenter] cpu (${CPU_DURATION}s)..."
  curl -s --max-time "$cpu_deadline" \
    "${pprof_base}/profile?seconds=${CPU_DURATION}" \
    > "${dest_dir}/karpenter_cpu_${tag}.pb.gz" 2>/dev/null || true

  kill "$tunnel_pid" 2>/dev/null
  wait "$tunnel_pid" 2>/dev/null
}
# Capture one heap profile and one CPU profile from every etcd pod, one pod
# after another.
#   $1 - directory the profiles are written to
#   $2 - capture label embedded in the file names
# Reads ETCD_PODS, HCP_NS, CPU_DURATION and the ETCD_CACERT/ETCD_CERT/ETCD_KEY
# paths. Writes etcd_<idx>_heap_<label>.pb.gz and etcd_<idx>_cpu_<label>.pb.gz,
# where <idx> is the pod's index in ETCD_PODS.
# Download failures are deliberately ignored (best-effort capture).
capture_etcd() {
  local OUTDIR="$1" LABEL="$2"
  local idx pod
  # Same budget as the karpenter capture: sampling window plus 10s, so a
  # stalled request cannot hang the whole capture round.
  local cpu_deadline=$((CPU_DURATION + 10))
  for idx in "${!ETCD_PODS[@]}"; do
    pod="${ETCD_PODS[$idx]}"
    echo " [etcd-${idx}] heap..."
    # curl runs inside the etcd container, so the cert paths are container paths.
    mc_oc exec -n "$HCP_NS" "$pod" -c etcd -- \
      curl -s --cacert "$ETCD_CACERT" --cert "$ETCD_CERT" --key "$ETCD_KEY" \
      https://localhost:2379/debug/pprof/heap \
      > "${OUTDIR}/etcd_${idx}_heap_${LABEL}.pb.gz" 2>/dev/null || true
    echo " [etcd-${idx}] cpu (${CPU_DURATION}s)..."
    mc_oc exec -n "$HCP_NS" "$pod" -c etcd -- \
      curl -s --max-time "$cpu_deadline" \
      --cacert "$ETCD_CACERT" --cert "$ETCD_CERT" --key "$ETCD_KEY" \
      "https://localhost:2379/debug/pprof/profile?seconds=${CPU_DURATION}" \
      > "${OUTDIR}/etcd_${idx}_cpu_${LABEL}.pb.gz" 2>/dev/null || true
  done
}
# Capture one heap profile and one CPU profile from kube-apiserver through the
# hosted-cluster API (`oc get --raw`).
#   $1 - directory the profiles are written to
#   $2 - capture label embedded in the file names
# Reads CPU_DURATION. Writes kas_heap_<label>.pb.gz and kas_cpu_<label>.pb.gz.
# Download failures are deliberately ignored (best-effort capture).
capture_kas() {
  local dest_dir="$1" tag="$2"
  local heap_file="${dest_dir}/kas_heap_${tag}.pb.gz"
  local cpu_file="${dest_dir}/kas_cpu_${tag}.pb.gz"
  local cpu_endpoint="/debug/pprof/profile?seconds=${CPU_DURATION}"

  printf '%s\n' " [kas] heap..."
  hc_oc get --raw /debug/pprof/heap > "$heap_file" 2>/dev/null || true

  printf '%s\n' " [kas] cpu (${CPU_DURATION}s)..."
  hc_oc get --raw "$cpu_endpoint" > "$cpu_file" 2>/dev/null || true
}
# =============================================================================
# Inline summary helper
# =============================================================================
# Print the total in-use heap of a pprof heap profile, in MiB with one decimal.
#   $1 - path to a .pb.gz heap profile
# Prints nothing when the file does not exist, when `go` is not available, or
# when pprof produces no "Showing nodes ..." summary line.
_heap_inuse_mib() {
  local PB="$1"
  if [[ -f "$PB" ]] && command -v go &>/dev/null; then
    # The summary line looks like:
    #   Showing nodes accounting for 61.64MB, 100% of 61.64MB total
    # The token after "of" is the total. pprof picks the unit itself, so
    # B / kB / MB / GB / TB are all converted to MiB rather than assuming MB.
    go tool pprof -top -inuse_space "$PB" 2>/dev/null | awk '/^Showing/ {
      for (i = 1; i <= NF; i++) {
        if ($i != "of") continue
        tok = $(i + 1)
        num = tok + 0            # numeric prefix of the token
        unit = tok
        sub(/^[0-9.]+/, "", unit)
        if (unit == "B") num = num / 1048576
        else if (unit == "kB" || unit == "KB") num = num / 1024
        else if (unit == "GB") num = num * 1024
        else if (unit == "TB") num = num * 1048576
        printf "%.1f", num
        exit
      }
    }'
  fi
}
# Print a one-line summary of the in-use heap of every component for one
# capture round, e.g.
#   " Heap sizes (inuse): karpenter=61.6MiB etcd-0=120.3MiB kas=?MiB"
#   $1 - directory holding the captured profiles
#   $2 - capture label of the round to summarise
# A component whose size cannot be determined is shown as "?".
# Reads ETCD_PODS (one etcd entry is printed per array index).
print_heap_summary() {
  local OUTDIR="$1" LABEL="$2"
  # Both are local so the caller's variables (notably a global `i`) are not
  # clobbered by the loop below.
  local idx mib

  printf ' Heap sizes (inuse): '

  # Karpenter
  mib=$(_heap_inuse_mib "${OUTDIR}/karpenter_heap_${LABEL}.pb.gz")
  printf 'karpenter=%sMiB ' "${mib:-?}"

  # etcd
  for idx in "${!ETCD_PODS[@]}"; do
    mib=$(_heap_inuse_mib "${OUTDIR}/etcd_${idx}_heap_${LABEL}.pb.gz")
    printf 'etcd-%s=%sMiB ' "$idx" "${mib:-?}"
  done

  # KAS (ends the line)
  mib=$(_heap_inuse_mib "${OUTDIR}/kas_heap_${LABEL}.pb.gz")
  printf 'kas=%sMiB\n' "${mib:-?}"
}
# =============================================================================
# Analysis helpers (run on Ctrl+C exit)
# =============================================================================
# Render every captured heap profile as a pprof text report next to it
# (<profile>.pb.gz -> <profile>.txt) under $OUTPUT_DIR/profiles.
# Skipped with a message when `go` is not installed. A profile that pprof
# cannot read leaves no .txt behind, and an empty .txt left over from an
# earlier failed run is regenerated rather than treated as done.
generate_heap_texts() {
  if ! command -v go &>/dev/null; then
    echo " SKIP: 'go' not in PATH, cannot generate heap text dumps"
    return
  fi
  echo " Generating heap text dumps from .pb.gz profiles..."
  local pb txt
  for pb in "$OUTPUT_DIR"/profiles/*_heap_*.pb.gz; do
    # An unmatched glob stays literal; skip it.
    [[ -f "$pb" ]] || continue
    txt="${pb%.pb.gz}.txt"
    # Already rendered (non-empty) on a previous pass.
    [[ -s "$txt" ]] && continue
    if ! go tool pprof -text -inuse_space "$pb" > "$txt" 2>/dev/null; then
      # The redirection created the file before pprof failed; drop it.
      rm -f -- "$txt"
    fi
  done
}
# Build $OUTPUT_DIR/memstats_summary.tsv: a header row followed by one
# tab-separated row (component, capture label, in-use MiB) per heap profile
# found under $OUTPUT_DIR/profiles. Files whose names match none of the known
# patterns are ignored; an undeterminable size is recorded as 0.
extract_memstats() {
  local SUMMARY="$OUTPUT_DIR/memstats_summary.tsv"
  local stem component capture mib

  printf 'component\tcapture\tinuse_space_MiB\n' > "$SUMMARY"

  for PB in "$OUTPUT_DIR"/profiles/*_heap_*.pb.gz; do
    if [[ ! -f "$PB" ]]; then
      continue
    fi
    stem=$(basename "$PB" .pb.gz)

    # Recognised file-name shapes:
    #   karpenter_heap_<label>   etcd_<n>_heap_<label>   kas_heap_<label>
    if [[ "$stem" =~ ^etcd_([0-9]+)_heap_(.+)$ ]]; then
      component="etcd-${BASH_REMATCH[1]}"
      capture="${BASH_REMATCH[2]}"
    elif [[ "$stem" =~ ^(karpenter|kas)_heap_(.+)$ ]]; then
      component="${BASH_REMATCH[1]}"
      capture="${BASH_REMATCH[2]}"
    else
      continue
    fi

    mib=$(_heap_inuse_mib "$PB")
    echo -e "${component}\t${capture}\t${mib:-0}" >> "$SUMMARY"
  done

  echo " MemStats summary: $SUMMARY"
}
# Print, one per line, the file-name prefix of every profiled component:
# "karpenter", then "etcd_<idx>" for each index of ETCD_PODS, then "kas".
# Deriving the etcd entries from ETCD_PODS covers however many etcd members
# were discovered instead of assuming exactly three.
_pprof_components() {
  local idx
  printf '%s\n' karpenter
  for idx in "${!ETCD_PODS[@]}"; do
    printf 'etcd_%s\n' "$idx"
  done
  printf '%s\n' kas
}

# Print the path of the largest non-empty profile matching
# $OUTPUT_DIR/profiles/<prefix>_*.pb.gz, or nothing when there is none.
#   $1 - file-name prefix, e.g. "etcd_0_heap" or "kas_cpu"
# File size is used as the "peak" heuristic; on a tie the first file in glob
# order wins. Sizes come from `wc -c`, which works without GNU stat.
_peak_profile() {
  local prefix="$1" candidate size best="" best_size=0
  for candidate in "$OUTPUT_DIR"/profiles/"${prefix}"_*.pb.gz; do
    [[ -f "$candidate" ]] || continue
    # The arithmetic wrapper strips any padding and turns a failed read into 0.
    size=$(( $(wc -c < "$candidate" 2>/dev/null) + 0 ))
    if (( size > best_size )); then
      best_size=$size
      best="$candidate"
    fi
  done
  if [[ -n "$best" ]]; then
    printf '%s\n' "$best"
  fi
}

# Render one SVG with pprof; a failed render leaves no (empty) file behind.
#   $1 - output .svg path    $2.. - pprof arguments, ending with the profile
_render_svg() {
  local out="$1"
  shift
  go tool pprof -svg "$@" > "$out" 2>/dev/null || rm -f -- "$out"
}

# Write $OUTPUT_DIR/top_allocations.txt: `go tool pprof -top -inuse_space` for
# the peak heap profile of every component. Components without any profile are
# skipped. When `go` is missing, only a SKIP notice is written.
run_pprof_top_heap() {
  local TOPFILE="$OUTPUT_DIR/top_allocations.txt"
  if ! command -v go &>/dev/null; then
    echo " SKIP: 'go' not in PATH, cannot run pprof top analysis" | tee "$TOPFILE"
    return
  fi
  local -a components
  mapfile -t components < <(_pprof_components)
  local component peak
  {
    echo "================================================================================"
    echo " Top Heap Allocators (go tool pprof -top -inuse_space)"
    echo "================================================================================"
    for component in "${components[@]}"; do
      peak=$(_peak_profile "${component}_heap")
      [[ -n "$peak" ]] || continue
      echo ""
      echo "--- ${component} peak: $(basename "$peak") ---"
      echo ""
      go tool pprof -top -inuse_space "$peak" 2>&1 || echo "(pprof failed)"
    done
  } > "$TOPFILE" 2>&1
  echo " Top allocations: $TOPFILE"
}

# Write $OUTPUT_DIR/top_cpu.txt: `go tool pprof -top` for the peak CPU profile
# of every component. Components without any profile are skipped. When `go` is
# missing, only a SKIP notice is written.
run_pprof_top_cpu() {
  local TOPFILE="$OUTPUT_DIR/top_cpu.txt"
  if ! command -v go &>/dev/null; then
    echo " SKIP: 'go' not in PATH, cannot run pprof CPU analysis" | tee "$TOPFILE"
    return
  fi
  local -a components
  mapfile -t components < <(_pprof_components)
  local component peak
  {
    echo "================================================================================"
    echo " Top CPU Consumers (go tool pprof -top)"
    echo "================================================================================"
    for component in "${components[@]}"; do
      peak=$(_peak_profile "${component}_cpu")
      [[ -n "$peak" ]] || continue
      echo ""
      echo "--- ${component} peak CPU: $(basename "$peak") ---"
      echo ""
      go tool pprof -top "$peak" 2>&1 || echo "(pprof failed)"
    done
  } > "$TOPFILE" 2>&1
  echo " Top CPU: $TOPFILE"
}

# Render SVG call graphs into $OUTPUT_DIR/svgs for the peak heap profile
# (in-use and alloc views) and the peak CPU profile of every component.
# The directory is created even when `go` is missing and rendering is skipped.
generate_svgs() {
  local SVGDIR="$OUTPUT_DIR/svgs"
  mkdir -p "$SVGDIR"
  if ! command -v go &>/dev/null; then
    echo " SKIP: 'go' not in PATH, cannot generate SVGs"
    return
  fi
  local -a components
  mapfile -t components < <(_pprof_components)
  local component peak stem
  for component in "${components[@]}"; do
    # Heap SVGs — from the largest (peak) heap profile
    peak=$(_peak_profile "${component}_heap")
    if [[ -n "$peak" ]]; then
      stem=$(basename "$peak" .pb.gz)
      echo " [svg] ${stem} heap..."
      _render_svg "${SVGDIR}/${stem}_inuse.svg" -inuse_space "$peak"
      _render_svg "${SVGDIR}/${stem}_alloc.svg" -alloc_space "$peak"
    fi
    # CPU SVG — from the largest CPU profile
    peak=$(_peak_profile "${component}_cpu")
    if [[ -n "$peak" ]]; then
      stem=$(basename "$peak" .pb.gz)
      echo " [svg] ${stem} cpu..."
      _render_svg "${SVGDIR}/${stem}.svg" "$peak"
    fi
  done
  echo " SVGs: $SVGDIR"
}
# Assemble $OUTPUT_DIR/analysis_report.txt from the run parameters and the
# artefacts of the earlier analysis steps (memstats_summary.tsv,
# top_allocations.txt, top_cpu.txt), append a listing of every file under
# $OUTPUT_DIR, then print a completion banner with follow-up pprof commands.
# A section whose input file is missing shows "(not available)".
# Reads HCP_NS, START_TS, CAPTURE_INTERVAL, CPU_DURATION, KARPENTER_POD,
# ETCD_PODS and KAS_PODS.
generate_report() {
  local REPORT="$OUTPUT_DIR/analysis_report.txt"
  local rule="================================================================================"
  local CAPTURE_COUNT=0 profile path

  # Number of capture rounds = number of karpenter heap profiles on disk.
  # Counted with a glob rather than by parsing `ls` output.
  for profile in "$OUTPUT_DIR"/profiles/karpenter_heap_*.pb.gz; do
    if [[ -e "$profile" ]]; then
      CAPTURE_COUNT=$((CAPTURE_COUNT + 1))
    fi
  done

  {
    echo "$rule"
    echo " Karpenter / kube-apiserver / etcd — pprof Analysis Report"
    echo "$rule"
    echo ""
    echo "HCP Namespace: $HCP_NS"
    echo "Start: $START_TS"
    echo "End: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
    echo "Interval: ${CAPTURE_INTERVAL}s"
    echo "CPU Duration: ${CPU_DURATION}s"
    echo "Captures: $CAPTURE_COUNT"
    echo "Karpenter Pod: $KARPENTER_POD"
    echo "etcd Pods: ${ETCD_PODS[*]}"
    echo "KAS Pods: ${KAS_PODS[*]}"
    echo ""
    echo "$rule"
    echo " Go Heap MemStats"
    echo "$rule"
    echo ""
    # Prefer an aligned table; fall back to the raw TSV if `column` fails.
    column -t -s$'\t' "$OUTPUT_DIR/memstats_summary.tsv" 2>/dev/null || \
      cat "$OUTPUT_DIR/memstats_summary.tsv" 2>/dev/null || echo "(not available)"
    echo ""
    echo "$rule"
    echo " Top Heap Allocators"
    echo "$rule"
    echo ""
    cat "$OUTPUT_DIR/top_allocations.txt" 2>/dev/null || echo "(not available)"
    echo ""
    echo "$rule"
    echo " Top CPU Consumers"
    echo "$rule"
    echo ""
    cat "$OUTPUT_DIR/top_cpu.txt" 2>/dev/null || echo "(not available)"
    echo ""
    echo "$rule"
    echo " Files"
    echo "$rule"
    echo ""
    # Show paths relative to OUTPUT_DIR. The prefix is removed as a literal
    # string, so directory names containing '|' or regex metacharacters are
    # handled correctly.
    find "$OUTPUT_DIR" -type f | sort | while IFS= read -r path; do
      printf '%s\n' "${path#"$OUTPUT_DIR"/}"
    done
  } > "$REPORT"

  echo ""
  echo "=============================================="
  echo " Analysis complete!"
  echo "=============================================="
  echo " Report: $REPORT"
  echo " Output: $OUTPUT_DIR"
  echo " Captures: $CAPTURE_COUNT"
  echo ""
  echo " Interactive exploration:"
  echo " # Karpenter heap (peak)"
  echo " go tool pprof -http=:8080 \$(ls -S $OUTPUT_DIR/profiles/karpenter_heap_*.pb.gz | head -1)"
  echo ""
  echo " # Karpenter CPU (peak)"
  echo " go tool pprof -http=:8080 \$(ls -S $OUTPUT_DIR/profiles/karpenter_cpu_*.pb.gz | head -1)"
  echo ""
  echo " # etcd heap (peak)"
  echo " go tool pprof -http=:8080 \$(ls -S $OUTPUT_DIR/profiles/etcd_0_heap_*.pb.gz | head -1)"
  echo ""
  echo " # KAS heap (peak)"
  echo " go tool pprof -http=:8080 \$(ls -S $OUTPUT_DIR/profiles/kas_heap_*.pb.gz | head -1)"
  echo ""
}
# =============================================================================
# Cleanup on exit (Ctrl+C or SIGTERM)
# =============================================================================
# Loop-control flag. The main loop executes its value as a command
# (`while $RUNNING`, `if ! $RUNNING`), so it must only ever hold the string
# "true" or "false".
RUNNING=true
# Space-separated PIDs of the capture subshells currently in flight; empty
# between rounds. cleanup() reads it to terminate unfinished captures.
CAPTURE_PIDS=""
# Run the complete post-capture analysis pipeline over the captured data.
# The steps run in a fixed order: the report is last because it embeds the
# files the earlier steps write.
run_analysis() {
  local step
  printf '\n%s\n' "[*] Running analysis on captured data..."
  for step in \
    generate_heap_texts \
    extract_memstats \
    run_pprof_top_heap \
    run_pprof_top_cpu \
    generate_svgs \
    generate_report; do
    "$step"
  done
}
# Signal handler: stop capturing, terminate any capture subshells still in
# flight, run the analysis on whatever was collected, and exit with status 0.
cleanup() {
  # Ignore further SIGINT/SIGTERM so the handler cannot be re-entered.
  trap '' SIGINT SIGTERM

  printf '\n\n%s\n' "[*] Caught signal — stopping..."
  RUNNING=false

  # CAPTURE_PIDS is a space-separated list. SIGTERM is used because the
  # capture subshells are started with SIGINT ignored.
  if [[ -n "$CAPTURE_PIDS" ]]; then
    local -a pending
    read -ra pending <<< "$CAPTURE_PIDS"
    echo "[*] Killing capture subprocesses..."
    kill -TERM "${pending[@]}" 2>/dev/null
    wait "${pending[@]}" 2>/dev/null
  fi

  run_analysis
  exit 0
}
# Route Ctrl+C (SIGINT) and SIGTERM to cleanup(), which runs the analysis and
# exits the script itself.
trap cleanup SIGINT SIGTERM
# =============================================================================
# MAIN: continuous capture loop
# =============================================================================
# Each round starts the three capture functions in parallel, waits for all of
# them, prints a heap summary, then pauses CAPTURE_INTERVAL seconds. The pause
# begins only after the round has finished, so the time between the starts of
# two rounds is the round's duration plus CAPTURE_INTERVAL.
# START_TS is the human-readable start time shown in the report; START_EPOCH is
# used to compute the elapsed time printed for every round.
START_TS=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
START_EPOCH=$(date +%s)
echo ""
echo "=============================================="
echo " Capturing profiles every ${CAPTURE_INTERVAL}s"
echo " CPU profiles: ${CPU_DURATION}s each"
echo " Press Ctrl+C to stop and run analysis"
echo "=============================================="
echo ""
CAPTURE_NUM=0
while $RUNNING; do
CAPTURE_NUM=$((CAPTURE_NUM + 1))
ELAPSED=$(( $(date +%s) - START_EPOCH ))
ELAPSED_MIN=$(( ELAPSED / 60 ))
ELAPSED_SEC=$(( ELAPSED % 60 ))
# Zero-padded round number (0001, 0002, ...) used in every profile file name.
LABEL=$(printf "%04d" "$CAPTURE_NUM")
echo "[$(date -u '+%H:%M:%S')] Capture #${CAPTURE_NUM} (T+${ELAPSED_MIN}m${ELAPSED_SEC}s)..."
# Run captures in subshells that ignore SIGINT so only the parent catches it
(trap '' SIGINT; capture_karpenter "$OUTPUT_DIR/profiles" "$LABEL") &
PID_KARP=$!
(trap '' SIGINT; capture_etcd "$OUTPUT_DIR/profiles" "$LABEL") &
PID_ETCD=$!
(trap '' SIGINT; capture_kas "$OUTPUT_DIR/profiles" "$LABEL") &
PID_KAS=$!
# Published for cleanup(), which terminates these PIDs if a signal arrives.
CAPTURE_PIDS="$PID_KARP $PID_ETCD $PID_KAS"
# Wait for children. If SIGINT arrives, the trap fires (children ignore it).
# The trap sets RUNNING=false and kills children, so we check after wait.
# $CAPTURE_PIDS is intentionally unquoted: it must split into separate PIDs.
wait $CAPTURE_PIDS 2>/dev/null
CAPTURE_PIDS=""
$RUNNING || break
print_heap_summary "$OUTPUT_DIR/profiles" "$LABEL"
# Stop after max captures if set
# (MAX_CAPTURES=0 means no limit.)
if (( MAX_CAPTURES > 0 && CAPTURE_NUM >= MAX_CAPTURES )); then
echo ""
echo "[*] Reached max captures ($MAX_CAPTURES), stopping..."
RUNNING=false
break
fi
# Wait for next interval, checking RUNNING every second
WAITED=0
while (( WAITED < CAPTURE_INTERVAL )) && $RUNNING; do
sleep 1
WAITED=$((WAITED + 1))
done
done
# If we exited the loop via max-captures (not signal), run analysis
# (On a signal, cleanup() has already run the analysis and exited.)
if ! $RUNNING; then
run_analysis
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment