Skip to content

Instantly share code, notes, and snippets.

@AlexVanderbist
Last active May 15, 2025 07:10
Show Gist options
  • Save AlexVanderbist/4b7978ce73b569bedb1ae3f4eb91caa2 to your computer and use it in GitHub Desktop.
Save AlexVanderbist/4b7978ce73b569bedb1ae3f4eb91caa2 to your computer and use it in GitHub Desktop.
Monitor Horizon memory usage

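Example output (illustrative — the script below labels its lines differently and also prints percentile and standard-deviation statistics):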

Current Memory Usage of 'horizon:work' Processes:
PID: 753786, Memory Usage: 93.9414 MB
PID: 753805, Memory Usage: 93.8633 MB
PID: 753839, Memory Usage: 93.8438 MB
PID: 823499, Memory Usage: 96.0391 MB
PID: 823657, Memory Usage: 184.316 MB
PID: 823658, Memory Usage: 99.6406 MB
PID: 823947, Memory Usage: 98.0547 MB
PID: 824555, Memory Usage: 97.4531 MB
PID: 825144, Memory Usage: 126.383 MB
PID: 825641, Memory Usage: 99.832 MB
PID: 825917, Memory Usage: 98.7461 MB
PID: 826417, Memory Usage: 96.2305 MB
Maximum Memory Usage: 484.316 MB
Current Average Memory Usage: 106.52 MB
Overall Average Memory Usage: 233.79 MB
#!/bin/bash
#
# Periodically samples the resident memory (VmRSS, read from
# /proc/<pid>/status) of every process whose command line matches
# PROCESS_NAME_PATTERN and prints per-batch and cumulative statistics.
#
# --- Configuration ---
# Every setting keeps its previous default but can be overridden from the
# environment, e.g.:
#   PROCESS_NAME_PATTERN='queue:work' SLEEP_INTERVAL=5 bash <this-script>
PROCESS_NAME_PATTERN="${PROCESS_NAME_PATTERN:-horizon:work}"
SLEEP_INTERVAL="${SLEEP_INTERVAL:-1}" # Seconds
VERBOSE_PID_MEMORY="${VERBOSE_PID_MEMORY:-true}" # Set to false to hide individual PID memory usage
USE_CLEAR="${USE_CLEAR:-true}" # Set to true to clear screen on each update

# --- Initialization ---
# Overall statistics accumulators (memory values are in MB)
overall_max_memory=0.00      # largest single sample seen so far
overall_total_memory=0.00    # sum of all samples
overall_count_readings=0     # number of samples
overall_sum_of_squares=0.00  # sum of squared samples, for the standard deviation

# Force "." as the decimal point for awk, sort and printf.
# LC_ALL, when set, takes precedence over every individual LC_* variable, so
# it has to be removed or the LC_NUMERIC setting below would have no effect.
unset LC_ALL
export LC_NUMERIC=C
# Startup banner: say what is being watched and how to quit.
printf '%s\n' \
  "Starting memory monitoring for processes containing '$PROCESS_NAME_PATTERN'..." \
  "Press Ctrl+C to stop."
# Pause so the banner can be read before the first screen clear.
sleep 2
# --- Main Loop ---
# Once per SLEEP_INTERVAL: sample VmRSS for every matching process, print the
# statistics of this batch (average, P90, P95) and fold the samples into the
# cumulative statistics (average, maximum, standard deviation).
# The loop never exits on its own; stop it with Ctrl+C.
while true; do
  if [ "$USE_CLEAR" = true ]; then
    clear
  fi

  current_timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo # Add a blank line for readability
  echo "=== Memory Usage Statistics ($current_timestamp) ==="

  # PIDs whose full command line matches the pattern. mapfile leaves the
  # array empty when pgrep finds nothing; "--" keeps a pattern that starts
  # with "-" from being parsed as an option.
  mapfile -t pids < <(pgrep -f -- "$PROCESS_NAME_PATTERN")

  # --- Current Iteration Variables ---
  current_total_memory=0.00
  current_process_count=0
  current_memory_values=() # MB values of this batch, input for the percentiles
  # Values displayed when nothing could be sampled in this iteration.
  current_average_memory_display="0.00"
  p90_memory_current_display="0.00"
  p95_memory_current_display="0.00"

  if (( ${#pids[@]} == 0 )); then
    echo "No processes found matching '$PROCESS_NAME_PATTERN'."
  else
    if [ "$VERBOSE_PID_MEMORY" = true ]; then
      echo "--- Individual Process Memory (VmRSS) ---"
    fi

    for pid in "${pids[@]}"; do
      # The process may have exited between pgrep and this check.
      if [ ! -r "/proc/$pid/status" ]; then
        if [ "$VERBOSE_PID_MEMORY" = true ]; then
          echo "PID: $pid, Memory: /proc/$pid/status not readable (process may have exited)."
        fi
        continue
      fi

      # VmRSS value in kB. Errors are suppressed on purpose: the status file
      # can still disappear between the readability check and the read.
      mem_usage_kb=$(awk '/^VmRSS:/ { print $2; exit }' "/proc/$pid/status" 2>/dev/null)
      if [[ ! "$mem_usage_kb" =~ ^[0-9]+$ ]]; then
        if [ "$VERBOSE_PID_MEMORY" = true ]; then
          echo "PID: $pid, Memory: Could not read VmRSS."
        fi
        continue
      fi

      # One awk call converts kB to MB and updates all running accumulators.
      # The per-process value is rounded to 2 decimals for display; the
      # accumulators keep 6 decimals so that re-rounding them on every sample
      # does not add up over a long run.
      read -r mem_usage_mb \
        current_total_memory \
        overall_total_memory \
        overall_sum_of_squares \
        overall_max_memory < <(awk -v kb="$mem_usage_kb" \
          -v ctm="$current_total_memory" \
          -v otm="$overall_total_memory" \
          -v oss="$overall_sum_of_squares" \
          -v omaxm="$overall_max_memory" \
          'BEGIN {
            mb = kb / 1024;
            new_ctm = ctm + mb;
            new_otm = otm + mb;
            new_oss = oss + (mb * mb);
            new_omaxm = (mb > omaxm) ? mb : omaxm;
            printf "%.2f %.6f %.6f %.6f %.6f\n", mb, new_ctm, new_otm, new_oss, new_omaxm;
          }')

      if [ "$VERBOSE_PID_MEMORY" = true ]; then
        echo "PID: $pid, Memory: $mem_usage_mb MB"
      fi
      current_process_count=$((current_process_count + 1))
      current_memory_values+=("$mem_usage_mb")
      overall_count_readings=$((overall_count_readings + 1))
    done # End of PID loop

    # --- Calculate Current Iteration Statistics ---
    if (( current_process_count > 0 )); then
      current_average_memory_display=$(awk -v total="$current_total_memory" \
        -v count="$current_process_count" 'BEGIN { printf "%.2f", total / count }')

      # Percentiles use the nearest-rank method: with the N samples sorted in
      # ascending order, the P-th percentile is the value at 1-based rank
      # k = ceil(P/100 * N), i.e. 0-based index k - 1.
      # (P * N + 99) / 100 is that ceiling in exact integer arithmetic; for
      # N >= 1 and P in {90, 95} the rank always lies in [1, N].
      mapfile -t sorted_current_memory_values < <(
        printf '%s\n' "${current_memory_values[@]}" | sort -n
      )
      p90_idx=$(( (90 * current_process_count + 99) / 100 - 1 ))
      p95_idx=$(( (95 * current_process_count + 99) / 100 - 1 ))
      printf -v p90_memory_current_display '%.2f' "${sorted_current_memory_values[p90_idx]:-0}"
      printf -v p95_memory_current_display '%.2f' "${sorted_current_memory_values[p95_idx]:-0}"
    else
      # pgrep found PIDs, but none of them yielded a usable VmRSS value.
      echo "No matching processes with readable memory status in this iteration."
    fi
  fi # End of if pids found

  # --- Display Current Iteration Statistics ---
  echo "--- Current Batch Statistics ---"
  echo "Processes Found this Iteration: $current_process_count"
  echo "Average Memory (current batch): $current_average_memory_display MB"
  echo "P90 Memory (current batch): $p90_memory_current_display MB"
  echo "P95 Memory (current batch): $p95_memory_current_display MB"

  # --- Calculate and Display Overall Statistics ---
  overall_average_memory_display="0.00"
  overall_std_dev_display="0.00"
  printf -v overall_max_memory_display '%.2f' "$overall_max_memory"
  if (( overall_count_readings > 0 )); then
    # Population standard deviation from the running sums:
    # variance = E[x^2] - (E[x])^2. Tiny or negative results caused by
    # floating-point cancellation are clamped to 0 before the square root.
    read -r overall_average_memory_display overall_std_dev_display < <(awk \
      -v total="$overall_total_memory" \
      -v count="$overall_count_readings" \
      -v sum_sq="$overall_sum_of_squares" \
      'BEGIN {
        avg = total / count;
        variance = sum_sq / count - avg * avg;
        if (variance < 1e-9) variance = 0;
        std_dev = sqrt(variance);
        printf "%.2f %.2f\n", avg, std_dev;
      }')
  fi

  echo "--- Overall Statistics (cumulative) ---"
  echo "Total Samples Collected: $overall_count_readings"
  echo "Overall Average Memory: $overall_average_memory_display MB"
  echo "Maximum Memory Seen: $overall_max_memory_display MB"
  echo "Overall Standard Deviation: $overall_std_dev_display MB"
  echo "---------------------------------------------"

  sleep "$SLEEP_INTERVAL"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment