Skip to content

Instantly share code, notes, and snippets.

@detain
Created April 19, 2026 23:06
Show Gist options
  • Select an option

  • Save detain/cb164e116d5cdad70dd3a527ff5c7021 to your computer and use it in GitHub Desktop.

Select an option

Save detain/cb164e116d5cdad70dd3a527ff5c7021 to your computer and use it in GitHub Desktop.
The script measures node resource pressure for VPS placement by combining CPU usage, steal time, run queue, memory availability, and aggregated disk I/O latency/utilization into normalized scores, then computes a weighted TOTAL_PRESSURE (IO > CPU > MEM) indicating system saturation.
#!/bin/bash
# PURPOSE: Compute TOTAL_PRESSURE for VPS placement (lower = more free capacity)
# Model: IO > CPU > Memory. This is a node-level saturation score combining CPU scheduling pressure,
# storage latency/load, and memory exhaustion into a single normalized metric.
# =========================================================
# CPU BASE SIGNALS (system-wide, multi-core aggregated)
# =========================================================
# mpstat already aggregates across all logical CPUs:
# CPU_IDLE = idle time %, CPU_STEAL = hypervisor time stolen (VM contention)
# CPU_USAGE converts idle → active utilization

# Print "<%idle> <%steal>" taken from the "Average:" row of mpstat output on stdin.
# Columns are located by header name and counted from the right-hand edge because:
#   - current sysstat ends the row with "%steal %guest %gnice %idle", so %steal
#     is NOT the column next to %idle (that one is %gnice);
#   - the header row starts with a timestamp (1 or 2 words, e.g. "11:06:01 PM")
#     while the data row starts with the single word "Average:", so indexes
#     counted from the left do not line up between the two rows.
# Falls back to the last column for %idle and 0 for %steal if no header was seen.
mpstat_idle_steal() {
  awk '
    /%idle/ {
      for (i = 1; i <= NF; i++) {
        if ($i == "%idle")       { idle_off = NF - i;  have_idle = 1 }
        else if ($i == "%steal") { steal_off = NF - i; have_steal = 1 }
      }
      next
    }
    /^Average/ {
      idle  = have_idle  ? $(NF - idle_off)  : $NF
      steal = have_steal ? $(NF - steal_off) : 0
      print idle, steal
      exit
    }'
}
# LC_ALL=C pins the "Average:" label and the "." decimal separator regardless of
# the caller's locale; both are required by the parser and by awk arithmetic.
read -r CPU_IDLE CPU_STEAL <<< "$(LC_ALL=C mpstat 1 1 | mpstat_idle_steal)"
CPU_USAGE=$(awk -v idle="$CPU_IDLE" 'BEGIN{printf "%.2f",100-idle}')
# CORES: logical CPU count reported by nproc; the divisor used to express the
# run queue on a per-core basis.
CORES="$(nproc)"
# RUN_QUEUE: the "runnable" half of the 4th /proc/loadavg field ("runnable/total").
# This is an instantaneous count of runnable scheduling entities, not the
# 1-minute load average (and it includes the process doing the read).
RUN_QUEUE="$(awk -F'[ /]' '{ print $4 }' /proc/loadavg)"
# RUN_QUEUE_NORM: runnable tasks per core (>= 1 means the CPUs are oversubscribed).
RUN_QUEUE_NORM="$(awk -v runnable="$RUN_QUEUE" -v ncpu="$CORES" \
  'BEGIN { printf "%.4f", runnable / ncpu }')"
# CPU_STEAL_NORM: steal fraction scaled by the busy fraction, so steal observed
# on a mostly idle node contributes little to the score.
CPU_STEAL_NORM="$(awk -v steal_pct="$CPU_STEAL" -v busy_pct="$CPU_USAGE" '
  BEGIN {
    steal_frac = steal_pct / 100
    busy_frac  = busy_pct / 100
    printf "%.4f", steal_frac * busy_frac
  }')"
# CPU_CAPACITY = theoretical raw compute capacity (cores × avg MHz)
# Used for observability / future heterogeneity scaling (not directly in score yet)

# Print the mean of every "cpu MHz" entry in a cpuinfo-format file, two decimals.
# Arguments: $1 - file to read (optional, defaults to /proc/cpuinfo)
# Prints 0.00 when the file contains no "cpu MHz" entries (the field is not
# exposed on every platform) instead of aborting awk with a division by zero,
# which would leave CPU_MHZ empty.
avg_cpu_mhz() {
  awk -F: '
    /cpu MHz/ { sum += $2; n++ }
    END {
      avg = (n > 0) ? sum / n : 0
      printf "%.2f", avg
    }' "${1:-/proc/cpuinfo}"
}
CPU_MHZ=$(avg_cpu_mhz)
CPU_CAPACITY=$(awk -v c="$CORES" -v mhz="$CPU_MHZ" 'BEGIN{print c*mhz}')
# =========================================================
# MEMORY (system-wide pressure model)
# =========================================================
# Read MemAvailable and MemTotal (both in kB) from /proc/meminfo in one pass.
# MemAvailable is preferred over MemFree because it also counts reclaimable cache.
# The MEM_PRESSURE score is derived from these two values further down.
MEM_AVAILABLE=''
MEM_TOTAL=''
while IFS=': ' read -r meminfo_key meminfo_kb _; do
  case "$meminfo_key" in
    MemAvailable) MEM_AVAILABLE=$meminfo_kb ;;
    MemTotal)     MEM_TOTAL=$meminfo_kb ;;
  esac
done < /proc/meminfo
# =========================================================
# IO (aggregate all disks into node-level storage pressure)
# =========================================================
# Uses iostat extended stats:
# r/s + w/s = IOPS, await = latency (ms), %util = disk saturation %
# Weighted by IOPS so busy disks dominate signal
# Includes:
# - avg latency (aa)
# - worst latency (ma)
# - avg utilization (au)
# - worst utilization (mu)

# Print an IO pressure score in [0,1] (4 decimals) from `iostat -x` output on stdin.
# Only the second report is used: the first one holds since-boot averages.
# Columns are resolved by header NAME, not position, because the `iostat -x`
# layout differs between sysstat releases: the header is "Device:" or "Device",
# r/s and w/s sit at different positions, and latency is either one combined
# "await" column or separate "r_await"/"w_await" columns (combined here as an
# IOPS-weighted mean). Fixed positions silently read unrelated columns.
# Prints 0.0000 when no device rows are seen (e.g. iostat unavailable).
io_pressure_from_iostat() {
  awk '
    # Numeric value of the named column on the current row; 0 if the column
    # does not exist in this iostat layout.
    function col(name) { return (name in pos) ? $(pos[name]) + 0 : 0 }

    # Header row: count the report and (re)build the name -> index map.
    $1 ~ /^Device:?$/ {
      report++
      split("", pos)
      for (i = 2; i <= NF; i++) pos[$i] = i
      next
    }

    # Device rows of the second report; NF > 1 skips blank separator lines.
    # Pseudo/virtual devices (loop, ram, floppy, optical, device-mapper) are excluded.
    report == 2 && NF > 1 && $1 !~ /^(loop|ram|fd|sr|dm-)/ {
      r = col("r/s"); w = col("w/s"); util = col("%util")
      iops = r + w
      if ("await" in pos)  await = col("await")
      else if (iops > 0)   await = (col("r_await") * r + col("w_await") * w) / iops
      else                 await = 0
      ti += iops; wa += await * iops; wu += util * iops
      if (await > ma) ma = await
      if (util > mu)  mu = util
    }

    END {
      if (ti > 0) { aa = wa / ti; au = wu / ti } else { aa = 0; au = 0 }
      # blend average system behavior with worst-case disk bottleneck
      fa = (aa * 0.7) + (ma * 0.3)
      fu = (au * 0.7) + (mu * 0.3)
      # normalize into pressure score: 5 ms blended latency or 100% blended
      # utilization each contribute a full 1.0 before clamping
      io = (fa / 5.0) + (fu / 100.0)
      if (io > 1) io = 1
      if (io < 0) io = 0
      printf "%.4f", io
    }'
}
# LC_ALL=C keeps "." as the decimal separator so awk parses the numbers correctly.
IO_PRESSURE=$(LC_ALL=C iostat -x 1 2 | io_pressure_from_iostat)
# =========================================================
# CPU PRESSURE MODEL (scheduler + contention aware)
# =========================================================
# CPU_PRESSURE is a weighted sum of three signals, clamped to [0,1]:
#   60%  utilization as a fraction (CPU_USAGE / 100)
#   30%  runnable tasks per core   (RUN_QUEUE_NORM)
#   10%  usage-weighted steal      (CPU_STEAL_NORM)
CPU_PRESSURE="$(awk -v util_pct="$CPU_USAGE" -v runq="$RUN_QUEUE_NORM" -v steal="$CPU_STEAL_NORM" '
  BEGIN {
    load_term  = (util_pct / 100) * 0.6
    queue_term = runq * 0.3
    steal_term = steal * 0.1
    score = load_term + queue_term + steal_term
    # clamp: the run-queue term alone can push the sum past 1
    score = (score > 1) ? 1 : ((score < 0) ? 0 : score)
    printf "%.4f", score
  }')"
# =========================================================
# MEMORY PRESSURE MODEL
# =========================================================
# Fraction of memory NOT available (higher = worse)

# Print memory pressure in [0,1] with four decimals.
# Arguments: $1 - available memory, $2 - total memory (same unit, e.g. kB)
# A missing, empty or non-positive total yields 1.0000 instead of aborting awk
# with a division by zero (which left MEM_PRESSURE empty, i.e. scored as 0).
# Failing closed matches what a missing $1 already produces (1 - 0/total = 1),
# so a node whose memory state is unknown is never reported as free.
mem_pressure_score() {
  awk -v avail="$1" -v total="$2" '
    BEGIN {
      if (total + 0 <= 0) { printf "%.4f", 1; exit }
      m = 1 - (avail / total)
      if (m > 1) m = 1
      if (m < 0) m = 0
      printf "%.4f", m
    }'
}
MEM_PRESSURE=$(mem_pressure_score "$MEM_AVAILABLE" "$MEM_TOTAL")
# =========================================================
# TOTAL NODE PRESSURE SCORE
# =========================================================
# TOTAL_PRESSURE blends the three normalized scores with fixed weights:
#   IO 50%  - weighted highest; storage is treated as the first bottleneck
#   CPU 30% - secondary scheduling constraint
#   MEM 20% - tertiary, still relevant for stability
# The sum is printed as-is with four decimals (no additional clamping).
TOTAL_PRESSURE="$(awk -v io_score="$IO_PRESSURE" -v cpu_score="$CPU_PRESSURE" -v mem_score="$MEM_PRESSURE" '
  BEGIN {
    io_part  = io_score * 0.5
    cpu_part = cpu_score * 0.3
    mem_part = mem_score * 0.2
    printf "%.4f", io_part + cpu_part + mem_part
  }')"
# =========================================================
# OUTPUT (raw + normalized signals for external scheduler ingestion)
# =========================================================
# Emit one KEY=VALUE report line per signal group on stdout.
# Note: the CPU_STEAL key carries the usage-weighted CPU_STEAL_NORM value,
# not the raw steal percentage.
printf '%s\n' \
  "CORES=$CORES CPU_MHZ=$CPU_MHZ CPU_CAPACITY=$CPU_CAPACITY" \
  "CPU_USAGE=$CPU_USAGE CPU_STEAL=$CPU_STEAL_NORM RUN_QUEUE=$RUN_QUEUE RUN_QUEUE_NORM=$RUN_QUEUE_NORM" \
  "IO_PRESSURE=$IO_PRESSURE" \
  "MEM_AVAILABLE=$MEM_AVAILABLE MEM_TOTAL=$MEM_TOTAL" \
  "CPU_PRESSURE=$CPU_PRESSURE" \
  "MEM_PRESSURE=$MEM_PRESSURE" \
  "TOTAL_PRESSURE=$TOTAL_PRESSURE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment