detain · April 19, 2026 23:06
diff --git a/calc_system_saturation.sh b/calc_system_saturation.sh
 #!/bin/bash
 # PURPOSE: Compute TOTAL_PRESSURE for VPS placement (lower = more free capacity)
 # Model: IO > CPU > Memory. This is a node-level saturation score combining CPU scheduling pressure,
 # storage latency/load, and memory exhaustion into a single normalized metric.

 # =========================================================
 # CPU BASE SIGNALS (system-wide, multi-core aggregated)
 # =========================================================
 # Sample system-wide CPU activity for one second with mpstat and keep two
 # values from its "Average" row:
 #   CPU_IDLE  = %idle  (share of time the CPUs were idle)
 #   CPU_STEAL = %steal (share of time taken by the hypervisor)
 # Columns are looked up by header name and addressed by their distance from
 # the end of the row: the column set differs between sysstat releases and the
 # header row starts with a timestamp where the summary row starts with
 # "Average:", so a fixed position such as $(NF-1) lands on %gnice/%guest
 # instead of %steal. LC_ALL=C keeps the "Average" label and the decimal point
 # independent of the caller's locale.
 _parse_mpstat_average() {
   awk '
     # Header row: remember how far %idle and %steal sit from the last field.
     /%idle/ {
       for (i = 1; i <= NF; i++) {
         if ($i == "%idle") { idle_off = NF - i; have_idle = 1 }
         else if ($i == "%steal") { steal_off = NF - i; have_steal = 1 }
       }
       next
     }
     # Summary row: read the values at the remembered offsets.
     /^Average/ && have_idle {
       idle = $(NF - idle_off)
       if (have_steal) steal = $(NF - steal_off); else steal = 0
       found = 1
     }
     # Print nothing when no summary row was seen; the caller then gets empty
     # values, exactly as when mpstat is unavailable.
     END { if (found) print idle, steal }
   '
 }
 read -r CPU_IDLE CPU_STEAL <<< "$(LC_ALL=C mpstat 1 1 | _parse_mpstat_average)"
 # CPU_USAGE = active utilisation in percent, i.e. everything that is not idle.
 CPU_USAGE=$(awk -v idle="$CPU_IDLE" 'BEGIN{printf "%.2f",100-idle}')

 # CORES = processor count reported by nproc; the divisor that turns the run
 # queue length into a per-core figure.
 CORES="$(nproc)"

 # RUN_QUEUE = the number before the slash in the fourth field of
 # /proc/loadavg ("runnable/total"). Per proc(5) this is an instantaneous count
 # of runnable scheduling entities, not the 1-minute load average.
 # RUN_QUEUE_NORM = RUN_QUEUE per core, four decimals (>=1 means more runnable
 # tasks than cores).
 RUN_QUEUE="$(awk -F'[ /]' '{ print $4 }' /proc/loadavg)"
 RUN_QUEUE_NORM="$(
   awk -v runnable="$RUN_QUEUE" -v ncpu="$CORES" \
     'BEGIN { printf "%.4f", runnable / ncpu }'
 )"

 # CPU_STEAL_NORM = steal fraction multiplied by the busy fraction, so that
 # steal observed on a mostly idle node contributes little. Both inputs are
 # percentages; the result is a 0..1 fraction printed with four decimals.
 CPU_STEAL_NORM=$(
   awk -v steal_pct="$CPU_STEAL" -v busy_pct="$CPU_USAGE" \
     'BEGIN { printf "%.4f", (steal_pct / 100) * (busy_pct / 100) }'
 )

 # CPU_MHZ = mean of all "cpu MHz" entries in /proc/cpuinfo, two decimals.
 # When the file carries no such entries the mean is reported as 0.00 instead
 # of dividing by zero (which aborts awk and leaves CPU_MHZ empty).
 # Arguments: $1 - file to read (default /proc/cpuinfo; "-" reads stdin)
 _avg_cpu_mhz() {
   awk -F: '
     /cpu MHz/ { sum += $2; n++ }
     END { printf "%.2f", (n > 0 ? sum / n : 0) }
   ' "${1:-/proc/cpuinfo}"
 }
 CPU_MHZ=$(_avg_cpu_mhz)
 # CPU_CAPACITY = cores x average MHz. Emitted for observability only; it is
 # not part of any pressure score in this script.
 CPU_CAPACITY=$(awk -v c="$CORES" -v mhz="$CPU_MHZ" 'BEGIN{print c*mhz}')

 # =========================================================
 # MEMORY (system-wide pressure model)
 # =========================================================
 # Raw memory figures taken from /proc/meminfo (second column of each row).
 # MemAvailable is preferred over MemFree because it also counts cache that
 # can be reclaimed. The pressure ratio itself is derived further down.
 MEM_AVAILABLE="$(awk '$1 == "MemAvailable:" { print $2 }' /proc/meminfo)"
 MEM_TOTAL="$(awk '$1 == "MemTotal:" { print $2 }' /proc/meminfo)"

 # =========================================================
 # IO (aggregate all disks into node-level storage pressure)
 # =========================================================
 # IO_PRESSURE: one 0..1 storage score for the node, computed from the second
 # report of `iostat -x 1 2` (the first report holds since-boot averages).
 # Per device: IOPS = r/s + w/s, latency = await (ms), saturation = %util.
 # Averages are weighted by IOPS so busy disks dominate, then blended with the
 # worst single device:
 #   - avg latency / avg utilization      (weight 0.7)
 #   - worst latency / worst utilization  (weight 0.3)
 # Score = blended_latency/5.0 + blended_util/100.0, clamped to [0,1].
 #
 # Columns are resolved by header name because the layout of `iostat -x`
 # differs between sysstat releases: legacy output has a "Device:" header with
 # a combined "await" column, newer output has a "Device" header, a different
 # column order, and only r_await/w_await. Fixed positions ($4, $5, $10) read
 # unrelated columns on the newer layouts. When "await" is absent it is rebuilt
 # as the IOPS-weighted mean of r_await and w_await.
 # Reads iostat output on stdin; prints the score with four decimals.
 _io_pressure_from_iostat() {
   awk '
     # Numeric value of the named column in the current row (0 if absent).
     function col_val(name) { return (name in col) ? $(col[name]) + 0 : 0 }

     # Header row (either spelling): count the report and map names to positions.
     $1 == "Device" || $1 == "Device:" {
       report++
       split("", col)
       for (i = 2; i <= NF; i++) col[$i] = i
       next
     }

     # Device rows of the second report, skipping pseudo/virtual devices.
     report == 2 && NF > 1 && $1 !~ /^(loop|ram|fd|sr|dm-)/ {
       reads = col_val("r/s")
       writes = col_val("w/s")
       iops = reads + writes

       if ("await" in col) lat = col_val("await")
       else if (iops > 0) lat = (reads * col_val("r_await") + writes * col_val("w_await")) / iops
       else lat = 0

       if ("%util" in col) util = col_val("%util"); else util = $NF + 0

       total_iops += iops
       lat_weighted += lat * iops
       util_weighted += util * iops
       if (lat > max_lat) max_lat = lat
       if (util > max_util) max_util = util
     }

     END {
       if (total_iops > 0) {
         avg_lat = lat_weighted / total_iops
         avg_util = util_weighted / total_iops
       } else {
         avg_lat = 0
         avg_util = 0
       }

       # blend average system behavior with worst-case disk bottleneck
       blended_lat = (avg_lat * 0.7) + (max_lat * 0.3)
       blended_util = (avg_util * 0.7) + (max_util * 0.3)

       # normalize into pressure score (latency + utilization)
       io = (blended_lat / 5.0) + (blended_util / 100.0)
       if (io > 1) io = 1
       if (io < 0) io = 0

       printf "%.4f", io
     }
   '
 }
 # LC_ALL=C keeps the decimal point a "." regardless of the caller's locale.
 IO_PRESSURE=$(LC_ALL=C iostat -x 1 2 | _io_pressure_from_iostat)

 # =========================================================
 # CPU PRESSURE MODEL (scheduler + contention aware)
 # =========================================================
 # CPU_PRESSURE: weighted blend of three CPU signals, clamped to [0,1] and
 # printed with four decimals:
 #   busy_pct       = CPU_USAGE (percent)            weight 0.6
 #   queue_per_core = RUN_QUEUE_NORM                 weight 0.3
 #   steal_frac     = CPU_STEAL_NORM (0..1 fraction) weight 0.1
 CPU_PRESSURE=$(
   awk -v busy_pct="$CPU_USAGE" \
       -v queue_per_core="$RUN_QUEUE_NORM" \
       -v steal_frac="$CPU_STEAL_NORM" '
   BEGIN {
     # saturation + scheduling backlog + virtualization overhead
     score = (busy_pct / 100) * 0.6 + queue_per_core * 0.3 + steal_frac * 0.1

     # keep the score inside the unit interval
     if (score > 1) {
       score = 1
     } else if (score < 0) {
       score = 0
     }

     printf "%.4f", score
   }'
 )

 # =========================================================
 # MEMORY PRESSURE MODEL
 # =========================================================
 # MEM_PRESSURE = fraction of memory NOT available (higher = worse), clamped
 # to [0,1] and printed with four decimals.
 # Arguments: $1 - available memory, $2 - total memory (same unit)
 # When the total is empty or not positive the ratio cannot be computed. The
 # function then reports 1.0000 rather than dividing by zero: an aborted awk
 # leaves the value empty, and the total score would read that as "no memory
 # pressure". Treating an unknown node as full is the safer placement signal.
 _mem_pressure() {
   awk -v avail="$1" -v total="$2" '
   BEGIN {
     if (total + 0 <= 0) {
       printf "%.4f", 1
       exit
     }
     m = 1 - (avail / total)
     if (m > 1) m = 1
     if (m < 0) m = 0
     printf "%.4f", m
   }'
 }
 MEM_PRESSURE=$(_mem_pressure "$MEM_AVAILABLE" "$MEM_TOTAL")

 # =========================================================
 # TOTAL NODE PRESSURE SCORE
 # =========================================================
 # TOTAL_PRESSURE: the node-level score, a fixed-weight sum of the three
 # sub-scores printed with four decimals:
 #   IO     x 0.5  (storage latency is typically the first bottleneck on VPS hosts)
 #   CPU    x 0.3  (secondary, scheduling constraint)
 #   Memory x 0.2  (tertiary, still critical for stability)
 TOTAL_PRESSURE=$(
   awk -v io_score="$IO_PRESSURE" \
       -v cpu_score="$CPU_PRESSURE" \
       -v mem_score="$MEM_PRESSURE" '
   BEGIN {
     weighted = io_score * 0.5
     weighted += cpu_score * 0.3
     weighted += mem_score * 0.2
     printf "%.4f", weighted
   }'
 )

 # =========================================================
 # OUTPUT: KEY=value pairs on stdout, raw signals first and derived scores
 # after, for ingestion by an external scheduler. One printf emits all seven
 # lines; each argument becomes one line.
 # =========================================================
 printf '%s\n' \
   "CORES=$CORES CPU_MHZ=$CPU_MHZ CPU_CAPACITY=$CPU_CAPACITY" \
   "CPU_USAGE=$CPU_USAGE CPU_STEAL=$CPU_STEAL_NORM RUN_QUEUE=$RUN_QUEUE RUN_QUEUE_NORM=$RUN_QUEUE_NORM" \
   "IO_PRESSURE=$IO_PRESSURE" \
   "MEM_AVAILABLE=$MEM_AVAILABLE MEM_TOTAL=$MEM_TOTAL" \
   "CPU_PRESSURE=$CPU_PRESSURE" \
   "MEM_PRESSURE=$MEM_PRESSURE" \
   "TOTAL_PRESSURE=$TOTAL_PRESSURE"
	#!/bin/bash
	# PURPOSE: Compute TOTAL_PRESSURE for VPS placement (lower = more free capacity)
	# Model: IO > CPU > Memory. This is a node-level saturation score combining CPU scheduling pressure,
	# storage latency/load, and memory exhaustion into a single normalized metric.

	# =========================================================
	# CPU BASE SIGNALS (system-wide, multi-core aggregated)
	# =========================================================
	# Sample system-wide CPU activity for one second with mpstat and keep two
	# values from its "Average" row:
	#   CPU_IDLE  = %idle  (share of time the CPUs were idle)
	#   CPU_STEAL = %steal (share of time taken by the hypervisor)
	# The pipe into the parser must be a real `|`; an escaped `\|` hands a
	# literal "|" to mpstat as an argument and nothing is parsed.
	# Columns are looked up by header name and addressed by their distance from
	# the end of the row: the column set differs between sysstat releases and the
	# header row starts with a timestamp where the summary row starts with
	# "Average:", so a fixed position such as $(NF-1) lands on %gnice/%guest
	# instead of %steal. LC_ALL=C keeps the "Average" label and the decimal point
	# independent of the caller's locale.
	_parse_mpstat_average() {
	  awk '
	    # Header row: remember how far %idle and %steal sit from the last field.
	    /%idle/ {
	      for (i = 1; i <= NF; i++) {
	        if ($i == "%idle") { idle_off = NF - i; have_idle = 1 }
	        else if ($i == "%steal") { steal_off = NF - i; have_steal = 1 }
	      }
	      next
	    }
	    # Summary row: read the values at the remembered offsets.
	    /^Average/ && have_idle {
	      idle = $(NF - idle_off)
	      if (have_steal) steal = $(NF - steal_off); else steal = 0
	      found = 1
	    }
	    # Print nothing when no summary row was seen; the caller then gets empty
	    # values, exactly as when mpstat is unavailable.
	    END { if (found) print idle, steal }
	  '
	}
	read -r CPU_IDLE CPU_STEAL <<< "$(LC_ALL=C mpstat 1 1 | _parse_mpstat_average)"
	# CPU_USAGE = active utilisation in percent, i.e. everything that is not idle.
	CPU_USAGE=$(awk -v idle="$CPU_IDLE" 'BEGIN{printf "%.2f",100-idle}')

	# CORES = processor count reported by nproc; the divisor that turns the run
	# queue length into a per-core figure.
	CORES="$(nproc)"

	# RUN_QUEUE = the number before the slash in the fourth field of
	# /proc/loadavg ("runnable/total"). Per proc(5) this is an instantaneous count
	# of runnable scheduling entities, not the 1-minute load average.
	# RUN_QUEUE_NORM = RUN_QUEUE per core, four decimals (>=1 means more runnable
	# tasks than cores).
	RUN_QUEUE="$(awk -F'[ /]' '{ print $4 }' /proc/loadavg)"
	RUN_QUEUE_NORM="$(
	  awk -v runnable="$RUN_QUEUE" -v ncpu="$CORES" \
	    'BEGIN { printf "%.4f", runnable / ncpu }'
	)"

	# CPU_STEAL_NORM = steal fraction multiplied by the busy fraction, so that
	# steal observed on a mostly idle node contributes little. Both inputs are
	# percentages; the result is a 0..1 fraction printed with four decimals.
	CPU_STEAL_NORM=$(
	  awk -v steal_pct="$CPU_STEAL" -v busy_pct="$CPU_USAGE" \
	    'BEGIN { printf "%.4f", (steal_pct / 100) * (busy_pct / 100) }'
	)

	# CPU_MHZ = mean of all "cpu MHz" entries in /proc/cpuinfo, two decimals.
	# When the file carries no such entries the mean is reported as 0.00 instead
	# of dividing by zero (which aborts awk and leaves CPU_MHZ empty).
	# Arguments: $1 - file to read (default /proc/cpuinfo; "-" reads stdin)
	_avg_cpu_mhz() {
	  awk -F: '
	    /cpu MHz/ { sum += $2; n++ }
	    END { printf "%.2f", (n > 0 ? sum / n : 0) }
	  ' "${1:-/proc/cpuinfo}"
	}
	CPU_MHZ=$(_avg_cpu_mhz)
	# CPU_CAPACITY = cores x average MHz. Emitted for observability only; it is
	# not part of any pressure score in this script.
	CPU_CAPACITY=$(awk -v c="$CORES" -v mhz="$CPU_MHZ" 'BEGIN{print c*mhz}')

	# =========================================================
	# MEMORY (system-wide pressure model)
	# =========================================================
	# Raw memory figures taken from /proc/meminfo (second column of each row).
	# MemAvailable is preferred over MemFree because it also counts cache that
	# can be reclaimed. The pressure ratio itself is derived further down.
	MEM_AVAILABLE="$(awk '$1 == "MemAvailable:" { print $2 }' /proc/meminfo)"
	MEM_TOTAL="$(awk '$1 == "MemTotal:" { print $2 }' /proc/meminfo)"

	# =========================================================
	# IO (aggregate all disks into node-level storage pressure)
	# =========================================================
	# IO_PRESSURE: one 0..1 storage score for the node, computed from the second
	# report of `iostat -x 1 2` (the first report holds since-boot averages).
	# Per device: IOPS = r/s + w/s, latency = await (ms), saturation = %util.
	# Averages are weighted by IOPS so busy disks dominate, then blended with the
	# worst single device:
	#   - avg latency / avg utilization      (weight 0.7)
	#   - worst latency / worst utilization  (weight 0.3)
	# Score = blended_latency/5.0 + blended_util/100.0, clamped to [0,1].
	#
	# The shell pipe and the regex alternation must be plain `|`, and every
	# product needs an explicit `*`: awk reads `aa0.7` as an unset variable
	# followed by a number, not as a multiplication.
	#
	# Columns are resolved by header name because the layout of `iostat -x`
	# differs between sysstat releases: legacy output has a "Device:" header with
	# a combined "await" column, newer output has a "Device" header, a different
	# column order, and only r_await/w_await. Fixed positions ($4, $5, $10) read
	# unrelated columns on the newer layouts. When "await" is absent it is rebuilt
	# as the IOPS-weighted mean of r_await and w_await.
	# Reads iostat output on stdin; prints the score with four decimals.
	_io_pressure_from_iostat() {
	  awk '
	    # Numeric value of the named column in the current row (0 if absent).
	    function col_val(name) { return (name in col) ? $(col[name]) + 0 : 0 }

	    # Header row (either spelling): count the report and map names to positions.
	    $1 == "Device" || $1 == "Device:" {
	      report++
	      split("", col)
	      for (i = 2; i <= NF; i++) col[$i] = i
	      next
	    }

	    # Device rows of the second report, skipping pseudo/virtual devices.
	    report == 2 && NF > 1 && $1 !~ /^(loop|ram|fd|sr|dm-)/ {
	      reads = col_val("r/s")
	      writes = col_val("w/s")
	      iops = reads + writes

	      if ("await" in col) lat = col_val("await")
	      else if (iops > 0) lat = (reads * col_val("r_await") + writes * col_val("w_await")) / iops
	      else lat = 0

	      if ("%util" in col) util = col_val("%util"); else util = $NF + 0

	      total_iops += iops
	      lat_weighted += lat * iops
	      util_weighted += util * iops
	      if (lat > max_lat) max_lat = lat
	      if (util > max_util) max_util = util
	    }

	    END {
	      if (total_iops > 0) {
	        avg_lat = lat_weighted / total_iops
	        avg_util = util_weighted / total_iops
	      } else {
	        avg_lat = 0
	        avg_util = 0
	      }

	      # blend average system behavior with worst-case disk bottleneck
	      blended_lat = (avg_lat * 0.7) + (max_lat * 0.3)
	      blended_util = (avg_util * 0.7) + (max_util * 0.3)

	      # normalize into pressure score (latency + utilization)
	      io = (blended_lat / 5.0) + (blended_util / 100.0)
	      if (io > 1) io = 1
	      if (io < 0) io = 0

	      printf "%.4f", io
	    }
	  '
	}
	# LC_ALL=C keeps the decimal point a "." regardless of the caller's locale.
	IO_PRESSURE=$(LC_ALL=C iostat -x 1 2 | _io_pressure_from_iostat)

	# =========================================================
	# CPU PRESSURE MODEL (scheduler + contention aware)
	# =========================================================
	# CPU_PRESSURE: weighted blend of three CPU signals, clamped to [0,1] and
	# printed with four decimals:
	#   u  = CPU_USAGE (percent)               weight 0.6
	#   rq = RUN_QUEUE_NORM (run queue/core)   weight 0.3
	#   st = CPU_STEAL_NORM (0..1 fraction)    weight 0.1
	# Every weight needs an explicit `*`. Without it awk parses `(u/100)0.6` as
	# string concatenation and `rq0.3` as the unset variable rq0 followed by .3,
	# so the score no longer depends on the inputs as intended.
	CPU_PRESSURE=$(awk \
	-v u="$CPU_USAGE" \
	-v rq="$RUN_QUEUE_NORM" \
	-v st="$CPU_STEAL_NORM" '
	BEGIN{
	# CPU pressure reflects saturation + virtualization overhead
	cpu = (u/100)*0.6 + rq*0.3 + st*0.1;

	# clamp to the unit interval
	if(cpu>1)cpu=1;
	if(cpu<0)cpu=0;

	printf "%.4f", cpu;
	}')

	# =========================================================
	# MEMORY PRESSURE MODEL
	# =========================================================
	# MEM_PRESSURE = fraction of memory NOT available (higher = worse), clamped
	# to [0,1] and printed with four decimals.
	# Arguments: $1 - available memory, $2 - total memory (same unit)
	# When the total is empty or not positive the ratio cannot be computed. The
	# function then reports 1.0000 rather than dividing by zero: an aborted awk
	# leaves the value empty, and the total score would read that as "no memory
	# pressure". Treating an unknown node as full is the safer placement signal.
	_mem_pressure() {
	  awk -v avail="$1" -v total="$2" '
	  BEGIN {
	    if (total + 0 <= 0) {
	      printf "%.4f", 1
	      exit
	    }
	    m = 1 - (avail / total)
	    if (m > 1) m = 1
	    if (m < 0) m = 0
	    printf "%.4f", m
	  }'
	}
	MEM_PRESSURE=$(_mem_pressure "$MEM_AVAILABLE" "$MEM_TOTAL")

	# =========================================================
	# TOTAL NODE PRESSURE SCORE
	# =========================================================
	# TOTAL_PRESSURE: the node-level score, a fixed-weight sum of the three
	# sub-scores printed with four decimals:
	#   IO     x 0.5  (storage latency is typically the first bottleneck on VPS hosts)
	#   CPU    x 0.3  (secondary, scheduling constraint)
	#   Memory x 0.2  (tertiary, still critical for stability)
	# Each weight needs an explicit `*`. Without it awk reads `io0.5` as the
	# unset variable io0 followed by .5, which yields a constant and ignores
	# the measured IO and CPU scores.
	TOTAL_PRESSURE=$(awk \
	-v io="$IO_PRESSURE" \
	-v cpu="$CPU_PRESSURE" \
	-v mem="$MEM_PRESSURE" '
	BEGIN{
	total = (io*0.5) + (cpu*0.3) + (mem*0.2);
	printf "%.4f", total;
	}')

	# =========================================================
	# OUTPUT: KEY=value pairs on stdout, raw signals first and derived scores
	# after, for ingestion by an external scheduler. One printf emits all seven
	# lines; each argument becomes one line.
	# =========================================================
	printf '%s\n' \
	  "CORES=$CORES CPU_MHZ=$CPU_MHZ CPU_CAPACITY=$CPU_CAPACITY" \
	  "CPU_USAGE=$CPU_USAGE CPU_STEAL=$CPU_STEAL_NORM RUN_QUEUE=$RUN_QUEUE RUN_QUEUE_NORM=$RUN_QUEUE_NORM" \
	  "IO_PRESSURE=$IO_PRESSURE" \
	  "MEM_AVAILABLE=$MEM_AVAILABLE MEM_TOTAL=$MEM_TOTAL" \
	  "CPU_PRESSURE=$CPU_PRESSURE" \
	  "MEM_PRESSURE=$MEM_PRESSURE" \
	  "TOTAL_PRESSURE=$TOTAL_PRESSURE"
No results found