Created
May 23, 2025 16:16
-
-
Save LouisFaure/db4e108c026a579f16d2ab5c85f30656 to your computer and use it in GitHub Desktop.
Check available resources on a SLURM node
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Parse the output of 'scontrol show node <node_name>' to determine the
# CPUs, memory and GPUs still available on that node for a new task.
#
# Usage: <script> <node_name>

# A node name is required as the first argument.
if [[ -z "$1" ]]; then
  echo "Usage: $0 <node_name>" >&2
  echo "Example: $0 iscg009" >&2
  exit 1
fi

NODE_NAME="$1"
echo "Fetching scontrol data for node: $NODE_NAME..."

# Capture stdout and stderr together so the failure text can be shown.
# The assignment is tested directly instead of inspecting "$?" afterwards.
# NOTE(review): the NodeName= check assumes a successful 'scontrol show node'
# always prints a NodeName= record — confirm against the installed SLURM.
if ! SCONTROL_OUTPUT=$(scontrol show node "$NODE_NAME" 2>&1) \
  || [[ "$SCONTROL_OUTPUT" != *NodeName=* ]]; then
  echo "Error running 'scontrol show node $NODE_NAME'. Please ensure the node name is correct and you have appropriate permissions." >&2
  # printf is used because Bash's echo prints "\n" literally without -e.
  printf 'Scontrol output:\n%s\n' "$SCONTROL_OUTPUT" >&2
  exit 1
fi
# --- Parsing helpers ---

# Print the value of the first "KEY=value" token found in a block of text.
#   $1 - text to search (may span several lines)
#   $2 - key name; matched only at the start of the text or after whitespace
#   $3 - optional ERE the value must match (default: up to the next whitespace)
# Prints nothing when no matching token exists.
scontrol_field() {
  local text="$1" key="$2" value_re="$3"
  if [[ -z "$value_re" ]]; then
    value_re='[^[:space:]]*'
  fi
  local re="(^|[[:space:]])${key}=(${value_re})"
  if [[ "$text" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[2]}"
  fi
}

# Print the value of one entry of a comma-separated TRES list ("a=1,b=2").
#   $1 - TRES list
#   $2 - entry name; matched only at the start of the list or after a comma,
#        so "gres/gpu" does not match the typed "gres/gpu:h100" entry
#   $3 - optional ERE the value must match (default: up to the next comma)
# Prints nothing when the entry is absent.
tres_field() {
  local tres="$1" field="$2" value_re="$3"
  if [[ -z "$value_re" ]]; then
    value_re='[^,]*'
  fi
  local re="(^|,)${field}=(${value_re})"
  if [[ "$tres" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[2]}"
  fi
}

# Sum the counts of all "gpu[:type]:count" entries of a Gres= value,
# e.g. "gpu:h100:8(S:0-1)" -> 8, "gpu:a100:4,gpu:v100:2" -> 6.
#   $1 - Gres value
# Prints nothing when the value holds no GPU entry with a count.
count_gres_gpus() {
  local rest="$1" total=0 found=0
  # Group 3 is the count; group 4 is the unparsed remainder for the next pass.
  local re='(^|,)gpu(:[^:,()]+)?:([0-9]+)(.*)$'
  while [[ "$rest" =~ $re ]]; do
    total=$((total + 10#${BASH_REMATCH[3]}))
    rest="${BASH_REMATCH[4]}"
    found=1
  done
  if ((found)); then
    printf '%s\n' "$total"
  fi
}

# Example TRES lines this section reads:
#   CfgTRES=cpu=56,mem=1031308M,billing=56,gres/gpu=8,gres/gpu:h100=8
#   AllocTRES=cpu=33,mem=972G,gres/gpu=6,gres/gpu:h100=6
CFG_TRES=$(scontrol_field "$SCONTROL_OUTPUT" CfgTRES)
ALLOC_TRES=$(scontrol_field "$SCONTROL_OUTPUT" AllocTRES)

# --- Parse CPU information ---
CPU_EFFECTIVE=$(scontrol_field "$SCONTROL_OUTPUT" CPUEfctv '[0-9]+')
if [[ -z "$CPU_EFFECTIVE" ]]; then
  # NOTE(review): presumably some SLURM releases do not print CPUEfctv;
  # CPUTot is used as the total in that case — confirm.
  CPU_EFFECTIVE=$(scontrol_field "$SCONTROL_OUTPUT" CPUTot '[0-9]+')
fi
CPU_ALLOCATED=$(scontrol_field "$SCONTROL_OUTPUT" CPUAlloc '[0-9]+')

# --- Parse Memory information ---
# The unit suffix is kept so that the conversion step can scale the number.
MEM_CFGTRES_RAW=$(tres_field "$CFG_TRES" mem '[0-9]+[KMGTP]?')
MEM_ALLOCTRES_RAW=$(tres_field "$ALLOC_TRES" mem '[0-9]+[KMGTP]?')

# --- Parse GPU information ---
# The generic "gres/gpu=<n>" TRES entries are preferred for consistency with
# the other TRES-derived values.
TOTAL_GPUS_CFG=$(tres_field "$CFG_TRES" gres/gpu '[0-9]+')
ALLOCATED_GPUS=$(tres_field "$ALLOC_TRES" gres/gpu '[0-9]+')

# Fallback: without a CfgTRES GPU entry, count GPUs from the "Gres=" value
# (e.g. "Gres=gpu:h100:8(S:0-1)").
if [[ -z "$TOTAL_GPUS_CFG" ]]; then
  GRES_LINE=$(scontrol_field "$SCONTROL_OUTPUT" Gres)
  TOTAL_GPUS_CFG=$(count_gres_gpus "$GRES_LINE")
fi

# Default to 0 if no GPU information is found
TOTAL_GPUS_CFG=${TOTAL_GPUS_CFG:-0}
ALLOCATED_GPUS=${ALLOCATED_GPUS:-0}
#######################################
# Convert a memory string (e.g., "972G", "1031308M") to whole megabytes.
# Arguments:
#   $1 - non-negative integer with an optional unit suffix K, M, G, T or P;
#        a bare number is taken as MB.
# Outputs:
#   The amount in MB on stdout. Empty or unparseable input yields "0";
#   unparseable (non-empty) input also writes a diagnostic to stderr.
# Returns:
#   0 in every case, so callers using $(...) are never aborted.
#######################################
convert_mem_to_mb() {
  local mem_str="$1"
  local value
  local unit

  # An empty string means the caller found no memory entry: report 0 quietly.
  if [[ -z "$mem_str" ]]; then
    echo "0"
    return 0
  fi

  # Anchored so that stray characters around the number are rejected
  # instead of being silently ignored.
  if [[ "$mem_str" =~ ^([0-9]+)([KMGTP]?)$ ]]; then
    # 10# forces base 10; a leading zero would otherwise be read as octal.
    value=$((10#${BASH_REMATCH[1]}))
    unit="${BASH_REMATCH[2]:-M}" # Assume MB if no unit
  else
    echo "Error: Could not parse memory string '$mem_str'" >&2
    echo "0"
    return 0
  fi

  case "$unit" in
    K) echo "$((value / 1024))" ;; # integer division: partial MB are dropped
    M) echo "$value" ;;
    G) echo "$((value * 1024))" ;;
    T) echo "$((value * 1024 * 1024))" ;;
    P) echo "$((value * 1024 * 1024 * 1024))" ;;
  esac
}
#######################################
# Derive the free CPU, memory and GPU amounts from the parsed totals.
# Globals read:
#   CPU_EFFECTIVE, CPU_ALLOCATED, MEM_CFGTRES_RAW, MEM_ALLOCTRES_RAW,
#   TOTAL_GPUS_CFG, ALLOCATED_GPUS
# Globals written:
#   MEM_CFGTRES_MB, MEM_ALLOCTRES_MB, FREE_CPU, FREE_MEMORY_MB, FREE_GPUS,
#   FREE_MEMORY_GB
# Each free value stays 0 unless both of its operands are plain
# non-negative integers, so malformed input never reaches $(( )).
#######################################
compute_free_resources() {
  local uint_re='^[0-9]+$'

  MEM_CFGTRES_MB=$(convert_mem_to_mb "$MEM_CFGTRES_RAW")
  MEM_ALLOCTRES_MB=$(convert_mem_to_mb "$MEM_ALLOCTRES_RAW")

  # --- Calculate Free Resources ---
  # 10# forces base 10 so a leading zero is not read as octal.
  FREE_CPU=0
  if [[ "$CPU_EFFECTIVE" =~ $uint_re && "$CPU_ALLOCATED" =~ $uint_re ]]; then
    FREE_CPU=$((10#$CPU_EFFECTIVE - 10#$CPU_ALLOCATED))
  fi

  FREE_MEMORY_MB=0
  if [[ "$MEM_CFGTRES_MB" =~ $uint_re && "$MEM_ALLOCTRES_MB" =~ $uint_re ]]; then
    FREE_MEMORY_MB=$((10#$MEM_CFGTRES_MB - 10#$MEM_ALLOCTRES_MB))
  fi

  FREE_GPUS=0
  if [[ "$TOTAL_GPUS_CFG" =~ $uint_re && "$ALLOCATED_GPUS" =~ $uint_re ]]; then
    FREE_GPUS=$((10#$TOTAL_GPUS_CFG - 10#$ALLOCATED_GPUS))
  fi

  # Convert free memory to GB for readability. The value is handed to awk
  # with -v rather than being pasted into the awk program text.
  FREE_MEMORY_GB=$(awk -v mb="$FREE_MEMORY_MB" 'BEGIN { printf "%.2f", mb / 1024 }')
}

compute_free_resources
# --- Output Results ---

# Print a megabyte amount as gigabytes with two decimals (no unit suffix).
#   $1 - amount in MB; an empty value prints 0.00
# LC_ALL=C pins the decimal separator to "." regardless of the user's locale.
format_mb_as_gb() {
  LC_ALL=C awk -v mb="$1" 'BEGIN { printf "%.2f", mb / 1024 }'
}

#######################################
# Print the resource summary for the node to stdout.
# Globals read:
#   NODE_NAME, CPU_EFFECTIVE, CPU_ALLOCATED, FREE_CPU, MEM_CFGTRES_MB,
#   MEM_ALLOCTRES_MB, FREE_MEMORY_GB, FREE_MEMORY_MB, TOTAL_GPUS_CFG,
#   ALLOCATED_GPUS, FREE_GPUS
#######################################
print_resource_report() {
  local rule="--------------------------------------------------"

  echo "$rule"
  echo "Resource Availability for Node: $NODE_NAME"
  echo "$rule"
  echo "Total Effective CPUs: $CPU_EFFECTIVE"
  echo "Allocated CPUs: $CPU_ALLOCATED"
  echo "Free CPUs: $FREE_CPU"
  echo ""
  echo "Total Configured Memory: $(format_mb_as_gb "$MEM_CFGTRES_MB")G ($MEM_CFGTRES_MB MB)"
  echo "Allocated Memory: $(format_mb_as_gb "$MEM_ALLOCTRES_MB")G ($MEM_ALLOCTRES_MB MB)"
  echo "Free Memory: ${FREE_MEMORY_GB}G ($FREE_MEMORY_MB MB)"
  echo ""
  echo "Total Configured GPUs: $TOTAL_GPUS_CFG"
  echo "Allocated GPUs: $ALLOCATED_GPUS"
  echo "Free GPUs: $FREE_GPUS"
  echo "$rule"
}

print_resource_report

# Example usage for checking specific resource needs:
# if (( FREE_CPU >= 1 && FREE_MEMORY_MB >= 4096 && FREE_GPUS >= 1 )); then
#   echo "Node has at least 1 CPU, 4GB memory, and 1 GPU available."
# else
#   echo "Node does NOT meet the minimum requirements of 1 CPU, 4GB memory, and 1 GPU."
# fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment