Created
May 30, 2025 16:47
-
-
Save LouisFaure/bd6001026d2e207f6b874d09264dc9ea to your computer and use it in GitHub Desktop.
Check all node resources
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# --- Configuration ---
# Slurm partition to inspect. Taken from the first command-line argument when
# one is given; otherwise falls back to the historical default "componc_gpu".
PARTITION_NAME="${1:-componc_gpu}"
# --- Function to convert memory string (e.g., "972G", "1031308M", "2T") to MB ---
# Arguments: $1 - amount followed by an optional unit suffix K, M, G, T or P.
#                 A bare number is assumed to already be in MB.
# Outputs:   the amount in whole MB on stdout; "0" when $1 contains no digits.
#            K values are rounded down to whole MB.
convert_mem_to_mb() {
  local mem_str="$1"
  local value
  local unit

  if [[ "$mem_str" =~ ([0-9]+)([KMGTP]) ]]; then
    value="${BASH_REMATCH[1]}"
    unit="${BASH_REMATCH[2]}"
  elif [[ "$mem_str" =~ ([0-9]+) ]]; then # Assume MB if no unit
    value="${BASH_REMATCH[1]}"
    unit="M"
  else
    # Handle cases where the memory string is empty or has no digits at all
    echo "0"
    return
  fi

  # Force base 10 so a leading zero ("010G") is not parsed as octal.
  value=$((10#$value))

  case "$unit" in
    K) echo "$((value / 1024))" ;;
    G) echo "$((value * 1024))" ;;
    T) echo "$((value * 1024 * 1024))" ;;
    P) echo "$((value * 1024 * 1024 * 1024))" ;;
    *) echo "$value" ;; # M (MB)
  esac
}
# --- Function to fetch and process data for a single node ---
# Runs `scontrol show node` for the node and prints one space-separated row:
#   NodeName State CPU_T CPU_A CPU_F Mem_T Mem_A Mem_F GPU_T GPU_A GPU_F
# Memory columns are in GB with two decimals. When scontrol fails the row is
# "<node> ERROR" followed by nine "N/A" fields.
# Arguments: $1 - node name
# Outputs:   the row on stdout
get_node_data() {
  local node_name="$1"
  local scontrol_output

  if ! scontrol_output=$(scontrol show node "$node_name" 2>&1); then
    # Return a default set of values for error
    echo "$node_name ERROR N/A N/A N/A N/A N/A N/A N/A N/A N/A"
    return
  fi

  # Patterns live in variables so the shell does not re-interpret the
  # parentheses, '|' and brackets when they are used with =~.
  local re_cfg_tres='CfgTRES=([^[:space:]]*)'
  local re_alloc_tres='AllocTRES=([^[:space:]]*)'
  local re_mem='(^|,)mem=([0-9]+[KMGTP]?)'
  local re_gpu='(^|,)gres/gpu=([0-9]+)'
  local re_gres_line='Gres=([^[:space:]]*)'
  # "gpu:4", "gpu:a100:4" or "gpu:a100:4(S:0-1)": the count is the numeric
  # field that ends the entry or is followed by "(".
  local re_gres_gpu='^gpu(:[^:()]+)?:([0-9]+)([(]|$)'

  # --- Parse CPU information ---
  local cpu_effective="" cpu_allocated=""
  if [[ "$scontrol_output" =~ CPUEfctv=([0-9]+) ]]; then
    cpu_effective="${BASH_REMATCH[1]}"
  elif [[ "$scontrol_output" =~ CPUTot=([0-9]+) ]]; then
    # NOTE(review): fallback for output without CPUEfctv (presumably older
    # Slurm releases) -- confirm CPUTot is the right substitute there.
    cpu_effective="${BASH_REMATCH[1]}"
  fi
  if [[ "$scontrol_output" =~ CPUAlloc=([0-9]+) ]]; then
    cpu_allocated="${BASH_REMATCH[1]}"
  fi

  # --- Isolate the TRES lists so each key is matched only inside its own list ---
  local cfg_tres="" alloc_tres=""
  if [[ "$scontrol_output" =~ $re_cfg_tres ]]; then
    cfg_tres="${BASH_REMATCH[1]}"
  fi
  if [[ "$scontrol_output" =~ $re_alloc_tres ]]; then
    alloc_tres="${BASH_REMATCH[1]}"
  fi

  # --- Parse Memory information ---
  # The "(^|,)" anchor keeps "mem=" from matching the tail of a longer key.
  local mem_cfg_raw="" mem_alloc_raw=""
  if [[ "$cfg_tres" =~ $re_mem ]]; then
    mem_cfg_raw="${BASH_REMATCH[2]}"
  fi
  if [[ "$alloc_tres" =~ $re_mem ]]; then
    mem_alloc_raw="${BASH_REMATCH[2]}"
  fi
  local mem_cfg_mb mem_alloc_mb
  mem_cfg_mb=$(convert_mem_to_mb "$mem_cfg_raw")
  mem_alloc_mb=$(convert_mem_to_mb "$mem_alloc_raw")

  # --- Parse GPU information ---
  local total_gpus="" allocated_gpus=""
  if [[ "$cfg_tres" =~ $re_gpu ]]; then
    total_gpus="${BASH_REMATCH[2]}"
  fi
  if [[ "$alloc_tres" =~ $re_gpu ]]; then
    allocated_gpus="${BASH_REMATCH[2]}"
  fi

  # Fallback to the Gres= field if CfgTRES has no 'gres/gpu' entry.
  # Counts from every gpu entry are summed (e.g. "gpu:a100:2,gpu:v100:2" -> 4).
  if [[ -z "$total_gpus" && "$scontrol_output" =~ $re_gres_line ]]; then
    local gres_entry gres_sum=0
    local -a gres_entries=()
    IFS=',' read -r -a gres_entries <<<"${BASH_REMATCH[1]}"
    for gres_entry in "${gres_entries[@]}"; do
      if [[ "$gres_entry" =~ $re_gres_gpu ]]; then
        gres_sum=$((gres_sum + 10#${BASH_REMATCH[2]}))
      fi
    done
    total_gpus="$gres_sum"
  fi
  total_gpus=${total_gpus:-0}
  allocated_gpus=${allocated_gpus:-0}

  # --- Calculate Free Resources ---
  local free_cpu=0
  if [[ -n "$cpu_effective" && -n "$cpu_allocated" ]]; then
    free_cpu=$((cpu_effective - cpu_allocated))
  fi
  local free_memory_mb=$((${mem_cfg_mb:-0} - ${mem_alloc_mb:-0}))
  local free_gpus=$((total_gpus - allocated_gpus))

  # Determine node state (alphabetic base state only; flag suffixes such as
  # "+DRAIN" or "*" are not part of the match).
  local node_state=""
  if [[ "$scontrol_output" =~ State=([A-Za-z]+) ]]; then
    node_state="${BASH_REMATCH[1]}"
  fi

  # Format memory in GB for output. Values are handed to awk with -v rather
  # than being spliced into the program text.
  local mem_cfg_gb mem_alloc_gb free_memory_gb
  mem_cfg_gb=$(awk -v mb="${mem_cfg_mb:-0}" 'BEGIN { printf "%.2f", mb / 1024 }')
  mem_alloc_gb=$(awk -v mb="${mem_alloc_mb:-0}" 'BEGIN { printf "%.2f", mb / 1024 }')
  free_memory_gb=$(awk -v mb="$free_memory_mb" 'BEGIN { printf "%.2f", mb / 1024 }')

  # Output the collected data as a space-separated string. Every field gets a
  # placeholder when empty so the row always has eleven columns.
  echo "$node_name ${node_state:-N/A} ${cpu_effective:-N/A} ${cpu_allocated:-N/A} ${free_cpu:-N/A} ${mem_cfg_gb:-N/A} ${mem_alloc_gb:-N/A} ${free_memory_gb:-N/A} ${total_gpus:-N/A} ${allocated_gpus:-N/A} ${free_gpus:-N/A}"
}
# --- Main Script Logic ---
echo "Gathering resource information for all nodes in partition '$PARTITION_NAME'..."
echo "States considered 'available': IDLE, MIXED, ALLOCATED (DRAINED are excluded)."
echo ""

# Get the nodes of the partition that are idle, allocated or mixed.
# `sinfo -N` prints one node per line, so no hostlist expression such as
# "gn[01-03,07]" has to be expanded by hand. sinfo's stderr is discarded on
# purpose: a failure simply yields an empty list, which is reported below.
processed_nodes=()
while read -r node_name; do
  if [[ -n "$node_name" ]]; then
    processed_nodes+=("$node_name")
  fi
done < <(sinfo -N -p "$PARTITION_NAME" -t idle,alloc,mixed -h -o "%N" 2>/dev/null | sort -u)

if [ ${#processed_nodes[@]} -eq 0 ]; then
  echo "No available nodes found in partition '$PARTITION_NAME' (or partition name is incorrect)."
  exit 0
fi

# Prepare header for the table
# The header should exactly match the number and order of columns returned by get_node_data
HEADER="Node State CPU_T CPU_A CPU_F Mem_T Mem_A Mem_F GPU_T GPU_A GPU_F"

# Stream the header and one row per node straight into `column -t`, which
# aligns the whitespace-separated fields into a table. No temporary file is
# created, so nothing is left behind if the script is interrupted.
{
  echo "$HEADER"
  for NODE in "${processed_nodes[@]}"; do
    get_node_data "$NODE"
  done
} | column -t

echo "" # Add a newline after the table
echo "Script finished."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment