Last active
November 7, 2021 17:24
-
-
Save nathanieltarshish/a401f8434aebad22755013dc58988385 to your computer and use it in GitHub Desktop.
network topology profiler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# The script relies on bash-only features (arrays, readarray, here-strings),
# so it must be run by bash rather than a POSIX /bin/sh.

#######################################
# Print the usage summary and a short explanation of what the script reports.
# Arguments: none
# Outputs:   twelve lines of help text on stdout
# Returns:   exit status of printf (0 on success)
#######################################
Help()
{
  # Display Help
  # One argument per output line; the empty strings produce the blank lines.
  printf '%s\n' \
    "Probes the network topology of a SLURM cluster." \
    "" \
    "Syntax: network_topology.sh [-h -p partition]" \
    "options:" \
    "p only profile specific partition." \
    "h Print this Help." \
    "" \
    "Nodes have the fastest communication with other nodes that " \
    "are connected to the same physical switch. This script displays " \
    "the switches defined in the cluster's topology.conf file. The " \
    "resources (nodes, cpus, memory, features) associated with each " \
    "switch are also shown. For jobs, use sbatch --switches=<count> "
}
# Parse command-line options, then load the switch list into $switches
# (one "SwitchName=... Level=... Nodes=..." line per switch).
#   -h            print help and exit
#   -p PARTITION  keep only topology lines that match PARTITION (grep regex)
partition=""
while getopts ":hp:" option; do
  case "$option" in
    h) # display Help
      Help
      exit 0
      ;;
    p) # only profile a partition
      # OPTARG (not $2) so -p works at any position and as "-pNAME".
      partition=$OPTARG
      ;;
    :) # -p given without a value
      printf 'Error: option -%s requires an argument.\n' "$OPTARG" >&2
      exit 2
      ;;
    \?)
      printf 'Error: invalid option -%s (use -h for help).\n' "$OPTARG" >&2
      exit 2
      ;;
  esac
done

# Query the topology once, and only after option parsing, so that -h does not
# depend on scontrol being available.
if ! switches=$(scontrol show topology); then
  printf 'Error: "scontrol show topology" failed.\n' >&2
  exit 1
fi

if [[ -n "$partition" ]]; then
  # "--" protects against a pattern that starts with a dash.
  switches=$(grep -- "$partition" <<<"$switches")
  if [[ -z "$switches" ]]; then
    printf 'Error: no switches match "%s".\n' "$partition" >&2
    exit 1
  fi
fi
#######################################
# Count the hosts named by a Slurm hostlist expression.
# Handles comma-separated names and bracketed index sets, e.g.
# "n1,n2" -> 2, "n[1-4]" -> 4, "n[01-03,07],gpu[1-2]" -> 6.
# Arguments: $1 - hostlist expression
# Outputs:   the host count on stdout
#######################################
count_hostlist() {
  local hostlist=$1
  local ch part group=""
  local -i total=0 per_item=1 in_item=0 in_brackets=0 group_size=0 i
  local -a parts=()

  for (( i = 0; i < ${#hostlist}; i++ )); do
    ch=${hostlist:i:1}
    if (( in_brackets )); then
      if [[ "$ch" == "]" ]]; then
        # Size of one bracket group: sum of its ranges and single indices.
        group_size=0
        IFS=',' read -r -a parts <<<"$group"
        for part in "${parts[@]}"; do
          if [[ "$part" =~ ^([0-9]+)-([0-9]+)$ ]]; then
            # 10# forces base 10 so zero-padded indices are not read as octal.
            group_size=$(( group_size + 10#${BASH_REMATCH[2]} - 10#${BASH_REMATCH[1]} + 1 ))
          else
            group_size=$(( group_size + 1 ))
          fi
        done
        per_item=$(( per_item * group_size ))
        in_brackets=0
      else
        group+=$ch
      fi
    elif [[ "$ch" == "[" ]]; then
      in_brackets=1
      in_item=1
      group=""
    elif [[ "$ch" == "," ]]; then
      # A comma outside brackets ends the current hostlist item.
      if (( in_item )); then
        total=$(( total + per_item ))
      fi
      per_item=1
      in_item=0
    else
      in_item=1
    fi
  done
  if (( in_item )); then
    total=$(( total + per_item ))
  fi
  printf '%d\n' "$total"
}

#######################################
# Print the report for one leaf switch: a table with one row per group of
# identically configured nodes, or a notice when sinfo knows none of them.
# Arguments: $1 - switch name, $2 - hostlist of the switch's nodes
# Outputs:   the report on stdout
#######################################
report_leaf_switch() {
  local name=$1 nodes=$2
  local info list cpus memory features
  local -a rows=()

  # -e groups only nodes with identical configuration onto one line.
  info=$(sinfo -n "$nodes" -O "Nodelist:400,CPUs,Memory,Features" -e --noheader)

  printf '%s\n' "------------ Switch ${name}----------------"
  #check if info is empty. This occurs if nodes no longer exist on cluster but
  #are still listed in an out-of-date SLURM topology.conf file
  if [[ -n "$info" ]]; then
    rows=("Nodes | CPUs | Memory | Features ")
    while read -r list cpus memory features _; do
      [[ -n "$list" ]] || continue
      # Keep only the leading digits (sinfo may append "+"); Memory is in MB.
      memory=${memory%%[!0-9]*}
      rows+=(" $(count_hostlist "$list") | ${cpus} | $(( ${memory:-0} / 1024 )) Gb | ${features} ")
    done <<<"$info"
    printf '%s\n' "${rows[@]}" | column -t -s '|'
  else
    printf '%s\n' \
      "sinfo did not return information for nodes: ${nodes}" \
      "These nodes may no longer exist and switch" \
      "configuration is out-of-date. "
  fi
  printf '%s\n' "---------------------------------------------"
}

#######################################
# Walk "scontrol show topology" output and report every leaf (Level=0) switch.
# Arguments: $1 - topology text, one switch per line
# Outputs:   one report per leaf switch on stdout
#######################################
profile_switches() {
  local topology=$1
  local line field name level nodes
  local -a lines=() fields=()

  #slurm text block -> array of single switch info per line
  readarray -t lines <<<"$topology"
  for line in "${lines[@]}"; do
    name="" level="" nodes=""
    # read -a splits on whitespace without pathname expansion, so hostlists
    # such as node[1-4] are never matched against files in the current dir.
    read -r -a fields <<<"$line"
    # Look fields up by key instead of by position.
    for field in "${fields[@]}"; do
      case "$field" in
        SwitchName=*) name=${field#SwitchName=} ;;
        Level=*) level=${field#Level=} ;;
        Nodes=*) nodes=${field#Nodes=} ;;
      esac
    done
    #check if leaf switch (also skips blank or unparsable lines)
    [[ "$level" =~ ^0+$ && -n "$nodes" ]] || continue
    report_leaf_switch "$name" "$nodes"
  done
}

profile_switches "${switches-}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment