Last active
October 26, 2021 15:04
-
-
Save rmcgibbo/b846e52f4bec17a8597f to your computer and use it in GitHub Desktop.
Summarize free slots on SLURM queues [script].
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function, division | |
import os | |
import re | |
import sys | |
import itertools | |
from pprint import pprint | |
import subprocess | |
try:
    # distutils was removed from the standard library in Python 3.12
    # (PEP 632); shutil.which is the supported way to search PATH.
    from shutil import which as find_executable
except ImportError:
    # Python 2 has no shutil.which, so fall back to the distutils helper.
    from distutils.spawn import find_executable
from collections import defaultdict
from collections import namedtuple

# Path of the `scontrol` executable, or None when it cannot be found.
SCONTROL_BIN = find_executable('scontrol')

# Hashable record describing one group of nodes: the partition name(s) the
# nodes belong to, the number of allocated CPUs and the total number of CPUs.
status = namedtuple('status', ['partition', 'np_alloc', 'np_total'])
def main():
    """Print a summary table of SLURM node groups that have free CPU slots.

    The groups come from ``collect()``. They are listed in order of
    increasing CPU utilization, and groups whose CPUs are all allocated
    are left out of the table.
    """
    tally = collect()
    header = ['Number of nodes', 'Partition', 'Utilization', 'Free slots']
    table = [header, ['-' * len(title) for title in header]]

    def utilization(group):
        # Ratio of allocated CPUs to total CPUs for this group.
        return group.np_alloc / group.np_total

    for group in sorted(tally, key=utilization):
        free_slots = group.np_total - group.np_alloc
        if free_slots == 0:
            # Nothing free on these nodes, so they are not worth listing.
            continue
        usage = '%s/%s' % (group.np_alloc, group.np_total)
        table.append([tally[group], group.partition, usage, free_slots])

    print('Summary of SLURM nodes with free slots\n')
    print(format_table(table))
def collect():
    """Count nodes grouped by (partition, allocated CPUs, total CPUs).

    Returns
    -------
    count : defaultdict
        Maps each ``status`` tuple to the number of nodes sharing that
        partition name and CPU allocation.
    """
    tally = defaultdict(int)
    node_records = scontrol_show('node')
    partition_of = nodes_to_partition()
    for record in node_records:
        try:
            partition_names = partition_of[record['NodeHostName']]
        except KeyError:
            # Either the record has no NodeHostName field or the host is
            # not listed in any partition; such nodes are not counted.
            continue
        key = status(partition_names,
                     int(record['CPUAlloc']),
                     int(record['CPUTot']))
        tally[key] += 1
    return tally
def nodes_to_partition():
    """Mapping from NodeHostName to PartitionName for each node

    Returns
    -------
    n2p : dict
        Maps each node name to the name of the partition that lists it.
        When a node is listed by several partitions, their names are
        joined with commas.
    """
    partitions = scontrol_show('partition')
    n2p = defaultdict(list)
    for partition in partitions:
        these_nodes = set()
        # A plain split(',') would also cut through bracket expressions
        # such as "sh-1-[1-5,11-12]", so only split on top-level commas.
        for group in _split_hostlist(partition['Nodes']):
            these_nodes.update(expand_bracket(group))
        for node in these_nodes:
            n2p[node].append(partition['PartitionName'])
    return dict((node, ','.join(names)) for node, names in n2p.items())


def _split_hostlist(hostlist):
    """Split a host list on the commas that are not inside brackets."""
    groups = []
    depth = 0
    start = 0
    for pos, char in enumerate(hostlist):
        if char == '[':
            depth += 1
        elif char == ']':
            # Never go negative on a stray closing bracket.
            depth = max(depth - 1, 0)
        elif char == ',' and depth == 0:
            groups.append(hostlist[start:pos])
            start = pos + 1
    groups.append(hostlist[start:])
    return groups
def scontrol_show(entity):
    """Wrapper around the `scontrol show` SLURM utility

    Parameters
    ----------
    entity : {partition, job, node}
        The type of entity to query scontrol for

    Returns
    -------
    vals : list of dicts
        Each element in the list is a dict containing the information about
        one of the requested entities on the system.

    Raises
    ------
    RuntimeError
        If the scontrol executable is not available, or if the command
        exits with a non-zero status or writes anything to stderr.
    """
    # SCONTROL_BIN may be None when the executable lookup found nothing;
    # os.path.exists(None) would raise TypeError instead of this message.
    if not SCONTROL_BIN or not os.path.exists(SCONTROL_BIN):
        raise RuntimeError('This script is for SLURM systems only')

    # universal_newlines=True makes the pipes return text on Python 3 as
    # well as Python 2, so the string comparisons below behave the same.
    comm = subprocess.Popen([SCONTROL_BIN, 'show', entity],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    stdout, stderr = comm.communicate()
    # An explicit check instead of `assert`, which is stripped under -O.
    if comm.returncode != 0 or stderr.strip():
        raise RuntimeError('scontrol show %s failed (exit status %s): %s'
                           % (entity, comm.returncode, stderr.strip()))
    return _parse_scontrol_records(stdout)


def _parse_scontrol_records(text):
    """Parse blank-line separated blocks of `Key=Value` tokens into dicts."""
    records = []
    entry = {}
    for line in text.splitlines():
        if not line.strip():
            # individual sections are marked by blank lines
            if entry:
                records.append(entry)
                entry = {}
            continue
        for item in line.split():
            # Split on the first '=' only so values that contain '='
            # themselves are kept whole.
            key, sep, val = item.partition('=')
            if not sep:
                # Not a Key=Value token (e.g. a word of a free-text value).
                continue
            entry[key] = val
    # Keep the final record even when the output lacks a trailing blank line.
    if entry:
        records.append(entry)
    return records
def expand_bracket(s):
    """Expand SLURM's bracket notation

    The bracket may hold any number of comma separated items, each either
    a single index (``7``) or an inclusive range (``1-5``). Zero padding is
    preserved when the first number of an item starts with ``0``. Text and
    further bracket expressions after the bracket are expanded as well.

    Parameters
    ----------
    s : str
        A host name, optionally containing bracket expressions.

    Returns
    -------
    names : list of str
        The expanded host names, or ``[s]`` when `s` contains no
        well-formed bracket expression.

    Example
    -------
    >>> expand_bracket("sh-1-[1-5]")
    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5']
    >>> expand_bracket('sh-1-[1-5,11-12]')
    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5', 'sh-1-11', 'sh-1-12']
    >>> expand_bracket('n[1,08-10]')
    ['n1', 'n08', 'n09', 'n10']
    """
    # Non-greedy prefix: expand the first bracket here and leave whatever
    # follows it to the recursive call below.
    m = re.match(r'^(.*?)\[([0-9,\-]+)\](.*)$', s)
    if not m:
        return [s]
    prefix, spec, rest = m.groups()

    numbers = []
    for item in spec.split(','):
        first, sep, last = item.partition('-')
        if not sep:
            # A single index is a range of length one.
            last = first
        if not (first.isdigit() and last.isdigit()):
            # Malformed item such as "1-", "" or "1-2-3": leave `s` alone.
            return [s]
        # Pad to the width of the first number only when it is zero-padded.
        width = len(first) if first[0] == '0' else 0
        for j in range(int(first), int(last) + 1):
            numbers.append('%0*d' % (width, j))

    tails = expand_bracket(rest)
    return ['%s%s%s' % (prefix, number, tail)
            for number in numbers for tail in tails]
def format_table(rows):
    """Render `rows` as a plain-text table with left-aligned columns.

    Each column is as wide as its longest cell (after ``str`` conversion)
    plus two characters; cells are left-justified and separated by a single
    space. The formatted rows are returned joined by newlines.
    """
    widths = []
    for column in zip(*rows):
        widths.append(max(len(str(cell)) + 2 for cell in column))
    template = ' '.join('%%-%ds' % width for width in widths)
    return '\n'.join(template % tuple(row) for row in rows)
if __name__ == '__main__':
    # Produce the report only when run as a script, not when imported.
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ free-slots
Summary of SLURM nodes with free slots

Number of nodes   Partition   Utilization   Free slots
---------------   ---------   -----------   ----------
4                 gpu         0/16          16
2                 normal      0/16          16
2                 dev         0/16          16
1                 gpu         12/16         4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment