Skip to content

Instantly share code, notes, and snippets.

@FrankGrimm
Last active June 8, 2020 13:46
Show Gist options
  • Save FrankGrimm/f4f6060341a27953c68d62ad89e8e127 to your computer and use it in GitHub Desktop.
Save FrankGrimm/f4f6060341a27953c68d62ad89e8e127 to your computer and use it in GitHub Desktop.
SLURM squeue: show running cluster processes and tail their respective log files
#!/usr/bin/env python3
import string
import os
import os.path
import subprocess
import sys
import json
import getpass
# Ask squeue for every job with all available fields. The parsing below
# expects one '|'-separated row per job, preceded by a header row that names
# the columns.
p = subprocess.Popen(['squeue', '-o', '%all'], stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE, shell=False)
stdout, stderr = p.communicate()
stderr = stderr.decode("utf-8", errors="replace").strip()
if stderr != '':
    # Any diagnostic output from squeue is treated as fatal.
    print("STDERR: %s" % stderr)
    sys.exit(1)

header = None
# Undecodable bytes (e.g. in a job name) must not abort the whole listing.
stdout = stdout.decode("utf-8", errors="replace")
jobs = []       # parsed rows that belong to the current user
filtered = 0    # number of rows owned by somebody else
# Resolve the current user once instead of once per row.
current_user = getpass.getuser()

for line in stdout.split("\n"):
    if line.strip() == '':
        continue
    line = line.split("|")
    if header is None:
        # First non-empty row: lower-cased column names.
        header = [k.lower() for k in line]
        continue
    entry = {}
    for idx, key in enumerate(header):
        key = key.strip()
        if key == '':
            continue
        # A row with fewer fields than the header gets '' for the missing
        # ones instead of raising IndexError.
        val = line[idx].strip() if idx < len(line) else ''
        if val == "(null)" or val == 'null':
            val = ''
        # Repeated column names are disambiguated by appending underscores.
        while key in entry:
            key = key + "_"
        entry[key] = val
    if entry['user'] == current_user:
        jobs.append(entry)
    else:
        filtered += 1
print("%s jobs (%s filtered)" % (len(jobs), filtered))
def retrieve_slurm_options(command):
    """Collect the ``#SBATCH`` directives of a batch script.

    Args:
        command: Path of the script to read. Surrounding whitespace is
            ignored.

    Returns:
        A tuple ``(opts, basename)``. ``opts`` maps each option name
        (leading dashes removed; the short names ``p``, ``c`` and ``o`` are
        expanded to ``partition``, ``cpus`` and ``output``) to its value as
        a whitespace-stripped string; ``basename`` is
        ``os.path.basename(command)``. ``(None, None)`` is returned when
        ``command`` is empty or does not name an existing path.
    """
    if not command or command.strip() == '':
        return None, None
    command = command.strip()
    if not os.path.exists(command):
        return None, None

    prefix = "#SBATCH "
    short_names = {'p': 'partition', 'c': 'cpus', 'o': 'output'}
    opts = {}
    with open(command, "rt") as infile:
        for line in infile:
            line = line.strip()
            if not line.startswith(prefix):
                continue
            # Strip again so extra blanks after the prefix do not end up
            # being parsed as an empty option name.
            directive = line[len(prefix):].strip()

            # Split at whichever separator comes first, so that both
            # "--export=ALL,A=b" and "--export ALL,A=b" yield key "export".
            sep = next((i for i, ch in enumerate(directive)
                        if ch == "=" or ch.isspace()), -1)
            if sep != -1:
                k, v = directive[:sep], directive[sep + 1:]
            elif (directive.startswith("-")
                  and not directive.startswith("--")
                  and len(directive) > 1):
                # Short option glued to its value, e.g. "-pgpu".
                k, v = directive[1], directive[2:]
            else:
                # No value can be extracted (e.g. a bare "--flag" or "-"):
                # report it and move on instead of crashing.
                print("malformed line %s" % directive)
                continue

            if k.startswith("--"):
                k = k[2:]
            if k.startswith("-"):
                k = k[1:]
            k = k.strip()
            opts[short_names.get(k, k)] = v.strip()
    command_bname = os.path.basename(command)
    return opts, command_bname
def tail_output(job, n=10):
    """Store the last ``n`` non-empty lines of a job's output file in ``job``.

    The file name is taken from ``job['slurm_options']['output']``; ``%t``
    is replaced by ``"0"`` and ``%j`` by ``job['jobid']``, and the result is
    joined onto ``job['work_dir']`` when that key is present.

    Args:
        job: Job dictionary; modified in place. ``None`` is ignored.
        n: Maximum number of lines to keep.

    Returns:
        None. ``job['tail']`` is set to a list of cleaned lines (ANSI CSI
        escape sequences and characters outside ``string.printable``
        removed, surrounding whitespace stripped, empty lines dropped), or
        to ``None`` when no output option is known or the file cannot be
        opened.
    """
    # Imported locally so the function does not rely on additional
    # module-level imports.
    import re
    from collections import deque

    if job is None:
        return
    job['tail'] = None
    options = job.get('slurm_options')
    if not options or 'output' not in options:
        return

    output_filename = options['output']
    output_filename = output_filename.replace("%t", "0")
    output_filename = output_filename.replace("%j", job['jobid'])
    work_dir = job.get('work_dir')
    if work_dir is not None:
        output_filename = os.path.join(work_dir, output_filename)

    # Complete CSI sequence: ESC '[' parameter bytes, intermediate bytes and
    # one final byte. Matching the whole sequence avoids leaving fragments
    # such as "1m" behind.
    ansi_csi = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
    printable = set(string.printable)
    # A bounded deque keeps only the newest n entries without O(n) pops.
    tailed = deque(maxlen=max(n, 0))

    try:
        # errors="replace" turns undecodable bytes into U+FFFD, which the
        # printable filter below removes.
        infile = open(output_filename, "rt", errors="replace")
    except OSError:
        # The log may not exist (yet) or may be unreadable: leave tail None.
        return
    with infile:
        for line in infile:
            line = ansi_csi.sub("", line)
            line = "".join(c for c in line if c in printable).strip()
            if line:
                tailed.append(line)
    job['tail'] = list(tailed)
# Print a short report for each collected job, followed by the last lines of
# its output file when those are available.
for job in jobs:
    job_id = job['jobid']
    job['slurm_options'], job['command_bname'] = retrieve_slurm_options(job['command'])
    tail_output(job, 3)
    print("[job] %s (%s) partition: %s host: %s cpus: %s %s" % (job_id, job['command_bname'], job['partition'] or '', job['exec_host'] or '', job['cpus'] or '', job['gres']))
    print("\t%s started: %s time: %s" % ( job['state'], job['start_time'], job['time'] ))
    # tail_output() always creates the 'tail' key and leaves it as None when
    # no output file is known, so test the value rather than key presence;
    # joining None would raise TypeError.
    if job.get('tail'):
        print("\t" + "\n\t".join(job['tail']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment