Last active
June 8, 2020 13:46
-
-
Save FrankGrimm/f4f6060341a27953c68d62ad89e8e127 to your computer and use it in GitHub Desktop.
SLURM squeue: show running cluster processes and tail their respective log files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import collections
import getpass
import json
import os
import os.path
import re
import string
import subprocess
import sys
# Ask SLURM for every queued/running job with all available columns.
# With the "%all" format squeue prints one pipe-separated header row
# followed by one pipe-separated row per job.
p = subprocess.Popen(['squeue', '-o', '%all'], stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE, shell=False)
stdout, stderr = p.communicate()
stderr = stderr.decode("utf-8").strip()
# Abort on anything written to stderr, and also on a non-zero exit status so
# that a silently failing squeue is not reported as "0 jobs".
if stderr != '' or p.returncode != 0:
    print("STDERR: %s" % stderr)
    sys.exit(1)

header = None
stdout = stdout.decode("utf-8")
jobs = []       # entries (dict: column name -> value) owned by the current user
filtered = 0    # number of entries skipped because they belong to someone else
# The user name does not change while parsing, so look it up only once.
current_user = getpass.getuser()

for line in stdout.split("\n"):
    if line.strip() == '':
        continue
    fields = line.split("|")
    if header is None:
        # First non-empty row: lower-cased column names.
        header = [name.strip().lower() for name in fields]
        continue

    # Pad short rows so every named column gets a value instead of raising
    # IndexError; surplus fields are ignored by zip().
    fields += [''] * (len(header) - len(fields))

    entry = {}
    for key, val in zip(header, fields):
        if key == '':
            continue
        val = val.strip()
        if val == "(null)" or val == 'null':
            val = ''
        # The header may repeat a column name; keep later occurrences under
        # the same name with "_" appended until it is unique.
        while key in entry:
            key = key + "_"
        entry[key] = val

    if entry.get('user') == current_user:
        jobs.append(entry)
    else:
        filtered += 1

print("%s jobs (%s filtered)" % (len(jobs), filtered))
# Prefix that marks a batch directive line inside a job script.
_SBATCH_PREFIX = "#SBATCH "

# Short option letters that are stored under a descriptive key.
_SBATCH_SHORT_NAMES = {'p': 'partition', 'c': 'cpus', 'o': 'output'}


def _split_sbatch_directive(directive):
    """Split the text following "#SBATCH " into a (name, value) pair.

    Returns None when no option name can be extracted.
    """
    if directive.startswith("-") and not directive.startswith("--"):
        # Short option: "-p gpu", "-pgpu", "-p=gpu" or a bare "-p".
        if len(directive) < 2 or not directive[1].isalnum():
            return None
        value = directive[2:].strip()
        if value.startswith("="):
            value = value[1:].strip()
        return directive[1], value

    # Long option (or a bare key): name and value are separated by the
    # first "=" or whitespace character, whichever comes first, so a value
    # containing "=" is kept intact.
    sep = next((i for i, c in enumerate(directive)
                if c == "=" or c.isspace()), None)
    if sep is None:
        if not directive.startswith("--"):
            return None
        # Value-less flag such as "--exclusive".
        name, value = directive, ''
    else:
        name, value = directive[:sep], directive[sep + 1:].strip()

    if name.startswith("--"):
        name = name[2:]
    if name.startswith("-"):
        name = name[1:]
    name = name.strip()
    if not name:
        return None
    return name, value


def retrieve_slurm_options(command):
    """Collect the "#SBATCH" directives of a job script.

    Args:
        command: path of the submitted script (may be empty or None).

    Returns:
        A tuple ``(opts, command_bname)``. ``opts`` maps option names
        (leading dashes removed; ``p``/``c``/``o`` stored as
        ``partition``/``cpus``/``output``) to their string values, with
        value-less flags mapped to ``''``. ``command_bname`` is the base
        name of the script. ``(None, None)`` is returned when ``command``
        is empty or does not name a regular file.

    Directives that cannot be parsed are reported on stdout as
    "malformed line ..." and skipped.
    """
    if not command or command.strip() == '':
        return None, None
    command = command.strip()
    # isfile() rather than exists(): a directory cannot be opened for reading.
    if not os.path.isfile(command):
        return None, None

    opts = {}
    # errors="replace": the command may be a binary rather than a text
    # script; undecodable bytes must not abort the scan.
    with open(command, "rt", errors="replace") as infile:
        for raw in infile:
            raw = raw.strip()
            if not raw.startswith(_SBATCH_PREFIX):
                continue
            directive = raw[len(_SBATCH_PREFIX):].strip()
            parsed = _split_sbatch_directive(directive)
            if parsed is None:
                print("malformed line %s" % directive)
                continue
            name, value = parsed
            opts[_SBATCH_SHORT_NAMES.get(name, name)] = value

    command_bname = os.path.basename(command)
    return opts, command_bname
# ANSI CSI escape sequence: ESC "[" followed by parameter bytes, intermediate
# bytes and one final byte (for example the colour codes "\x1b[31m", "\x1b[0m").
_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")


def _clean_log_line(raw):
    """Return raw without CSI sequences, non-printable characters and edge whitespace."""
    without_escapes = _ANSI_CSI_RE.sub("", raw)
    return "".join(c for c in without_escapes if c in string.printable).strip()


def tail_output(job, n=10):
    """Store the last n non-empty lines of a job's output file in job['tail'].

    The file name is taken from ``job['slurm_options']['output']``; "%t" is
    replaced by "0" and "%j" by ``job['jobid']``. When the job has a
    ``work_dir`` entry the file name is joined onto that directory.

    Args:
        job: job dictionary; it is modified in place.
        n: maximum number of lines to keep.

    Result (written to ``job['tail']``, nothing is returned):
        None  - the job has no slurm options or no "output" option.
        list  - the cleaned lines, oldest first. The list is empty when the
                file holds no printable text or cannot be opened (for
                example because the job has not written anything yet).
    """
    job['tail'] = None
    options = job.get('slurm_options')
    if not options or 'output' not in options:
        return

    output_filename = options['output']
    output_filename = output_filename.replace("%t", "0")
    output_filename = output_filename.replace("%j", job['jobid'])
    work_dir = job.get('work_dir')
    if work_dir is not None:
        output_filename = os.path.join(work_dir, output_filename)

    # A bounded deque discards the oldest line by itself once n is exceeded.
    tailed = collections.deque(maxlen=max(n, 0))
    try:
        # errors="replace": undecodable bytes become U+FFFD, which the
        # printable filter drops again, instead of aborting the read.
        with open(output_filename, "rt", errors="replace") as infile:
            for raw in infile:
                cleaned = _clean_log_line(raw)
                if cleaned != '':
                    tailed.append(cleaned)
    except OSError:
        # Missing or unreadable output file: report an empty tail rather
        # than aborting the listing of the remaining jobs.
        pass
    job['tail'] = list(tailed)
# Print a short report for every job of the current user: one summary line,
# one state/timing line and, when available, the last lines of its output.
for job in jobs:
    job_id = job['jobid']
    job_command = job['command']
    job['slurm_options'], job['command_bname'] = retrieve_slurm_options(job_command)
    tail_output(job, 3)

    # NOTE(review): newer Slurm releases appear to report the generic-resource
    # column as "tres_per_node" instead of "gres" - confirm against the
    # cluster's squeue header. Falling back avoids a KeyError either way.
    job_gres = job.get('gres') or job.get('tres_per_node') or ''

    print("[job] %s (%s) partition: %s host: %s cpus: %s %s" % (
        job_id, job['command_bname'], job['partition'] or '',
        job['exec_host'] or '', job['cpus'] or '', job_gres))
    print("\t%s started: %s time: %s" % (job['state'], job['start_time'], job['time']))

    # tail_output() always creates the 'tail' key and leaves it as None when
    # no output is available, so test the value rather than key presence;
    # joining None would raise TypeError.
    if job.get('tail'):
        print("\t" + "\n\t".join(job['tail']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment