Created
October 1, 2019 15:01
-
-
Save ebirn/cf52876120648d7d85501fcbf185ff07 to your computer and use it in GitHub Desktop.
magic slurm node list parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
# Sample Slurm node-list expressions used to exercise the parser below.
node_lists = [
    "clip-g1-[0-1],clip-g2-[2-3]",
    "clip-g1-0,clip-g2-0",
    "clip-g1-0,clip-g2-1",
    "clip-g1-1",
    "clip-a-[1,3,5]",
    "clip-b-[1-3,5]",
    "clip-c-[1-3,5,9-12]",
    "clip-d-[5,9-12]",
    "clip-e-[5,9],clip-e-[15-19]",
    "clip-f-[5,9],clip-f-[15,17]",
    "clip-f-5,clip-f-[15,17]",
    "clip-f-[5,9],clip-f-175",
]
# we assume index parts are sane
def expand_idx(idx):
    """Expand an index expression into a sorted list of integers.

    ``idx`` is either a plain number ("7") or a bracketed, comma-separated
    list whose members are single numbers or inclusive ``start-stop``
    ranges ("[1-3,5,9-12]").

    Returns the expanded indexes as ints in ascending order.
    Raises ValueError when a member is not a number or a two-part range.
    """
    if idx.startswith('['):
        # Drop the surrounding brackets.
        idx = idx[1:-1]

    indexes = []
    for part in idx.split(','):
        if "-" in part:
            # A range has exactly two boundaries, so split only once.
            start, stop = part.split('-', 1)
            indexes.extend(range(int(start), int(stop) + 1))
        else:
            indexes.append(int(part))

    indexes.sort()
    return indexes
def expand_nodestr(nodestri, idx_list):
    """Return one node name per index by appending each index to ``nodestri``.

    ``nodestri`` is the common name prefix (e.g. "clip-a-") and ``idx_list``
    an iterable of indexes; the result keeps the order of ``idx_list``.
    """
    return [nodestri + str(idx) for idx in idx_list]
# Matches one comma-separated entry of a node list:
#   group 1 - the whole entry (including a trailing comma, if any)
#   group 2 - the "clip-<type>-" name prefix
#   group 3 - the node type
#   group 4 - a plain index or a bracketed index list
index_pat = re.compile(r'((clip-(\w+)-)(\d+|\[[\d\,\-]+\]),?)')

# Demo driver: dump every match and its expansion for each sample string.
# Each print() takes a single pre-formatted argument so the output is the
# same whether print is a statement (Python 2) or a function (Python 3).
for node_str in node_lists:
    print(node_str)
    for match in index_pat.finditer(node_str):
        print("  %s" % (match.groups(),))
        prefix = match.group(2)
        node_type = match.group(3)
        idx = match.group(4)
        idx_list = expand_idx(idx)
        print("prefix>  %s" % prefix)
        print("type>  %s" % node_type)
        print("idx>  %s" % idx)
        print("count>  %d" % len(idx_list))
        print("list>  %s" % (expand_nodestr(prefix, idx_list),))
~ |
A version that is 10 lines longer, but avoids regular expressions and adds error checking:
# Sample node-list expressions fed to parse_list() by the demo loop.
node_lists = [
    # several comma-separated groups, bracketed or plain
    "clip-g1-[0-1],clip-g2-[2-3]",
    "clip-g1-0,clip-g2-0",
    "clip-g1-0,clip-g2-1",
    "clip-g1-1",
    # a single group mixing values and ranges
    "clip-a-[1,3,5]",
    "clip-b-[1-3,5]",
    "clip-c-[1-3,5,9-12]",
    "clip-d-[5,9-12]",
    # the same prefix repeated across groups
    "clip-e-[5,9],clip-e-[15-19]",
    "clip-f-[5,9],clip-f-[15,17]",
    "clip-f-5,clip-f-[15,17]",
    "clip-f-[5,9],clip-f-175",
]
def parse_int(s):
    """Split a leading run of ASCII digits off the front of ``s``.

    Returns:
        ``(value, rest)`` where ``value`` is the leading digits converted to
        an int and ``rest`` is the unconsumed remainder of ``s``.

    Raises:
        ValueError: if ``s`` is empty or does not start with a digit.
    """
    end = 0
    while end < len(s) and s[end] in "0123456789":
        end += 1
    if end == 0:
        # int("") would raise ValueError as well, but with a message that
        # gives no hint about where the input went wrong.
        raise ValueError(f"Expected a number at {s!r}")
    return int(s[:end]), s[end:]
def parse_brackets(s):
    """Parse the inside of a bracket expression, including the closing ']'.

    ``s`` is the text that follows an opening '['. Members are separated by
    commas and are either a single number or an inclusive ``low-high``
    range. A range whose upper bound is below its lower bound contributes
    no values.

    Returns:
        ``(values, rest)`` where ``values`` is the list of ints in the order
        they appear and ``rest`` is the text after the closing ']'.

    Raises:
        AssertionError: if the closing ']' is missing.
        ValueError: if a number is malformed or followed by an unexpected
            character.
    """
    lst = []
    while s:
        if s[0] == ',':
            s = s[1:]
            continue
        if s[0] == ']':
            return lst, s[1:]
        a, s = parse_int(s)
        # Raised explicitly (not via ``assert``) so the check still runs
        # under ``python -O``; the exception type is unchanged.
        if not s:
            raise AssertionError("Missing closing ']'")
        if s[0] in ',]':
            lst.append(a)
        elif s[0] == '-':
            b, s = parse_int(s[1:])
            lst.extend(range(a, b + 1))
        else:
            # Fail here with a clear message instead of letting the next
            # parse_int() call trip over the stray character.
            raise ValueError(
                f"Unexpected character {s[0]!r} in bracket expression"
            )
    # The input ran out before a ']' was seen.
    raise AssertionError("Missing closing ']'")
def parse_node(s):
    """Parse the first node expression of ``s``.

    A node expression is either a plain name terminated by ',' or the end of
    the string, or a name prefix followed by a bracket expression
    (``name[...]``) that expands to one name per bracketed value.

    Returns:
        ``(names, rest)`` where ``names`` is the list of expanded node names
        and ``rest`` is the text after the expression and its separating
        comma.

    Raises:
        AssertionError: if a bracket expression is followed by anything
            other than ',' or the end of the string.
    """
    for i, c in enumerate(s):
        if c == ',':  # name,...
            return [s[:i]], s[i + 1:]
        if c == '[':  # name[v],...
            b, rest = parse_brackets(s[i + 1:])
            if rest:
                # Raised explicitly (not via ``assert``) so that under
                # ``python -O`` a stray character is not silently dropped.
                if rest[0] != ',':
                    raise AssertionError(
                        f"Expected comma after brackets in {s[i:]}"
                    )
                rest = rest[1:]
            return [s[:i] + str(z) for z in b], rest
    # No ',' or '[' found: the whole string is one plain name.
    return [s], ""
def parse_list(s):
    """Expand a whole node-list string into a flat list of node names.

    Repeatedly hands the unparsed remainder to parse_node() and collects the
    names it returns until the input is used up.
    """
    names = []
    remaining = s
    while remaining:
        chunk, remaining = parse_node(remaining)
        names += chunk
    return names
# Demo: echo each sample expression, then its expansion.
for expression in node_lists:
    print(expression)
    expanded = parse_list(expression)
    print(expanded)
A variant for naming patterns that are padded with zeros, such as cl001:
# Sample node-list expressions fed to parse_list() by the demo loop.
node_lists = [
    # names with a dash-separated numeric suffix
    "clip-g1-[0-1],clip-g2-[2-3]",
    "clip-g1-0,clip-g2-0",
    "clip-g1-0,clip-g2-1",
    "clip-g1-1",
    "clip-a-[1,3,5]",
    "clip-b-[1-3,5]",
    "clip-c-[1-3,5,9-12]",
    "clip-d-[5,9-12]",
    "clip-e-[5,9],clip-e-[15-19]",
    "clip-f-[5,9],clip-f-[15,17]",
    "clip-f-5,clip-f-[15,17]",
    "clip-f-[5,9],clip-f-175",
    # short prefixes, with and without zero padding
    "cg[1-2]",
    "cg[001-002]",
    "cl[028,044,054]",
    "cg[001-002],cl[001-003,005-006,009-046]",
]
def parse_int(s):
    """Split a leading run of ASCII digits off the front of ``s``.

    The digits are returned as a string, not converted, so that any zero
    padding ("001") is preserved for the caller.

    Returns:
        ``(digits, rest)`` where ``digits`` is the non-empty leading digit
        string and ``rest`` is the unconsumed remainder of ``s``.

    Raises:
        ValueError: if ``s`` is empty or does not start with a digit.
            Returning an empty digit string here would consume nothing and
            leave a looping caller stuck on the same input.
    """
    end = 0
    while end < len(s) and s[end] in "0123456789":
        end += 1
    if end == 0:
        raise ValueError(f"Expected a number at {s!r}")
    return s[:end], s[end:]
def parse_brackets(s):
    """Parse the inside of a bracket expression, including the closing ']'.

    ``s`` is the text that follows an opening '['. Members are separated by
    commas and are either a single number or an inclusive ``low-high``
    range. Values are kept as strings so that zero padding survives: a
    single value is returned exactly as written, and a range is zero-filled
    to the width of its lower boundary when that boundary has a leading zero.

    Returns:
        ``(values, rest)`` where ``values`` is the list of number strings in
        the order they appear and ``rest`` is the text after the closing ']'.

    Raises:
        AssertionError: if the closing ']' is missing, a range is descending,
            or the boundaries of a padded range differ in length.
        ValueError: if a number is missing, a number is followed by an
            unexpected character, or the padding style is ambiguous.
    """
    lst = []
    while s:
        if s[0] == ',':
            s = s[1:]
            continue
        if s[0] == ']':
            return lst, s[1:]
        a, s = parse_int(s)
        if not a:
            # Nothing was consumed; without this check the loop would spin
            # forever on the same character.
            raise ValueError(
                f"Expected a number in bracket expression at {s!r}"
            )
        # The checks below are raised explicitly (not via ``assert``) so
        # they still run under ``python -O``; exception types are unchanged.
        if not s:
            raise AssertionError("Missing closing ']'")
        if s[0] in ',]':
            lst.append(a)
        elif s[0] == '-':
            b, s = parse_int(s[1:])
            if not b:
                raise ValueError(
                    f"Expected an upper range boundary at {s!r}"
                )
            low, high = int(a), int(b)
            if low > high:
                raise AssertionError('Invalid range')
            # A leading zero on a lower boundary suggests that the
            # numerical part of the node name is padded with zeros,
            # e.g. nia0001.
            #
            # Just a single 0 on the lower boundary suggests a numerical
            # range without padding, e.g. nia[0-4].
            if a != '0' and a.startswith('0'):
                if len(a) != len(b):
                    raise AssertionError(
                        'Boundaries of a ranged string with padding '
                        'must have the same length.'
                    )
                width = len(a)
                lst.extend(
                    str(x).zfill(width) for x in range(low, high + 1)
                )
            elif a != '0' and b.startswith('0'):
                raise ValueError('Could not determine the padding style.')
            # If no padding is detected, simply use the range.
            else:
                lst.extend(str(x) for x in range(low, high + 1))
        else:
            # Any other character (a space, a letter, ...) is not part of
            # the syntax; reject it instead of looping on it.
            raise ValueError(
                f"Unexpected character {s[0]!r} in bracket expression"
            )
    # The input ran out before a ']' was seen.
    raise AssertionError("Missing closing ']'")
def parse_node(s):
    """Parse the first node expression of ``s``.

    A node expression is either a plain name terminated by ',' or the end of
    the string, or a name prefix followed by a bracket expression
    (``name[...]``) that expands to one name per bracketed value.

    Returns:
        ``(names, rest)`` where ``names`` is the list of expanded node names
        and ``rest`` is the text after the expression and its separating
        comma.

    Raises:
        AssertionError: if a bracket expression is followed by anything
            other than ',' or the end of the string.
    """
    for i, c in enumerate(s):
        if c == ',':  # name,...
            return [s[:i]], s[i + 1:]
        if c == '[':  # name[v],...
            b, rest = parse_brackets(s[i + 1:])
            if rest:
                # Raised explicitly (not via ``assert``) so that under
                # ``python -O`` a stray character is not silently dropped.
                if rest[0] != ',':
                    raise AssertionError(
                        f"Expected comma after brackets in {s[i:]}"
                    )
                rest = rest[1:]
            # Bracket values are already strings (padding preserved).
            return [s[:i] + z for z in b], rest
    # No ',' or '[' found: the whole string is one plain name.
    return [s], ""
def parse_list(s):
    """Expand a whole node-list string into a flat list of node names.

    Feeds the unparsed remainder to parse_node() again and again, gathering
    the names from each call, until nothing is left to parse.
    """
    collected = []
    tail = s
    while tail:
        group, tail = parse_node(tail)
        collected += group
    return collected
# Demo: echo each sample expression, then its expansion.
for expression in node_lists:
    print(expression)
    expanded = parse_list(expression)
    print(expanded)
Alternatively, the node list can be parsed with an EBNF grammar: https://github.com/commonism/slurm_node_list_parser
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Regex support by @rembart; I could not have done this without him.