Created
February 1, 2016 17:13
-
-
Save treydock/9320af1c966a972692d2 to your computer and use it in GitHub Desktop.
SLURM Lua job submit plugin - 15.08
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[
SLURM job submit filter for QOS
Some code and ideas pulled from https://github.com/edf-hpc/slurm-llnl-misc-plugins/blob/master/job_submit/job_submit.lua
--]]
--########################################################################-- | |
-- | |
-- Define constant | |
-- | |
--########################################################################-- | |
-- Static table mapping a partition name to the QOS name associated with it.
-- The "default" entry is the intended fallback for partitions not listed here.
PARTITION_TO_QOS = {
  hepx              = { qos = "hepx" },
  idhmc             = { qos = "idhmc" },
  serial            = { qos = "general" },
  ["serial-long"]   = { qos = "long" },
  ["mpi-core8"]     = { qos = "mpi" },
  ["mpi-core32"]    = { qos = "mpi" },
  ["mpi-core32-4g"] = { qos = "mpi" },
  background        = { qos = "background" },
  ["background-4g"] = { qos = "background" },
  grid              = { qos = "grid" },
  interactive       = { qos = "interactive" },
  default           = { qos = "general" },
}

-- QOS name returned by check_cms_local for locally submitted CMS jobs.
CMS_LOCAL_QOS = "cms-local"
-- UNUSED
--[[
PARTITION_ROUTES = {
  ['something'] = 'something-else',
}
]]
--########################################################################-- | |
-- | |
-- Define functions | |
-- | |
--########################################################################-- | |
--========================================================================-- | |
--- Render a value as a human-readable string (debugging aid).
-- Tables are expanded recursively as `{ [key] = value,... } `; every other
-- value is converted with tostring(). Numeric keys are printed bare, all other
-- keys are wrapped in double quotes.
-- Entry order follows pairs() and is therefore unspecified for hash keys.
-- Cyclic tables are not detected and would recurse without end.
-- @param o any value
-- @return string representation of `o`
function dump(o)
  if type(o) ~= 'table' then
    return tostring(o)
  end

  -- Collect the pieces and join once instead of re-concatenating per entry.
  local pieces = { '{ ' }
  for k, v in pairs(o) do
    local key = k
    if type(key) ~= 'number' then
      -- tostring() keeps boolean/table/function keys from raising a
      -- "attempt to concatenate" error; string keys are unchanged.
      key = '"' .. tostring(key) .. '"'
    end
    pieces[#pieces + 1] = '[' .. key .. '] = ' .. dump(v) .. ','
  end
  pieces[#pieces + 1] = '} '
  return table.concat(pieces)
end
--- Run a shell command and return its standard output as a single line.
-- Leading and trailing whitespace is stripped, and every run of newline /
-- carriage-return characters is replaced by one space.
-- Raises (via assert) if the pipe cannot be opened or read.
-- @param cmd command line handed to io.popen
-- @return the normalised output string
function os.capture(cmd)
  local pipe = assert(io.popen(cmd, 'r'))
  local output = assert(pipe:read('*a'))
  pipe:close()

  -- Each gsub also returns a match count; only the string is carried forward.
  local trimmed = output:gsub('^%s+', ''):gsub('%s+$', '')
  local flattened = trimmed:gsub('[\n\r]+', ' ')
  return flattened
end
--========================================================================-- | |
-- UNUSED
--[[
function reroute_job(job_desc, routeT)
  local partition = routeT["part"]
  log_info("slurm_job_submit#reroute_job: Setting partition to %s", partition)
  job_desc.partition = partition
  if job_desc.qos == nil then
    log_info("slurm_job_submit#reroute_job: Setting QOS to %s", qos)
    job_desc.qos = qos
  end
end
]]
--========================================================================-- | |
--- Return the name of the default partition.
-- Scans `part_list` and returns the key of the first record flagged as default.
-- NOTE(review): the Slurm Lua partition record appears to expose this flag as
-- `flag_default`; the original code read `default_flag`. Both spellings are
-- accepted here so existing behaviour is kept -- confirm against the deployed
-- Slurm version and drop the one that does not exist.
-- @param part_list table of partition records keyed by partition name
-- @return partition name, or nothing when no record is flagged as default
function default_partition(part_list)
  for name, part in pairs(part_list) do
    if part.flag_default == 1 or part.default_flag == 1 then
      return name
    end
  end
  -- No default partition found: fall through and return no value (nil).
end
--========================================================================-- | |
--- Look up a partition record by name.
-- @param part_list table of partition records keyed by partition name
-- @param name      partition name to search for
-- @return the record stored under `name`, or nothing when there is no such key
function get_partition(part_list, name)
  for candidate, record in pairs(part_list) do
    local is_match = (candidate == name)
    if is_match then
      return record
    end
  end
  -- Not found: return no value (nil to the caller).
end
--========================================================================-- | |
--- Decide whether a job should get the local CMS QOS.
-- Only jobs of account "hepx" whose job name is exactly "CMS_CRAB2" qualify.
-- @param job_desc   job descriptor; only `job_desc.name` is read, and only
--                   when the account is "hepx"
-- @param submit_uid submitting uid (currently unused; the passwd-based lookup
--                   below is disabled)
-- @param account    account name of the job
-- @return CMS_LOCAL_QOS for a matching job, nil otherwise
function check_cms_local(job_desc, submit_uid, account)
  local is_cms_crab = (account == "hepx") and (job_desc.name == "CMS_CRAB2")
  if is_cms_crab then
    --slurm.log_info("slurm_job_submit#check_cms_local: job_desc.name matched CMS_CRAB2")
    return CMS_LOCAL_QOS
  end

  -- Disabled username-based detection, kept for reference:
  --local username_cmd = "getent passwd " .. submit_uid .. "| awk -F':' '{print tolower($1)}'"
  --local username
  --username = os.capture(username_cmd)
  --slurm.log_info("slurm_job_submit#check_cms_local: passwd -> %s", username)
  --slurm.log_info("slurm_job_submit#check_cms_local: not a local CMS job")
  return nil
end
--========================================================================-- | |
--- Test whether a QOS name appears in a comma-separated allow list.
-- Entries are compared verbatim (no whitespace trimming, case-sensitive).
-- @param qos       QOS name to look for
-- @param allow_qos comma-separated list of QOS names (must not be nil)
-- @return true if `qos` equals one of the list entries, false otherwise
function check_allow_qos(qos, allow_qos)
  for entry in string.gmatch(allow_qos, "([^,]+)") do
    if entry == qos then
      return true
    end
  end
  return false
end
--========================================================================-- | |
--- Pick the QOS to use for a partition from its allow list.
-- The result is the first entry of the comma-separated `allow_qos` string.
-- The static PARTITION_TO_QOS table is not consulted here; `partition` is
-- only used in the debug log messages.
-- @param partition partition name (may be nil; logged as 'NONE')
-- @param allow_qos comma-separated list of QOS names, or nil
-- @return the first QOS in `allow_qos`, or nil when the list is nil or empty
function get_partition_qos(partition, allow_qos)
  local part = partition or 'NONE'
  slurm.log_debug("slurm_job_submit#get_partition_qos: partition: %s", part)

  if allow_qos == nil then
    return nil
  end

  -- The first comma-delimited field is the partition's preferred QOS.
  local qos = string.match(allow_qos, "([^,]+)")
  if qos ~= nil then
    slurm.log_debug("slurm_job_submit#get_partition_qos: partition: %s matched to qos: %s", part, qos)
  end
  return qos
end
--########################################################################-- | |
-- | |
-- SLURM job_submit/lua interface: | |
-- | |
--########################################################################-- | |
--- job_submit/lua entry point: assign a QOS to jobs submitted without one.
-- Selection order when `job_desc.qos` is nil:
--   1. `job_desc.default_qos`, if it is listed in the partition's allow_qos;
--   2. the first entry of the partition's allow_qos;
--   3. the partition's own `qos` field.
-- If none of these yields a value the job is left unchanged.
-- @param job_desc   job descriptor (reads partition, qos, default_qos; may set qos)
-- @param part_list  table of partition records keyed by partition name
-- @param submit_uid uid of the submitting user (used for logging only)
-- @return slurm.SUCCESS in every case
function slurm_job_submit(job_desc, part_list, submit_uid)
  local partition = job_desc.partition or default_partition(part_list)
  local partition_rec = get_partition(part_list, partition)

  -- An unknown partition name (or no default partition) yields no record.
  -- Leave the job untouched rather than raising an error inside the plugin.
  if partition_rec == nil then
    slurm.log_debug("slurm_job_submit: partition %s not found, qos left unchanged", tostring(partition))
    return slurm.SUCCESS
  end

  -- The CMS-local override is currently disabled:
  --local account = job_desc.account or job_desc.default_account
  --local cms_local_qos = check_cms_local(job_desc, submit_uid, account)
  --[[if cms_local_qos ~= nil then
    slurm.log_info("slurm_job_submit: job from uid %d, setting qos value: %s", submit_uid, cms_local_qos)
    job_desc.qos = cms_local_qos
  end]]

  -- A QOS requested explicitly by the user is never overridden.
  if job_desc.qos ~= nil then
    return slurm.SUCCESS
  end

  local part_qos = partition_rec.qos
  local allow_qos = partition_rec.allow_qos
  local default_qos = job_desc.default_qos
  local qos = nil

  -- First check for a default QOS and whether the partition allows it.
  if default_qos ~= nil then
    slurm.log_debug("slurm_job_submit: found default QOS %s", default_qos)
    if allow_qos ~= nil then
      slurm.log_debug("slurm_job_submit: partition allow_qos %s", allow_qos)
      if check_allow_qos(default_qos, allow_qos) then
        slurm.log_debug("slurm_job_submit: QOS %s allowed in partition %s", default_qos, partition)
        qos = default_qos
      end
    end
  end

  -- Fall back to the first QOS the partition allows.
  if qos == nil then
    slurm.log_debug("slurm_job_submit: no default qos found")
    qos = get_partition_qos(partition, allow_qos)
  end

  -- Last resort: the QOS attached to the partition itself.
  if qos == nil and part_qos ~= nil then
    slurm.log_debug("slurm_job_submit: no default qos or allowed qos found, assign partition qos")
    qos = part_qos
  end

  if qos ~= nil then
    slurm.log_info("slurm_job_submit: job from uid %d, setting qos value: %s", submit_uid, qos)
    job_desc.qos = qos
  end

  return slurm.SUCCESS
end
--- job_submit/lua entry point: adjust QOS and time limit when a job is moved
-- to a different partition.
-- Nothing is changed unless the modification selects a partition different
-- from the job's current one. In that case:
--   * if no QOS was given, keep the job's current QOS when the new partition
--     allows it, otherwise use the first QOS in the new partition's allow list;
--   * if no time limit was given, set it to the new partition's max_time.
-- @param job_desc   requested modification (reads partition, qos, time_limit;
--                   may set qos and time_limit)
-- @param job_rec    current job record (reads partition, qos, job_id)
-- @param part_list  table of partition records keyed by partition name
-- @param modify_uid uid of the user requesting the change (logging only)
-- @return slurm.SUCCESS in every case
function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
  -- Value SLURM passes instead of nil for an unset unsigned 32-bit field.
  local NO_VAL = 4294967294

  local current_partition = job_rec.partition
  local new_partition = job_desc.partition or current_partition

  -- Only act when the partition actually changes.
  if current_partition == new_partition then
    return slurm.SUCCESS
  end

  local new_part_rec = get_partition(part_list, new_partition)
  -- Unknown target partition: leave the request untouched instead of raising
  -- an error inside the plugin.
  if new_part_rec == nil then
    slurm.log_debug("slurm_job_modify: partition %s not found, job left unchanged", tostring(new_partition))
    return slurm.SUCCESS
  end

  -- If qos was not specified
  if job_desc.qos == nil then
    local qos = nil
    local cur_qos = job_rec.qos
    local allow_qos = new_part_rec.allow_qos

    -- Check if the current QOS is allowed in the new partition
    if allow_qos ~= nil then
      -- tostring() guards the %s format against a job without a QOS.
      slurm.log_debug("slurm_job_modify: current QOS %s", tostring(cur_qos))
      slurm.log_debug("slurm_job_modify: partition allow_qos %s", allow_qos)
      if check_allow_qos(cur_qos, allow_qos) then
        slurm.log_debug("slurm_job_modify: QOS %s allowed in partition %s", cur_qos, new_partition)
        qos = cur_qos
      end
    end

    if qos == nil then
      slurm.log_debug("slurm_job_modify: getting partition qos")
      qos = get_partition_qos(new_partition, allow_qos)
    end

    if qos ~= nil then
      slurm.log_info("slurm_job_modify: for job %u from uid %d, qos value: %s", job_rec.job_id, modify_uid, qos)
      job_desc.qos = qos
    end
  end

  -- If time was not specified.
  -- A nil time_limit is treated like NO_VAL; the original compared nil with a
  -- number here, which raises an error.
  -- NOTE(review): this keeps the original effect of assigning the partition
  -- max_time whenever no limit was requested; confirm whether the job's
  -- current limit was meant to be compared instead.
  local time_limit = job_desc.time_limit
  local part_max_time = new_part_rec.max_time
  local time_unset = (time_limit == nil or time_limit == NO_VAL)
  if time_unset and part_max_time ~= nil and NO_VAL > part_max_time then
    slurm.log_info("slurm_job_modify: for job %u from uid %d, time_limit value: %s", job_rec.job_id, modify_uid, part_max_time)
    job_desc.time_limit = part_max_time
  end

  return slurm.SUCCESS
end
-- Executed once when this script is loaded: log that the plugin is ready and
-- hand slurm.SUCCESS back to the loader.
slurm.log_info("initialized")
return slurm.SUCCESS
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment