Skip to content

Instantly share code, notes, and snippets.

@hornos
Created October 14, 2014 12:39
Show Gist options
  • Select an option

  • Save hornos/fd5ca4ea4ecd9d1f433a to your computer and use it in GitHub Desktop.

Select an option

Save hornos/fd5ca4ea4ecd9d1f433a to your computer and use it in GitHub Desktop.
Slurm SGI UV monitor
#!/bin/bash
# Script identity and scratch-file location.
#   gbn - file name of this script
#   gdn - directory containing this script
#   rc  - pmchart view file written by this run; lives in $HOME and carries
#         the shell PID ($$) so concurrent runs do not share a file
# The expansions are quoted so that a script path containing whitespace is
# passed to basename/dirname as a single operand instead of being split.
gbn=$(basename -- "${BASH_SOURCE[0]}")
gdn=$(dirname -- "${BASH_SOURCE[0]}")
rc="${HOME}/.${gbn}rc.$$"
#######################################
# Print the usage summary and terminate the script.
# The option list mirrors the getopts string used by the argument parser
# (h, g, n:, c:, m:); the previous text advertised a -j option that the
# parser does not accept.
# Globals:   gbn (read) - script name; falls back to "smonitor" when unset
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
#######################################
function help/jobmon() {
  cat <<EOF
Usage: ${gbn:-smonitor} [-g] -n JOBID | -m JOBID | -c JOBID
  -n JOBID  chart per-node CPU metrics for the job's nodebind list
  -m JOBID  chart NUMA memory metrics for the job's membind list
  -c JOBID  chart per-CPU metrics for the job's physcpubind list
  -g        open the chart in pmchart (default)
  -h        show this help
EOF
  exit 1
}
#######################################
# Cleanup handler: remove the generated pmchart view file and terminate.
# Previously the handler always ran `exit 1`, and because it is installed on
# EXIT that turned every run -- including successful ones -- into exit
# status 1. It now keeps the status the script was already exiting with.
# Globals:   rc (read) - path of the view file to delete
# Arguments: $1 - optional exit status to use; defaults to the status in
#                 effect when the handler was entered
# Returns:   never returns; exits with the chosen status
#######################################
function jobmon/stop() {
  # Capture $? before any other command in the handler overwrites it.
  local code=${1:-$?}
  # Disarm the traps so the `exit` below does not re-enter this handler.
  trap - INT TERM EXIT
  if [[ -n "${rc:-}" && -e "${rc}" ]]; then
    rm -f -- "${rc}"
  fi
  exit "${code}"
}
# Normal termination keeps the script's own status; an interrupt or
# termination signal still exits with 1, as before.
trap 'jobmon/stop' EXIT
trap 'jobmon/stop 1' INT TERM
#######################################
# Print the StdOut path that `scontrol show job` reports for a job.
# Only the leading "StdOut=" key is stripped, so a path that itself contains
# '=' is returned whole (splitting on every '=' used to truncate it).
# Arguments: $1 - job id handed to scontrol
# Outputs:   the value of the StdOut field on stdout; nothing if no line
#            starts with that field
#######################################
function jobmon/stdout() {
  scontrol show job "$1" \
    | sed -n 's/^[[:space:]]*StdOut=//p'
}
#######################################
# Print the value part of every "physcpubind" line found in a file.
# The file is read by awk directly and the path is quoted, so a path that
# contains whitespace is opened as one file instead of being word-split.
# Arguments: $1 - file to scan
# Outputs:   the second ':'-separated field of each matching line
#######################################
function jobmon/physcpubind() {
  awk -F: '/physcpubind/ { print $2 }' "$1"
}
#######################################
# Print the value part of every "membind" line found in a file.
# The file is read by awk directly and the path is quoted, so a path that
# contains whitespace is opened as one file instead of being word-split.
# Arguments: $1 - file to scan
# Outputs:   the second ':'-separated field of each matching line
#######################################
function jobmon/membind() {
  awk -F: '/membind/ { print $2 }' "$1"
}
#######################################
# Print the value part of every "nodebind" line found in a file.
# The file is read by awk directly and the path is quoted, so a path that
# contains whitespace is opened as one file instead of being word-split.
# Arguments: $1 - file to scan
# Outputs:   the second ':'-separated field of each matching line
#######################################
function jobmon/nodebind() {
  awk -F: '/nodebind/ { print $2 }' "$1"
}
#######################################
# Look up a job's StdOut file and load its physcpubind list into `pcpu`.
# The readability check is quoted: with an empty path the unquoted form
# degenerated to one-argument `test -r`, which is true, so the guard was
# skipped and the later read fell through to stdin.
# Globals:   stdout (written) - StdOut path reported for the job
#            pcpu   (written) - output of jobmon/physcpubind for that file
# Arguments: $1 - job id
# Outputs:   "Not found: <path>" on stderr when the file is missing/unreadable
# Returns:   exits the script with status 1 when the file cannot be read
#######################################
function jobmon/pcpu() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  pcpu=$(jobmon/physcpubind "${stdout}")
}
#######################################
# Look up a job's StdOut file and load its membind list into `mems`.
# The readability check is quoted: with an empty path the unquoted form
# degenerated to one-argument `test -r`, which is true, so the guard was
# skipped and the later read fell through to stdin.
# Globals:   stdout (written) - StdOut path reported for the job
#            mems   (written) - output of jobmon/membind for that file
# Arguments: $1 - job id
# Outputs:   "Not found: <path>" on stderr when the file is missing/unreadable
# Returns:   exits the script with status 1 when the file cannot be read
#######################################
function jobmon/mems() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  mems=$(jobmon/membind "${stdout}")
}
#######################################
# Look up a job's StdOut file and load its nodebind list into `nodes`.
# The readability check is quoted: with an empty path the unquoted form
# degenerated to one-argument `test -r`, which is true, so the guard was
# skipped and the later read fell through to stdin.
# Globals:   stdout (written) - StdOut path reported for the job
#            nodes  (written) - output of jobmon/nodebind for that file
# Arguments: $1 - job id
# Outputs:   "Not found: <path>" on stderr when the file is missing/unreadable
# Returns:   exits the script with status 1 when the file cannot be read
#######################################
function jobmon/nodes() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  nodes=$(jobmon/nodebind "${stdout}")
}
### args
# Option parsing.
#   _g - "true" to build the chart and start pmchart
#   _t - which instance list to plot: nodes | mems | cpus
#   _h - host name written into every plot line
#   _j - job id taken from the most recent -n/-m/-c option
# Each of -n/-m/-c immediately loads the matching list for the given job.
# -h and any unrecognised option print the usage text; this arm used to call
# help/sgemon, which is not defined in this script, so it failed with
# "command not found" instead of showing the usage.
_g=true
_t="nodes"
_h="uv"
while getopts hgn:c:m: o; do
  case "$o" in
    g)
      _g=true
      ;;
    n)
      _j=$OPTARG
      _t="nodes"
      jobmon/nodes "$_j"
      ;;
    m)
      _j=$OPTARG
      _t="mems"
      jobmon/mems "$_j"
      ;;
    c)
      _j=$OPTARG
      _t="cpus"
      jobmon/pcpu "$_j"
      ;;
    h|*)
      help/jobmon
      ;;
  esac
done
# NOTE(review): nothing in this script sets _g to anything but true or
# assigns `ids`, so this branch looks unreachable -- confirm the intent.
if ! ${_g} ; then
  echo "$ids"
  exit 0
fi
# Start a fresh pmchart view file with the fixed header lines.
printf '%s\n' \
  '#kmchart' \
  '# run: pmchart -c uv.boot' \
  'version 1' \
  'chart style plot antialiasing on' > "${rc}"

# Select, for the requested target, the metric family, the prefix of the
# instance name, the four leaf metrics and the list of instance ids.
_plot_colors=('#ff0000' '#00ff00' '#ffff00' '#0000ff')
_plot_metric=""
_plot_inst=""
_plot_leaves=()
_plot_ids=""
case "${_t}" in
  nodes)
    _plot_metric="kernel.pernode.cpu"
    _plot_inst="node"
    _plot_leaves=(sys user idle wait.total)
    _plot_ids=$nodes
    ;;
  mems)
    # echo "Nodes for job ${_j}: $ids"
    _plot_metric="mem.numa.util"
    _plot_inst="node"
    _plot_leaves=(used free active inactive)
    _plot_ids=$mems
    ;;
  cpus)
    # echo "CPUs for job ${_j}: $ids"
    echo $pcpu
    _plot_metric="kernel.percpu.cpu"
    _plot_inst="cpu"
    _plot_leaves=(sys user idle wait.total)
    _plot_ids=$pcpu
    # cat $rc
    ;;
esac

# One group of four plot lines per instance id, in colour order
# red / green / yellow / blue. The id list is split on whitespace on purpose.
for _plot_id in ${_plot_ids}; do
  for _plot_k in 0 1 2 3; do
    printf 'plot color %s host %s metric %s.%s instance %s%s\n' \
      "${_plot_colors[_plot_k]}" "${_h}" \
      "${_plot_metric}" "${_plot_leaves[_plot_k]}" \
      "${_plot_inst}" "${_plot_id}"
  done
done >> "${rc}"

# Hand the generated view to pmchart; its status becomes the script's status.
pmchart -c "${rc}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment