Created
October 14, 2014 12:39
-
-
Save hornos/fd5ca4ea4ecd9d1f433a to your computer and use it in GitHub Desktop.
Slurm SGI UV monitor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Script identity and scratch-file location.
#   gbn - base name of this script file
#   gdn - directory containing this script file
#   rc  - per-process (PID-suffixed) pmchart view file created under $HOME
# BASH_SOURCE is quoted so a script path containing whitespace is not split
# into several arguments (basename would treat the second word as a suffix).
gbn=$(basename -- "${BASH_SOURCE[0]}")
gdn=$(dirname -- "${BASH_SOURCE[0]}")
rc="${HOME}/.${gbn}rc.$$"
# Print the usage summary on stdout and terminate the script with status 1.
# The text lists the options the getopts loop actually accepts
# (-n, -c, -m, -g, -h); there is no -j option.
function help/jobmon() {
  cat <<'EOF'
Usage: smonitor [-g] {-n|-c|-m} JOBID
  -n JOBID  chart per-node CPU metrics for the job's nodebind list
  -c JOBID  chart per-CPU metrics for the job's physcpubind list
  -m JOBID  chart NUMA memory metrics for the job's membind list
  -g        generate the chart and start pmchart (default)
  -h        show this help
EOF
  exit 1
}
# Cleanup handler: remove the generated pmchart view file (${rc}) and leave
# with the status the script was already exiting with.
# The status is captured first so that a successful run still exits 0 and a
# failure keeps its own code, instead of everything being forced to 1.
function jobmon/stop() {
  local status=$?
  if [[ -n "${rc:-}" ]]; then
    rm -f -- "${rc}"
  fi
  exit "${status}"
}
# Cleanup runs exactly once via the EXIT trap; INT/TERM only convert the
# signal into a conventional 128+signo exit, which then fires the EXIT trap.
trap 'exit 130' INT
trap 'exit 143' TERM
trap jobmon/stop EXIT
# Print the StdOut path that `scontrol show job` reports for a job.
# Arguments: $1 - job id passed to scontrol
# Outputs:   everything after "StdOut=" on the matching line(s)
# Only the "StdOut=" prefix is stripped, so a path that itself contains '='
# is printed whole rather than cut at the second '='.
function jobmon/stdout() {
  scontrol show job "$1" |
    awk '/^[ \t]*StdOut=/ { sub(/^[ \t]*StdOut=/, ""); print }'
}
# Print the text after the first ':' of every line mentioning "physcpubind".
# Arguments: $1 - file to scan (presumably the job's output holding
#                 `numactl --show`-style lines; confirm with the job script)
# Outputs:   the second ':'-separated field, leading blank included
# Returns:   1 without reading anything when $1 is empty or unreadable
#            (an empty argument would otherwise make the reader wait on stdin)
function jobmon/physcpubind() {
  [[ -r "$1" ]] || return 1
  # Redirect instead of passing the name to awk, so a path such as "a=b"
  # is never interpreted as an awk variable assignment.
  awk -F: '/physcpubind/ { print $2 }' < "$1"
}
# Print the text after the first ':' of every line mentioning "membind".
# Arguments: $1 - file to scan (presumably the job's output holding
#                 `numactl --show`-style lines; confirm with the job script)
# Outputs:   the second ':'-separated field, leading blank included
# Returns:   1 without reading anything when $1 is empty or unreadable
#            (an empty argument would otherwise make the reader wait on stdin)
function jobmon/membind() {
  [[ -r "$1" ]] || return 1
  # Redirect instead of passing the name to awk, so a path such as "a=b"
  # is never interpreted as an awk variable assignment.
  awk -F: '/membind/ { print $2 }' < "$1"
}
# Print the text after the first ':' of every line mentioning "nodebind".
# Arguments: $1 - file to scan (presumably the job's output holding
#                 `numactl --show`-style lines; confirm with the job script)
# Outputs:   the second ':'-separated field, leading blank included
# Returns:   1 without reading anything when $1 is empty or unreadable
#            (an empty argument would otherwise make the reader wait on stdin)
function jobmon/nodebind() {
  [[ -r "$1" ]] || return 1
  # Redirect instead of passing the name to awk, so a path such as "a=b"
  # is never interpreted as an awk variable assignment.
  awk -F: '/nodebind/ { print $2 }' < "$1"
}
# Load the physcpubind list of a job into the global variable `pcpu`.
# Arguments: $1 - job id
# Globals:   stdout (written) - path printed by jobmon/stdout
#            pcpu   (written) - output of jobmon/physcpubind for that file
# Exits the script with status 1 when the output file is unknown or
# unreadable. The path is quoted: unquoted, an empty value turned the check
# into `test -r` (always true) and a path with spaces into a test error.
function jobmon/pcpu() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ -z "${stdout}" || ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  pcpu=$(jobmon/physcpubind "${stdout}")
}
# Load the membind list of a job into the global variable `mems`.
# Arguments: $1 - job id
# Globals:   stdout (written) - path printed by jobmon/stdout
#            mems   (written) - output of jobmon/membind for that file
# Exits the script with status 1 when the output file is unknown or
# unreadable. The path is quoted: unquoted, an empty value turned the check
# into `test -r` (always true) and a path with spaces into a test error.
function jobmon/mems() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ -z "${stdout}" || ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  mems=$(jobmon/membind "${stdout}")
}
# Load the nodebind list of a job into the global variable `nodes`.
# Arguments: $1 - job id
# Globals:   stdout (written) - path printed by jobmon/stdout
#            nodes  (written) - output of jobmon/nodebind for that file
# Exits the script with status 1 when the output file is unknown or
# unreadable. The path is quoted: unquoted, an empty value turned the check
# into `test -r` (always true) and a path with spaces into a test error.
function jobmon/nodes() {
  stdout=$(jobmon/stdout "$1")
  # stdout="/home/htom/slurm/mpi-96/slurm.out"
  if [[ -z "${stdout}" || ! -r "${stdout}" ]]; then
    echo "Not found: $stdout" >&2
    exit 1
  fi
  nodes=$(jobmon/nodebind "${stdout}")
}
### args
# Defaults:
#   _g - gate checked after parsing (see the note below)
#   _t - chart type: "nodes", "mems" or "cpus"
#   _h - host name written into every plot line
_g=true
_t="nodes"
_h="uv"
# -n/-m/-c each take a job id, select the chart type and immediately load the
# matching id list (nodes / mems / pcpu) from the job's output file.
while getopts hgn:c:m: o; do
  case "${o}" in
    g)
      _g=true
      ;;
    n)
      _j=${OPTARG}
      _t="nodes"
      jobmon/nodes "${_j}"
      ;;
    m)
      _j=${OPTARG}
      _t="mems"
      jobmon/mems "${_j}"
      ;;
    c)
      _j=${OPTARG}
      _t="cpus"
      jobmon/pcpu "${_j}"
      ;;
    h | *)
      # The usage function in this file is help/jobmon; the previous name
      # (help/sgemon) is not defined, so -h and bad options printed
      # "command not found" and the script carried on.
      help/jobmon
      ;;
  esac
done

# NOTE(review): _g is only ever assigned "true" and $ids is never set in the
# visible code, so this branch looks unreachable - confirm the intent.
if ! ${_g}; then
  echo "$ids"
  exit 0
fi
# Print the kmchart "plot" lines for a list of instance ids.
# Arguments: $1 - host name
#            $2 - metric prefix (e.g. kernel.pernode.cpu)
#            $3 - instance-name prefix (e.g. node or cpu)
#            $4 - space-separated metric suffixes; the n-th suffix is drawn
#                 in the n-th colour of the fixed red/green/yellow/blue list
#            $5... - instance ids; one group of lines is printed per id
# Outputs:   plot lines on stdout, nothing when no ids are given
# Replaces three copy-pasted loops that spawned one awk process per id.
function jobmon/plots() {
  local host=$1 base=$2 inst=$3
  local -a suffixes
  local -a colors=('#ff0000' '#00ff00' '#ffff00' '#0000ff')
  read -r -a suffixes <<< "$4"
  shift 4
  local id i
  for id in "$@"; do
    for i in "${!suffixes[@]}"; do
      printf 'plot color %s host %s metric %s.%s instance %s%s\n' \
        "${colors[i]}" "${host}" "${base}" "${suffixes[i]}" "${inst}" "${id}"
    done
  done
}

# Start the view file with the fixed kmchart header.
cat > "${rc}" <<'EOF'
#kmchart
# run: pmchart -c uv.boot
version 1
chart style plot antialiasing on
EOF

# Append the plot lines for the selected chart type. The id lists are
# whitespace-separated strings, so they are expanded unquoted on purpose.
case "${_t}" in
  nodes)
    # shellcheck disable=SC2086
    jobmon/plots "${_h}" kernel.pernode.cpu node \
      'sys user idle wait.total' ${nodes} >> "${rc}"
    ;;
  mems)
    # shellcheck disable=SC2086
    jobmon/plots "${_h}" mem.numa.util node \
      'used free active inactive' ${mems} >> "${rc}"
    ;;
  cpus)
    # Echo the CPU list to the terminal; unquoted so whitespace is collapsed.
    # shellcheck disable=SC2086
    echo $pcpu
    # shellcheck disable=SC2086
    jobmon/plots "${_h}" kernel.percpu.cpu cpu \
      'sys user idle wait.total' ${pcpu} >> "${rc}"
    ;;
esac

# Open the generated view in pmchart.
pmchart -c "${rc}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment