Last active
February 26, 2018 16:28
-
-
Save caljess599/0fe3e0230c7c4ac758d661bd2af3c124 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# name: whonode
# location: /bin/whonode
# # FUNCTIONS # #
# usage: print a short synopsis to stderr and terminate the script.
# Arguments: none (reads $0 for the program name).
# Outputs:   three lines on stderr.
# Exits:     always, with status 2.
usage() {
  # "$0" is quoted so a script path containing whitespace is still a single
  # printf argument and the format is not repeated.
  printf 'usage: %s [-c] [-h] [-n cvpost<num> ] [-j JobID]* [-u user]* [-r] [-s]\n*flag can be passed multiple times\nFor complete documentation, go to https://staff.nrao.edu/wiki/bin/view/CIS/ClusterKnowHowVisible\n' "$0" >&2
  exit 2
}
# transform ROWS
# Format job rows for the "Time Elapsed" report.
# Globals:   QSTATF (read) - saved `qstat -f` text, searched for each job's
#            exec_host value.
# Arguments: $1 - newline-separated job rows; the fields used are $1 (job ID),
#            $2 (user), $5, $6, $7, $9 (requested time), $10 and $11.
# Outputs:   one tab-separated line per non-blank row on stdout, prefixed with
#            the job's exec_host value. Rows whose $9 is "--" are labelled
#            "BATCH JOB" in the reserved-time and elapsed-time columns.
transform() {
  local row job_id node
  while IFS= read -r row; do
    read -r job_id _ <<< "$row"
    # Blank rows carry no job; skipping them keeps rows and IDs aligned.
    [[ -n $job_id ]] || continue
    # Find the job entry by literal prefix match at the start of a field, so
    # "153.cvpost-serv" cannot select job "5153.cvpost-serv"; then report the
    # exec_host line that precedes the entry's exec_port line.
    node=$(awk -v id="$job_id" '
      !in_job {
        for (i = 1; i <= NF; i++) {
          if (index($i, id) == 1) { in_job = 1; break }
        }
        next
      }
      /exec_host/ { print $3 }
      /exec_port/ { exit }
    ' <<< "$QSTATF")
    # The node name is handed to awk as data (not spliced into the program),
    # after the same "/0+cvpost0" -> "+" shortening as before.
    awk -v node="${node/\/0+cvpost0/+}" '{
      if ($9 == "--")
        printf "%s\t%s\t%-8s\t%s\t%s\t%s\tBATCH JOB\t%s\tBATCH JOB\n", node, $1, $2, $5, $6, $7, $10
      else
        printf "%s\t%s\t%-8s\t%s\t%s\t%s\t%s\t%s\t%s\n", node, $1, $2, $5, $6, $7, $9, $10, $11
    }' <<< "$row"
  done <<< "$1"
}
# transform2 ROWS
# Format job rows for the "Hrs Left" report.
# Globals:   QSTATF (read) - saved `qstat -f` text, searched for each job's
#            exec_host value.
# Arguments: $1 - newline-separated job rows; the fields used are $1 (job ID),
#            $2 (user), $5, $6, $7, $9 (requested time), $10 and $11.
# Outputs:   one tab-separated line per non-blank row on stdout, prefixed with
#            the job's exec_host value. The last column is $9 - $11 (awk
#            numeric subtraction), or "N/A" for rows whose $9 is "--".
transform2() {
  local row job_id node
  while IFS= read -r row; do
    read -r job_id _ <<< "$row"
    # Blank rows carry no job; skipping them keeps rows and IDs aligned.
    [[ -n $job_id ]] || continue
    # Find the job entry by literal prefix match at the start of a field, so
    # "153.cvpost-serv" cannot select job "5153.cvpost-serv"; then report the
    # exec_host line that precedes the entry's exec_port line.
    node=$(awk -v id="$job_id" '
      !in_job {
        for (i = 1; i <= NF; i++) {
          if (index($i, id) == 1) { in_job = 1; break }
        }
        next
      }
      /exec_host/ { print $3 }
      /exec_port/ { exit }
    ' <<< "$QSTATF")
    # The node name is handed to awk as data (not spliced into the program),
    # after the same "/0+cvpost0" -> "+" shortening as before.
    awk -v node="${node/\/0+cvpost0/+}" '{
      if ($9 == "--")
        printf "%s\t%s\t%-8s\t%s\t%s\t%s\tBATCH JOB\t%s\tN/A\n", node, $1, $2, $5, $6, $7, $10
      else
        printf "%s\t%s\t%-8s\t%s\t%s\t%s\t%s\t%s\t%s\n", node, $1, $2, $5, $6, $7, $9, $10, $9 - $11
    }' <<< "$row"
  done <<< "$1"
}
# headline: print the tab-separated column header for the report whose last
# column is "Time Elapsed".
headline() {
  printf 'Node Name\tJob ID\t\t\tUsername\tSessID\tNodes\tTasks\tTime Rsvd\tStatus\tTime Elapsed\n'
}
# headline2: print the tab-separated column header for the report whose last
# column is "Hrs Left".
headline2() {
  printf 'Node Name\tJob ID\t\t\tUsername\tSessID\tNodes\tTasks\tTime Rsvd\tStatus\tHrs Left\n'
}
# altheadline: print the tab-separated column header without the leading
# "Node Name" column (used for rows that have no node prefix).
altheadline() {
  printf 'Job ID\t\t\tUsername\tSessID\tNodes\tTasks\tTime Rsvd\tStatus\tHrs Left\n'
}
# calc EXPR...: evaluate an arithmetic expression with `bc -l` and print the
# result. All arguments are joined with spaces into one expression, so both
# `calc 2 + 3` and `calc "2 + 3"` work.
calc() {
  # "$*" states the intended single-string join explicitly.
  bc -l <<< "$*"
}
# # # GLOBAL VARIABLES # # #
# Put the Torque client directory first so `qstat` is found there.
PATH=/opt/services/torque/bin:$PATH
#temporary values
#TOTALNODES=63
#TOTALIJNODES=61
# regular values
# Nodes that can be reserved for interactive use.
TOTALIJNODES=59
# Nodes permanently reserved for batch jobs.
TOTALBATCHONLY=5
# The total is derived from its two parts so the three values cannot drift
# out of sync (59 + 5 = 64).
TOTALNODES=$((TOTALIJNODES + TOTALBATCHONLY))
# two calls to qstat, output preserved and reused
QSTATA=$(qstat -a)
QSTATF=$(qstat -f)
# # # MAIN PROGRAM # # #
# _whonode_exec_host JOBID_PREFIX
# Print the exec_host value recorded in $QSTATF for the first job entry that
# has a whitespace-separated field starting with JOBID_PREFIX. The prefix is
# compared literally and only at the start of a field, so "153.cvpost" cannot
# select job "5153.cvpost...". Prints nothing when there is no such entry.
_whonode_exec_host() {
  awk -v id="$1" '
    !in_job {
      for (i = 1; i <= NF; i++) {
        if (index($i, id) == 1) { in_job = 1; break }
      }
      next
    }
    /exec_host/ { print $3 }
    /exec_port/ { exit }
  ' <<< "$QSTATF"
}

# _whonode_rows_for_user USER ROWS
# Print the rows of ROWS whose Username column ($2) contains USER literally.
_whonode_rows_for_user() {
  awk -v user="$1" 'index($2, user) > 0' <<< "$2"
}

# main: parse the command-line flags and print the requested report.
# Globals (read): QSTATA, QSTATF, TOTALNODES, TOTALIJNODES, TOTALBATCHONLY
# Calls:          usage, headline, headline2, altheadline, transform, transform2
# Arguments:      the script's command line
#                 ([-c] [-h] [-n node] [-j JobID]* [-u user]* [-r] [-s])
# Outputs:        the report on stdout
# Exits:          after -h, after -j, after an invalid -n value, after the
#                 default report and after the -c count; otherwise returns.
main() {
  # # ARGUMENTS # #
  # Every flag starts empty so a same-named environment variable cannot
  # switch an option on.
  local cflag='' hflag='' jflag='' nflag='' rflag='' sflag='' uflag=''
  local nval=''
  local -a jval=() uval=()
  local OPTION OPTARG OPTIND=1
  while getopts 'chj:n:rsu:x' OPTION; do
    case "$OPTION" in
      c) cflag=1 ;;
      h) hflag=1 ;;
      j) jflag=1; jval+=("$OPTARG") ;;
      n) nflag=1; nval=$OPTARG ;;
      r) rflag=1 ;;
      s) sflag=1 ;;
      u) uflag=1; uval+=("$OPTARG") ;;
      # unknown option, missing argument, or -x (accepted by getopts but
      # not implemented): usage() exits with status 2
      *) usage ;;
    esac
  done
  shift $((OPTIND - 1))

  # -r selects the "hours left" header and row formatter instead of the
  # "time elapsed" ones; the choice is shared by the default and -u reports.
  local header_fn=headline row_fn=transform
  if [[ -n $rflag ]]; then
    header_fn=headline2
    row_fn=transform2
  fi

  # # LOGIC # #
  if [[ -n $hflag ]]; then
    printf '%s\n' \
      "****************** W H O N O D E H E L P *********************" \
      "Usage: whonode [-c] [-h] [-n cvpost<num> ] [-j JobID]* [-u user]* [-r] [-s]" \
      "* indicates that flag can be passed multiple times" \
      "by default, 'whonode' printout includes time elapsed and is sorted by most time elapsed first" \
      "" \
      "Various flags can modify this list:" \
      "-r | replaces time elapsed with hours remaining" \
      "-s | sorts by nodename instead of by elapsed time. Can be combined with -r" \
      "-u username | prints reservations only for a specified username. Can be combined with -r" \
      "" \
      "Other commands:" \
      "-c | prints only a count of reserved nodes and pending reservations" \
      "-h | prints this help message" \
      "-j JobID | prints cvpostnode corresponding to given JobID, e.g., 4789" \
      "-n cvpostnode | prints out who is on the specified node, e.g., cvpost061" \
      "" \
      "Complete documentation is available at https://staff.nrao.edu/wiki/bin/view/CIS/ClusterKnowHowVisible"
    exit
  fi

  # Column layout of the `qstat -a` rows this function relies on:
  #   $1 Job ID  $2 Username  $3 Queue  $4 Jobname  $5 SessID  $6 NDS  $7 TSK
  #   $8 Memory  $9 Reqt      $10 Status  $11 Elsp
  # e.g. 5153.cvpost-serv c***** batch    c4r2          145643 1 1 245gb --        R 00:00:00
  #      4876.cvpost-serv t***** interact interactive_j 8478   1 1 --    1080:00:0 R 665:22:21
  local allnodes assigned reserved assignedijs runningbjs requestedijs queuedbjs
  # every job row, including pending ones
  allnodes=$(grep '[0-9].cvpost-serv' <<< "$QSTATA")
  # a row whose SessID column contains "--" is pending; all others are current
  assigned=$(awk '$5 !~ /--/' <<< "$allnodes")
  reserved=$(awk '$5 ~ /--/' <<< "$allnodes")
  # segregated lists are needed for counting
  assignedijs=$(awk '($3 == "interact") && ($5 !~ /--/)' <<< "$allnodes")
  runningbjs=$(awk '($3 == "batch") && ($5 !~ /--/)' <<< "$allnodes")
  requestedijs=$(awk '($3 == "interact") && ($5 ~ /--/)' <<< "$allnodes")
  queuedbjs=$(awk '($3 == "batch") && ($5 ~ /--/)' <<< "$allnodes")

  # NOW FOR THE STATS
  local assignedijsnum requestedijsnum runningbjscount runningbjsnum queuedbjsnum
  local runningbjsnumnode runningbjsnumnodeverb queuedbjsnumjob totalnodesinuse
  # interactive jobs: add up the NDS column (s+0 prints 0 for an empty list)
  assignedijsnum=$(awk '{ s += $6 } END { print s + 0 }' <<< "$assignedijs")
  requestedijsnum=$(awk '{ s += $6 } END { print s + 0 }' <<< "$requestedijs")
  # batch jobs: one row per job
  runningbjscount=0
  if [[ -n $runningbjs ]]; then
    runningbjscount=$(( $(wc -l <<< "$runningbjs") ))
  fi
  # Batch jobs can share a node, so count distinct exec_host values by their
  # first 9 characters. sort -u (rather than uniq on unsorted input) also
  # merges duplicates that are not adjacent in the job listing.
  local job_id
  runningbjsnum=$(
    while read -r job_id _; do
      [[ -n $job_id ]] && _whonode_exec_host "$job_id"
    done <<< "$runningbjs" | cut -c1-9 | sort -u | wc -l
  )
  runningbjsnum=$((runningbjsnum))
  #grammar police!
  if [[ $runningbjsnum -eq 1 ]]; then
    runningbjsnumnode=node
    runningbjsnumnodeverb=is
  else
    runningbjsnumnode=nodes
    runningbjsnumnodeverb=are
  fi
  queuedbjsnum=0
  if [[ -n $queuedbjs ]]; then
    queuedbjsnum=$(( $(wc -l <<< "$queuedbjs") ))
  fi
  #grammar police!
  if [[ $queuedbjsnum -eq 1 ]]; then
    queuedbjsnumjob=job
  else
    queuedbjsnumjob=jobs
  fi
  # total nodes currently in use (BJs and IJs); both operands are integers
  totalnodesinuse=$((assignedijsnum + runningbjsnum))

  # -j: report the exec_host of each requested JobID
  if [[ -n $jflag ]]; then
    local wanted result
    for wanted in "${jval[@]}"; do
      result=$(_whonode_exec_host "$wanted.cvpost")
      if [[ -z $result ]]; then
        echo "JobID $wanted cannot be found"
      else
        echo "JobID $wanted is on $result"
      fi
    done
    exit
  fi

  # -n: report who (if anyone) has the given node
  if [[ -n $nflag ]]; then
    if [[ $nval =~ cvpost00[1-9]|cvpost0[1-5][0-9]|cvpost06[0-4] ]]; then
      local owner
      # looks for a Job_Owner line within the 11 lines before a mention of the node
      owner=$(grep -B11 -- "$nval" <<< "$QSTATF" | grep Job_Owner | awk '{print $3}')
      owner=${owner%@*}
      if [[ -z $owner ]]; then
        echo "$nval is not currently reserved"
      else
        echo "$nval is reserved by $owner"
      fi
    else
      echo "$nval is not a valid cluster node name"
      exit
    fi
  fi

  # no task-specific flags passed: print all rows for all users plus the totals
  if [[ -z $cflag$uflag$jflag$nflag ]]; then
    # -s sorts by node name; otherwise sort on the 9th column
    # (ascending for hours left, descending for time elapsed)
    local -a sort_opts=(-n -r -k9)
    if [[ -n $sflag ]]; then
      sort_opts=(-r -k1)
    elif [[ -n $rflag ]]; then
      sort_opts=(-n -k9)
    fi
    "$header_fn"
    "$row_fn" "$assigned" | sort "${sort_opts[@]}"
    echo
    echo "----------------------------------------"
    if [[ $assignedijsnum -eq 1 ]]; then
      echo "$assignedijsnum (out of $TOTALIJNODES) HPC node is currently reserved for interactive use."
    else
      echo "$assignedijsnum (out of $TOTALIJNODES) HPC nodes are currently reserved for interactive use."
    fi
    if [[ $runningbjscount -eq 1 ]]; then
      echo "$runningbjscount batch job is currently running on $runningbjsnum $runningbjsnumnode."
    else
      echo "$runningbjscount batch jobs are currently running on $runningbjsnum $runningbjsnumnode."
    fi
    echo "$TOTALBATCHONLY nodes are permanently reserved for batch jobs."
    echo
    echo "Total nodes in use: $totalnodesinuse out of $TOTALNODES"
    echo "----------------------------------------"
    if [[ $requestedijsnum -eq 0 ]]; then
      echo "There are no pending node reservations."
    elif [[ $requestedijsnum -eq 1 ]]; then
      echo "There is $requestedijsnum node reservation."
      echo "$requestedijs"
    else
      echo "Reservations are currently pending for $requestedijsnum nodes:"
      echo "$requestedijs"
    fi
    if [[ $queuedbjsnum -eq 0 ]]; then
      echo "There are no batch jobs currently queued."
    elif [[ $queuedbjsnum -eq 1 ]]; then
      echo "There is $queuedbjsnum batch job currently queued."
      echo "$queuedbjs"
    else
      echo "$queuedbjsnum jobs are currently in the batch-job queue:"
      echo "$queuedbjs"
    fi
    exit
  fi

  # -c without -u: counts only
  if [[ -n $cflag && -z $uflag ]]; then
    if [[ $requestedijsnum -eq 1 ]]; then
      echo "$assignedijsnum (out of $TOTALIJNODES) nodes are currently reserved, with $requestedijsnum reservation pending."
    else
      echo "$assignedijsnum (out of $TOTALIJNODES) nodes are currently reserved, with $requestedijsnum reservations pending."
    fi
    echo "$runningbjsnum $runningbjsnumnode $runningbjsnumnodeverb in use for batch jobs, with $queuedbjsnum $queuedbjsnumjob in the queue."
    echo "$TOTALBATCHONLY nodes are permanently reserved for batch jobs."
    exit
  fi

  # -u: per-user listing, current rows first and pending rows second
  if [[ -n $uflag ]]; then
    if [[ -n $cflag ]]; then
      # uses the counters computed above (the names printed here previously
      # were never assigned, so the numbers came out blank)
      echo "$assignedijsnum nodes are currently reserved, with $requestedijsnum node reservations pending."
    fi
    local user user_rows user_nodes
    for user in "${uval[@]}"; do
      echo
      # all current rows of the user, batch jobs included
      user_rows=$(_whonode_rows_for_user "$user" "$assigned")
      # node total counts interactive rows only; empty when there are none
      user_nodes=$(awk -v user="$user" 'index($2, user) > 0 { s += $6 } END { print s }' <<< "$assignedijs")
      if [[ -n $user_rows ]]; then
        # print the header whenever rows follow, including batch-only users
        "$header_fn"
        "$row_fn" "$user_rows"
      fi
      if [[ -z $user_nodes ]]; then
        echo "User $user has zero nodes reserved."
      elif [[ $user_nodes -eq 1 ]]; then
        echo "User $user has $user_nodes node reserved."
      else
        echo "User $user has $user_nodes nodes reserved."
      fi
    done
    for user in "${uval[@]}"; do
      echo
      # pending rows of the user; these are printed without a node column
      user_rows=$(_whonode_rows_for_user "$user" "$reserved")
      user_nodes=$(awk -v user="$user" 'index($2, user) > 0 { s += $6 } END { print s }' <<< "$reserved")
      if [[ -n $user_rows ]]; then
        altheadline
        awk -v remaining="${rflag:-0}" '
          $9 == "--" {
            printf "%s\t%-8s\t%s\t%s\t%s\tBATCH JOB\t%s\t%s\n", $1, $2, $5, $6, $7, $10, (remaining ? "N/A" : "BATCH JOB")
            next
          }
          {
            printf "%s\t%-8s\t%s\t%s\t%s\t%s\t%s\t%s\n", $1, $2, $5, $6, $7, $9, $10, (remaining ? $9 - $11 : $11)
          }
        ' <<< "$user_rows"
      fi
      if [[ -z $user_nodes ]]; then
        echo "User $user has zero reservations in the queue."
      elif [[ $user_nodes -eq 1 ]]; then
        echo "User $user has $user_nodes reservation in the queue."
      else
        echo "User $user has $user_nodes reservations in the queue."
      fi
    done
  fi
} # END OF MAIN
# Entry point: hand the script's command-line arguments to main().
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment