Last active
December 14, 2015 16:40
-
-
Save stephen-soltesz/c6fac242079ac1b153a8 to your computer and use it in GitHub Desktop.
Run with: ./fetch.py --user iupui_ndt --nodelist ../plsync/good.ndt.txt --threadcount=40 --timeout 300 --script check_ndt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(
# Report ndtd worker processes that have been alive for more than five
# minutes AND have accumulated more than 600 clock ticks of CPU time
# (utime + stime). One line is printed per offending process:
#   ppid <parent pid> cpu <ticks> tdiff <ticks> <raw /proc/<pid>/stat line>
#
# The parent ndtd PID is taken from the first ndtd entry in the pstree
# output and is handed to awk with -v rather than being spliced into the
# awk program text, so an empty value cannot produce an awk syntax error.
parent=$( pstree -a -A -p | grep ndtd | head -1 | tr ',' ' ' | awk '{print $2}' )
ps ax -opid,args \
    | grep ndtd \
    | grep -v grep \
    | awk -v parent="$parent" 'BEGIN {
        # 1-based field positions in /proc/<pid>/stat (see proc(5)).
        # NOTE: split() on whitespace assumes the comm field holds no spaces.
        start_time_index = 22 ;
        utime_index = 14 ;
        stime_index = 15 ;

        # Read the start_time of the awk process itself.
        # All older processes will have an earlier (smaller) start_time.
        self_stat = "/proc/self/stat" ;
        if ((getline result < self_stat) > 0) {
            split(result, proc) ;
            awk_start = proc[start_time_index] ;
        }
        close(self_stat) ;
    }
    {
        # Read the stat line of each NDT process straight from /proc.
        # The process may have exited since ps ran; skip it in that case
        # instead of re-evaluating the stat line of the previous process.
        stat_file = "/proc/" $1 "/stat" ;
        status = (getline result < stat_file) ;
        close(stat_file) ;
        if (status <= 0) {
            next ;
        }
        split(result, proc) ;

        cpu_time = proc[utime_index] + proc[stime_index] ;
        t_diff = awk_start - proc[start_time_index] ;

        # Report all processes that have run for more than 300 seconds.
        # This will always apply to the parent ndtd, so exclude PIDs that
        # match the parent.
        # /proc/<pid>/stat starttime increments sysconf(_SC_CLK_TCK) every
        # second. This is 100 on M-Lab (and most systems);
        # So, 5 minutes will be 5*60 seconds == 30000 ticks.
        if (parent != $1) {
            if (t_diff > 30000 && cpu_time > 600) {
                print "ppid", parent, "cpu", cpu_time, "tdiff", t_diff, result ;
            }
        }
    }'
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I then generate a simple report from the logs using: