Skip to content

Instantly share code, notes, and snippets.

@naquad
Created July 3, 2012 09:34
Show Gist options
  • Save naquad/3038752 to your computer and use it in GitHub Desktop.
Save naquad/3038752 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Seconds run_cmd pauses when the supervised command keeps exiting too quickly.
DELAY=5
# Lower bound of the CPU-usage band (sum of ps "pcpu" values) treated as healthy.
MIN_USAGE=20
# Upper bound of that band; a sample outside MIN_USAGE..MAX_USAGE counts
# toward a restart.
MAX_USAGE=95
# Number of consecutive out-of-band samples (the main loop sleeps one second
# between samples) after which the child is stopped and restarted.
WAIT_FOR=10
# Number of restarts in quick succession that run_cmd tolerates before it
# pauses for DELAY seconds.
MAX_RESTARTS_PER_SECOND=5
# Print a diagnostic message to stderr.
# Arguments: $@ - words of the message, joined with single spaces.
# Outputs:   the message followed by a newline on stderr; nothing on stdout.
warn() {
  # Join on a space regardless of the caller's IFS, exactly as echo would.
  local IFS=' '
  # printf instead of echo: a message whose first word looks like an echo
  # option (-n, -e, -E) must be printed literally, not swallowed.
  printf '%s\n' "$*" >&2
}
# Report the combined CPU usage of every descendant of a process.
# Arguments: $1 - PID whose process tree is inspected.
# Outputs:   the summed pcpu of the counted descendants, truncated to an
#            integer; prints nothing when no descendant was counted.
# The ps listing is sorted by start time, and the awk program relies on a
# parent line appearing before the lines of its children. The first
# descendant named "sleep" is registered as part of the tree but left out of
# the sum and of the count.
child_pcpu() {
  ps -e o pid,ppid,pcpu,comm hk +start_time \
    | awk -v root="$1" '
        BEGIN {
          family[root] = 1
          total = 0
          counted = 0
          skipped_sleep = 0
        }

        $2 in family {
          # Remember this PID so that its own children match later lines.
          family[$1] = 1

          if ($4 == "sleep" && !skipped_sleep) {
            skipped_sleep = 1
            next
          }

          total += $3
          counted++
        }

        END {
          if (counted)
            print int(total)
        }
      '
}
# Run a command over and over, restarting it whenever it exits.
# Globals:   MAX_RESTARTS_PER_SECOND (read), DELAY (read)
# Arguments: $@ - the command and its arguments.
# Outputs:   restart notices on stderr via warn.
# Returns:   never returns; the loop only ends when the process is killed.
run_cmd() {
  local streak=0
  local previous=
  local now

  while :; do
    "$@"

    now="$(date +%s)"
    # More than a second since the previous exit: start a new streak.
    if [ -n "$previous" ] && [ "$((now - previous))" -gt 1 ]; then
      streak="0"
    fi
    previous="$now"

    streak=$((streak + 1))
    if [ "$streak" -gt "$MAX_RESTARTS_PER_SECOND" ]; then
      warn "$1 exiting too fast. Waiting $DELAY seconds..."
      sleep "$DELAY"
    fi

    warn "$1 exited. Restarting..."
  done
}
# Send the default kill signal to a process and to all of its descendants.
# Arguments: $1 - PID at the root of the process tree to stop.
# Returns:   the exit status of kill.
# The ps listing is sorted by start time; the awk program relies on a parent
# line appearing before the lines of its children.
stop_child() {
  local descendants
  descendants="$(
    ps -e o pid,ppid hk +start_time \
      | awk -v root="$1" '
          BEGIN { family[root] = 1 }
          $2 in family {
            family[$1] = 1
            print $1
          }
        '
  )"

  # Word splitting is intentional: every PID must become its own argument.
  # shellcheck disable=SC2086
  kill $1 $descendants
}
# Main supervision loop.
#
# Starts run_cmd with the script's arguments in the background, then samples
# the CPU usage of its descendants once per loop iteration (with a one second
# sleep in between). After WAIT_FOR consecutive samples outside the
# MIN_USAGE..MAX_USAGE band the whole process tree is stopped and a fresh
# run_cmd is started.
# Globals: MIN_USAGE, MAX_USAGE, WAIT_FOR (read); WATCHDOG_PID, WAITING,
#          LOAD (written).

# Without a command there is nothing to supervise: run_cmd would loop forever
# on an empty command line, so bail out with a usage message instead.
if [ "$#" -eq 0 ]; then
  warn "Usage: $0 command [args...]"
  exit 2
fi

while true; do
  run_cmd "$@" &
  WATCHDOG_PID="$!"

  # The PID is expanded while the trap is being installed, and the trap is
  # re-installed on every outer iteration, so the handler always targets the
  # run_cmd instance that is currently running.
  trap "stop_child '$WATCHDOG_PID'; wait '$WATCHDOG_PID' &>/dev/null;exit 1" QUIT TERM INT

  WAITING=0
  while true; do
    sleep 1
    LOAD="$(child_pcpu "$WATCHDOG_PID")"

    # An empty LOAD means child_pcpu counted no descendant; that sample is
    # treated like an in-band one and resets the counter.
    if [ -z "$LOAD" ] || { [ "$LOAD" -ge "$MIN_USAGE" ] && [ "$LOAD" -le "$MAX_USAGE" ]; }; then
      WAITING=0
      continue
    fi

    WAITING=$((WAITING + 1))
    if [ "$WAITING" -ge "$WAIT_FOR" ]; then
      # NOTE(review): this branch is also reached when usage stays above
      # MAX_USAGE, although the message only mentions low usage. The text is
      # kept verbatim so existing log matching keeps working.
      warn "Child has to low processor usage. Considreing it idle. Restarting..."
      stop_child "$WATCHDOG_PID"
      wait "$WATCHDOG_PID" &>/dev/null
      WAITING=0
      break
    fi
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment