Created
February 24, 2015 16:48
-
-
Save earthgecko/5588dc17c8ebe2a7c082 to your computer and use it in GitHub Desktop.
skyline horizon watcher
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#### snype_skyline_horizon.sh #### | |
# | |
#### | |
# DESCRIPTION | |
# Mitigates the cases where horizon is running but not populating redis, and
# where horizon is not running properly in general.
# | |
# THIS IS JUST A GIST | |
# THIS IS JUST A GIST | |
# Deal with logging and variables such as SERVER, statsd_node and, if you have
# monit in the mix, etc. yourself, as well as your own skyline log paths. These
# are just bits for reference, a pattern to run via cron every minute.
# Example script log is just flushed here on every run.
# Log file for this watcher script. It is truncated at the start of every run,
# so it only ever holds the output of the most recent invocation.
LOGFILE=/var/log/skyline/snype_skyline_horizon.log
: > "$LOGFILE"
#######################################
# Append one timestamped entry to the script log.
# Globals:   LOGFILE (read) - path of the file that is appended to
# Arguments: $1 - message text
#            $2 - label written in front of the message (the script passes
#                 error, warn or notice)
# Outputs:   appends "<YYYYmmddHHMMSS> - <label>: <message>" to $LOGFILE
#######################################
log() {
  local log_string="$1"
  local output_type="$2"
  # printf instead of echo so a message starting with "-n"/"-e" is written
  # verbatim; LOGFILE is quoted so a path containing spaces still works.
  printf '%s - %s: %s\n' "$(date +%Y%m%d%H%M%S)" "$output_type" "$log_string" >> "$LOGFILE"
}
# Wait 30 seconds before running the checks.
# NOTE(review): presumably an offset from the top of the cron minute - confirm.
sleep 30

# Flag read by the checks further down: 0 = leave horizon alone, 1 = restart it.
HORIZON_RESTART=0

# A missing horizon log always results in a restart; the process count only
# decides which message is written to the script log.
if [[ ! -f /var/log/skyline/horizon.log ]]; then
  # Number of process-list lines mentioning the horizon agent start command
  # (the grep processes themselves are filtered out first).
  CHECK_PROCS=$(ps aux | grep -v grep | grep -c "horizon-agent.py start")
  if (( CHECK_PROCS == 0 )); then
    log "No horizon processes running" error
  else
    log "horizon processes running, but no logfile, restarting horizon" error
  fi
  HORIZON_RESTART=1
fi
# Current time in seconds since the epoch. A value that is already set is kept;
# otherwise it is computed here, because without it the age calculation below
# would evaluate to a negative number and the staleness check could never fire.
TIMESTAMP=${TIMESTAMP:-$(date +%s)}

# Restart horizon when its log file has not been modified for more than
# 180 seconds. Skipped when an earlier check already requested a restart.
if (( HORIZON_RESTART == 0 )); then
  if LOGFILE_EPOCH=$(stat --format=%Y /var/log/skyline/horizon.log); then
    SECONDS_DIFFERENCE=$(( TIMESTAMP - LOGFILE_EPOCH ))
    if (( SECONDS_DIFFERENCE > 180 )); then
      log "The horizon log was last modified $SECONDS_DIFFERENCE seconds ago" warn
      log "horizon set to restart" notice
      HORIZON_RESTART=1
    fi
  else
    # The log file could not be inspected (e.g. it vanished after the
    # existence check), so its age is unknown; treat that as unhealthy.
    log "Could not stat the horizon log" error
    log "horizon set to restart" notice
    HORIZON_RESTART=1
  fi
fi
# This mitigates against horizon running, but not populating redis: restart
# horizon when more than 3 of the last 30 horizon log lines report an empty,
# timed-out worker queue. Skipped when a restart was already requested.
if (( HORIZON_RESTART == 0 )); then
  # Read the log tail once so the lines that are counted are exactly the
  # lines that get copied into the script log.
  EMPTY_QUEUE_LINES=$(tail -n 30 /var/log/skyline/horizon.log \
    | grep "worker queue is empty and timed out")
  EMPTY_QUEUE=$(printf '%s' "$EMPTY_QUEUE_LINES" \
    | grep -c "worker queue is empty and timed out")
  if (( EMPTY_QUEUE > 3 )); then
    log "The horizon log reports worker queue is empty and timed out" warn
    printf '%s\n' "$EMPTY_QUEUE_LINES" >> "$LOGFILE"
    log "horizon set to restart" notice
    HORIZON_RESTART=1
  fi
fi
# Perform the restart requested by any of the checks above and report it to
# statsd as a gauge carrying the restart timestamp.
if (( HORIZON_RESTART == 1 )); then
  # Mitigate monit race condition: monit is stopped while horizon is cycled.
  # NOTE(review): presumably so monit does not start horizon itself while it
  # is down - confirm.
  log "Stopping monit" notice
  /sbin/service monit stop >> "$LOGFILE"
  log "Restarting horizon" notice
  /etc/init.d/horizon stop >> "$LOGFILE"
  sleep 10
  /etc/init.d/horizon start >> "$LOGFILE"
  log "Starting monit" notice
  /sbin/service monit start >> "$LOGFILE"

  # Keep a timestamp that is already set; otherwise use the current epoch so
  # the metric never carries an empty value.
  TIMESTAMP=${TIMESTAMP:-$(date +%s)}

  # statsd_node has to be provided by whoever deploys this script. Without it
  # nc would treat "8125" as the host name, so skip the submission instead.
  if [[ -n "${statsd_node:-}" ]]; then
    if printf '%s\n' "$HOSTNAME.skyline.horizon.restarted:$TIMESTAMP|g" \
      | nc -w 5 -u "$statsd_node" 8125; then
      log "Submitted $HOSTNAME.skyline.horizon.restarted:$TIMESTAMP|g to $statsd_node" notice
    else
      log "Failed to submit $HOSTNAME.skyline.horizon.restarted:$TIMESTAMP|g to $statsd_node" error
    fi
  else
    log "statsd_node is not set, horizon restart metric not submitted" warn
  fi
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment