Skip to content

Instantly share code, notes, and snippets.

@ncracker
Created June 6, 2019 16:29
Show Gist options
  • Save ncracker/52207c5091f8f06ea82df7a5a54b3de5 to your computer and use it in GitHub Desktop.
Save ncracker/52207c5091f8f06ea82df7a5a54b3de5 to your computer and use it in GitHub Desktop.
#!/bin/sh
###############################################################################
# datadog-agent
#
# Inspired by Boxed Ice <[email protected]>
# Forked by Datadog, Inc. <[email protected]>
#
# Licensed under Simplified BSD License (see LICENSE)
#
###############################################################################
#
# chkconfig: 345 99 01
# description: Datadog Monitoring Agent
### BEGIN INIT INFO
# Provides: datadog-agent
# Short-Description: Start and stop datadog-agent
# Description: datadog-agent is the monitoring agent component for Datadog
# Required-Start: $remote_fs $network
# Required-Stop: $remote_fs $network
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
### END INIT INFO
# --- Agent executables --------------------------------------------------------
AGENTPATH="/opt/datadog-agent/agent/agent.py"
DOGSTATSDPATH="/opt/datadog-agent/agent/dogstatsd.py"
FORWARDERPATH="/opt/datadog-agent/agent/ddagent.py"
TRACEAGENTPATH="/opt/datadog-agent/bin/trace-agent"
KILL_PATH="/opt/datadog-agent/embedded/bin/kill"

# --- Configuration files ------------------------------------------------------
AGENTCONF="/etc/dd-agent/datadog.conf"
AGENTSYSCONFIG="/etc/sysconfig/datadog-agent"

# --- Service identity and bookkeeping -----------------------------------------
AGENTUSER="dd-agent"
PROG="datadog-agent"
PIDPATH="/var/run/dd-agent/"
LOCKFILE="/var/lock/subsys/${PROG}"
COLLECTOR_PIDFILE="/opt/datadog-agent/run/dd-agent.pid"

# --- supervisord --------------------------------------------------------------
# SUPERVISORD_PATH is a "PATH=<dirs> <binary>" word pair. It is assigned before
# the function library is sourced, so it embeds PATH as it is at this point.
SUPERVISORD_PATH="PATH=/opt/datadog-agent/embedded/bin:/opt/datadog-agent/bin:${PATH} /opt/datadog-agent/bin/supervisord"
SUPERVISORCTL_PATH="/opt/datadog-agent/bin/supervisorctl"
SUPERVISOR_CONF="/etc/dd-agent/supervisor.conf"
SUPERVISOR_SOCK="/opt/datadog-agent/run/datadog-supervisor.sock"
SUPERVISOR_PIDFILE="/opt/datadog-agent/run/datadog-supervisord.pid"

# Matches the supervisord programs that are 'optional', i.e. not essential for
# the agent to run.
DD_OPT_PROC_REGEX="dogstatsd|jmxfetch|go-metro|trace-agent|process-agent"

# Source function library.
. /etc/rc.d/init.d/functions

# Pull in sysconfig settings when the file is present.
if [ -f "$AGENTSYSCONFIG" ]; then
  . "$AGENTSYSCONFIG"
fi
#######################################
# Report whether the supervisord-managed agent processes are running.
#
# When the supervisor control socket exists, the output of
# `supervisorctl status` is compared against the [program:...] sections
# declared in the supervisor configuration. Programs whose line matches
# DD_OPT_PROC_REGEX are treated as optional, everything else as essential.
# Without a socket, the `status` helper (expected from the sourced init
# function library) is run against the supervisord pid file instead.
#
# Globals:   SUPERVISOR_SOCK, SUPERVISORCTL_PATH, SUPERVISOR_CONF,
#            SUPERVISOR_PIDFILE, PROG, DD_OPT_PROC_REGEX (read)
#            QUERY, ESSENTIAL, OPTIONAL, FAIL, RETVAL (written)
# Arguments: $1 - optional scope: all (default) | essential | optional.
#                 Any other value prints the valid choices and is then
#                 treated as "all".
# Outputs:   human-readable status on stdout (decorated via success/failure)
# Returns:   socket present: 0 if the selected processes are healthy, else 1
#            socket absent:  the exit status of `status`
#######################################
check_status() {
  QUERY="all"
  case "$1" in
    "" | all) ;;
    essential | optional) QUERY=$1 ;;
    *) echo "Valid arguments for $0: all | essential | optional" ;;
  esac

  ESSENTIAL=0
  OPTIONAL=0
  FAIL=1

  if [ ! -e "$SUPERVISOR_SOCK" ]; then
    # No control socket: fall back to the pid-file based rc status function.
    status -p "$SUPERVISOR_PIDFILE" "$PROG"
    RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
      printf '%s' "Datadog Agent (supervisor) is running."; success; echo
    else
      printf '%s' "Datadog Agent (supervisor) is NOT running."; failure; echo
    fi
    return "$RETVAL"
  fi

  # The socket exists, so supervisorctl can be queried.
  supervisor_status=$("$SUPERVISORCTL_PATH" -c "$SUPERVISOR_CONF" status)

  # Essential programs currently RUNNING.
  running_essential=$(printf '%s\n' "$supervisor_status" \
    | grep -Ev -- "$DD_OPT_PROC_REGEX" \
    | grep -c RUNNING)
  # Optional programs that are not FATAL (any other state is tolerated).
  healthy_optional=$(printf '%s\n' "$supervisor_status" \
    | grep -E -- "$DD_OPT_PROC_REGEX" \
    | grep -cv FATAL)
  # Program sections declared in the supervisor configuration.
  expected_essential=$(grep -Ev -- "$DD_OPT_PROC_REGEX" "$SUPERVISOR_CONF" \
    | grep -c '\[program:')
  expected_optional=$(grep -E -- "$DD_OPT_PROC_REGEX" "$SUPERVISOR_CONF" \
    | grep -c '\[program:')

  if [ "$running_essential" -ne "$expected_essential" ]; then
    ESSENTIAL=1
  fi
  if [ "$healthy_optional" -ne "$expected_optional" ]; then
    OPTIONAL=1
  fi

  # FAIL stays 1 unless the group selected by QUERY is fully healthy.
  if [ "$QUERY" = "essential" ] && [ "$ESSENTIAL" -eq 0 ]; then
    FAIL=0
  elif [ "$QUERY" = "optional" ] && [ "$OPTIONAL" -eq 0 ]; then
    FAIL=0
  elif [ "$QUERY" = "all" ] && [ "$ESSENTIAL" -eq 0 ] && [ "$OPTIONAL" -eq 0 ]; then
    FAIL=0
  fi

  if [ "$FAIL" -gt 0 ]; then
    # Show the raw supervisorctl output so the failing program is visible.
    printf '%s\n' "$supervisor_status"
    printf '%s' "Datadog Agent (supervisor) is NOT running all child processes"; failure; echo
    return 1
  fi

  printf '%s' "Datadog Agent (supervisor) is running all child processes"; success; echo
  return 0
}
#######################################
# Silently probe the essential agent processes.
#
# Runs `check_status essential` inside a command substitution with stdout and
# stderr discarded, so nothing is printed and variables set by check_status do
# not leak into the caller. GRABSTATUS therefore always ends up empty; only
# the exit status carries information.
#
# The POSIX `>/dev/null 2>&1` form is used on purpose: `&>` is a bash
# extension, and a strict POSIX shell reads `cmd &>file` as "run cmd in the
# background", which would make this probe always succeed.
#
# Globals:   GRABSTATUS (written, always empty)
# Returns:   the exit status of `check_status essential`
#######################################
grab_status() {
  GRABSTATUS=$(check_status essential >/dev/null 2>&1)
}
#######################################
# Start the agent under supervisord and wait for it to come up.
#
# Globals:   AGENTCONF, AGENTSYSCONFIG, DATADOG_ENABLED, AGENTUSER, AGENTPATH,
#            SUPERVISOR_PIDFILE, SUPERVISORD_PATH, SUPERVISOR_CONF,
#            LOCKFILE (read); retries (written)
# Outputs:   progress and error messages on stdout
# Returns:   0 once the essential processes are running (lock file created);
#            1 if they are still not running after 10 checks (the agent is
#            stopped again in that case)
# Exits:     3 if the agent configuration file is missing;
#            0 if DATADOG_ENABLED is "no"
#######################################
start() {
  if [ ! -f "$AGENTCONF" ]; then
    echo "$AGENTCONF not found. Exiting."
    exit 3
  fi
  if [ "$DATADOG_ENABLED" = "no" ]; then
    echo "Disabled via $AGENTSYSCONFIG. Exiting."
    exit 0
  fi

  # A broken check configuration is only reported; it does not block startup.
  if ! su "$AGENTUSER" -c "$AGENTPATH configcheck" >/dev/null 2>&1; then
    printf '\n%s' "Invalid check configuration. Please run sudo /etc/init.d/datadog-agent configtest for more details."
    printf '\n%s\n' "Resuming starting process."
  fi

  # no need to test for status before daemon,
  # the daemon function does the right thing
  printf '%s' "Starting Datadog Agent (using supervisord):"
  # SUPERVISORD_PATH is deliberately left unquoted: it holds a
  # "PATH=<dirs> <binary>" pair that must reach `daemon` as separate words.
  daemon --pidfile="$SUPERVISOR_PIDFILE" $SUPERVISORD_PATH -c "$SUPERVISOR_CONF" >/dev/null

  # Check if the agent is running once per second, 10 times in total.
  # (The loop bound is `-gt 0`; `-gt 1` would only allow 9 checks.)
  retries=10
  while [ "$retries" -gt 0 ]; do
    if grab_status; then
      touch "$LOCKFILE"
      success; echo
      return 0
    fi
    retries=$((retries - 1))
    sleep 1
  done

  # After 10 tries the agent didn't start. Report the error and stop.
  # check_status shows the error and takes care of calling `failure`.
  echo; check_status
  stop
  return 1
}
#######################################
# Stop supervisord (and with it the agent) and drop the subsystem lock file.
#
# Globals:   SUPERVISOR_PIDFILE, LOCKFILE (read)
# Outputs:   a progress line on stdout
# Returns:   always 0, whatever killproc reports
#######################################
stop() {
  # no need to test for status before killproc,
  # it does the right thing. and testing supervisorctl status
  # before killproc can lead to states where you cannot stop!
  printf '%s' 'Stopping Datadog Agent (using killproc on supervisord): '
  killproc -p "$SUPERVISOR_PIDFILE" -d 30
  # Quoted and guarded with `--` so an unusual path is removed as one file.
  rm -f -- "$LOCKFILE"
  echo
  return 0
}
# Restart the agent: run stop, then start, unconditionally in that order.
# Returns: the exit status of start.
restart() {
  stop
  start
  return $?
}
#######################################
# Print the info pages of the collector, dogstatsd, the forwarder and the
# trace agent, each run as the agent user, then exit the script.
#
# Globals:   AGENTUSER, AGENTPATH, DOGSTATSDPATH, FORWARDERPATH,
#            TRACEAGENTPATH (read)
#            COLLECTOR_RETURN, DOGSTATSD_RETURN, FORWARDER_RETURN,
#            TRACEAGENT_RETURN (written)
# Arguments: $1   - the literal sub-command name ('info'), discarded
#            $2.. - extra flags appended to the collector's info command
# Exits:     sum of the collector, dogstatsd and forwarder statuses,
#            capped at 255; the trace agent status is not counted
#######################################
info() {
  # Shift 'info' out of the args so we can pass any additional options to the
  # real command (right now only dd-agent supports additional flags).
  if [ "$#" -gt 0 ]; then
    shift
  fi

  # "$*" joins the flags into the single command string handed to `su -c`.
  # "$@" inside a larger quoted string would instead split them into extra
  # arguments to su itself.
  su "$AGENTUSER" -c "$AGENTPATH info $*"
  COLLECTOR_RETURN=$?
  su "$AGENTUSER" -c "$DOGSTATSDPATH info"
  DOGSTATSD_RETURN=$?
  su "$AGENTUSER" -c "$FORWARDERPATH info"
  FORWARDER_RETURN=$?
  su "$AGENTUSER" -c "$TRACEAGENTPATH -info"
  TRACEAGENT_RETURN=$? # not used for now, Trace/APM not a core feature yet

  info_status=$((FORWARDER_RETURN + COLLECTOR_RETURN + DOGSTATSD_RETURN))
  # Exit codes wrap modulo 256, so an uncapped sum such as 256 would be
  # reported as success.
  if [ "$info_status" -gt 255 ]; then
    info_status=255
  fi
  exit "$info_status"
}
#######################################
# Send SIGHUP to the collector process recorded in its pid file, then exit
# the script.
#
# Globals:   COLLECTOR_PIDFILE, KILL_PATH (read); collector_pid (written)
# Outputs:   an error message on stderr when no PID can be read
# Exits:     1 if the pid file is missing, unreadable or empty (no signal is
#            sent in that case); otherwise the exit status of the kill command
#######################################
reload() {
  collector_pid=""
  if [ -r "$COLLECTOR_PIDFILE" ]; then
    # Take the first whitespace-delimited field; `read` may return non-zero
    # when the file lacks a trailing newline, but it still fills the variable.
    read -r collector_pid _ < "$COLLECTOR_PIDFILE"
  fi

  if [ -z "$collector_pid" ]; then
    echo "Cannot reload: no collector PID found in $COLLECTOR_PIDFILE" >&2
    exit 1
  fi

  "$KILL_PATH" -HUP "$collector_pid"
  exit $?
}
# Run the agent's `configcheck` command as the agent user and terminate the
# script with whatever status that command reported.
configcheck() {
  su $AGENTUSER -c "${AGENTPATH} configcheck"
  configcheck_status=$?
  exit "$configcheck_status"
}
# Command dispatch: $1 selects the action; the jmx and flare actions forward
# the remaining arguments. Unknown actions print the usage line and exit 2.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    restart
    ;;
  status)
    # Note: sh does not support arrays
    # Check for kernel version 3.18+ - overlayfs has known bug affecting unix
    # domain sockets
    kernel_release=$(uname -r)
    major=${kernel_release%%.*}
    minor=${kernel_release#*.}
    minor=${minor%%.*}
    # Keep only the leading digits so a field such as "4-rc1" still compares
    # numerically; an empty result counts as 0.
    major=${major%%[!0-9]*}
    minor=${minor%%[!0-9]*}
    major=${major:-0}
    minor=${minor:-0}
    # If major version 3, and minor version 18+, OR major version 4+
    if { [ "$major" -eq 3 ] && [ "$minor" -ge 18 ]; } || [ "$major" -gt 3 ]; then
      RED='\033[0;31m' # Red Text
      NC='\033[0m'     # No Color
      # %b makes printf interpret the \033 escapes; plain `echo` prints them
      # literally in bash.
      printf '%b%s%b\n' "$RED" "Warning: Known bug in Linux Kernel 3.18+ causes 'status' to fail." "$NC"
      echo "Calling 'info', instead..."
      service datadog-agent info || exit 1
    else
      check_status
    fi
    ;;
  info)
    info "$@"
    ;;
  reload)
    reload
    ;;
  configcheck | configtest)
    configcheck
    ;;
  jmx)
    shift
    # "$*" keeps the whole jmx command line inside the single string passed
    # to `su -c`; "$@" here would leak extra arguments to su itself.
    su "$AGENTUSER" -c "$AGENTPATH jmx $*"
    exit $?
    ;;
  flare)
    shift
    # Forward each remaining argument to the agent as its own word.
    "$AGENTPATH" flare "$@"
    exit $?
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|info|status|reload|configcheck|configtest|jmx|flare}"
    exit 2
    ;;
esac
exit $?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment