Created
June 6, 2019 16:29
-
-
Save ncracker/52207c5091f8f06ea82df7a5a54b3de5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
###############################################################################
# datadog-agent
#
# Inspired by Boxed Ice <[email protected]>
# Forked by Datadog, Inc. <[email protected]>
#
# Licensed under Simplified BSD License (see LICENSE)
#
###############################################################################
#
# chkconfig: 345 99 01
# description: Datadog Monitoring Agent
### BEGIN INIT INFO
# Provides: datadog-agent
# Short-Description: Start and stop datadog-agent
# Description: datadog-agent is the monitoring agent component for Datadog
# Required-Start: $remote_fs $network
# Required-Stop: $remote_fs $network
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
### END INIT INFO
# ---------------------------------------------------------------------------
# Settings shared by every action of this init script.
# ---------------------------------------------------------------------------

# Agent executables and their configuration.
AGENTPATH="/opt/datadog-agent/agent/agent.py"
AGENTCONF="/etc/dd-agent/datadog.conf"
AGENTSYSCONFIG="/etc/sysconfig/datadog-agent"
DOGSTATSDPATH="/opt/datadog-agent/agent/dogstatsd.py"
KILL_PATH="/opt/datadog-agent/embedded/bin/kill"
AGENTUSER="dd-agent"
PIDPATH="/var/run/dd-agent/"
PROG="datadog-agent"
LOCKFILE="/var/lock/subsys/$PROG"
FORWARDERPATH="/opt/datadog-agent/agent/ddagent.py"
TRACEAGENTPATH="/opt/datadog-agent/bin/trace-agent"

# supervisord wiring.
# NOTE: SUPERVISORD_PATH is deliberately "PATH=... <binary>" in a single
# string; start() expands it unquoted so that it splits into an environment
# assignment followed by the supervisord executable.
SUPERVISORD_PATH="PATH=/opt/datadog-agent/embedded/bin:/opt/datadog-agent/bin:$PATH /opt/datadog-agent/bin/supervisord"
SUPERVISORCTL_PATH="/opt/datadog-agent/bin/supervisorctl"
SUPERVISOR_CONF="/etc/dd-agent/supervisor.conf"
SUPERVISOR_SOCK="/opt/datadog-agent/run/datadog-supervisor.sock"
SUPERVISOR_PIDFILE="/opt/datadog-agent/run/datadog-supervisord.pid"
COLLECTOR_PIDFILE="/opt/datadog-agent/run/dd-agent.pid"

# Captures processes in the supervisord group that are 'optional', i.e. not
# essential for the agent to run.
DD_OPT_PROC_REGEX="dogstatsd|jmxfetch|go-metro|trace-agent|process-agent"

# Source function library.
. /etc/rc.d/init.d/functions

# Pull in sysconfig settings (optional file; may override the values above).
[ -f "$AGENTSYSCONFIG" ] && . "$AGENTSYSCONFIG"
#######################################
# Report whether the agent's supervised processes are running.
# Globals:
#   SUPERVISOR_SOCK, SUPERVISORCTL_PATH, SUPERVISOR_CONF, SUPERVISOR_PIDFILE,
#   PROG, DD_OPT_PROC_REGEX (read)
#   QUERY, ESSENTIAL, OPTIONAL, FAIL, RETVAL, s, p, p_o, c, c_o (written)
# Arguments:
#   $1 - optional selector: all (default) | essential | optional
# Outputs:
#   A human-readable status line on stdout (plus the raw supervisorctl
#   listing when the selected processes are not all up).
# Returns:
#   0 when the selected processes are running; 1 when they are not; or the
#   exit code of `status` when the supervisor socket does not exist.
# Notes:
#   status/success/failure are not defined in this file; presumably they come
#   from the init functions library sourced by the script.
#######################################
check_status() {
  QUERY="all"
  if [ -n "$1" ] && [ "$1" != "all" ] && [ "$1" != "essential" ] && [ "$1" != "optional" ]; then
    # Unknown selector: report it, then fall back to checking everything.
    echo "Valid arguments for $0: all | essential | optional"
  elif [ -n "$1" ]; then
    QUERY=$1
  fi

  ESSENTIAL=0
  OPTIONAL=0
  FAIL=1

  # Without the socket supervisorctl cannot be queried; use the rc `status`
  # helper against the supervisord pid file instead.
  if [ ! -e "$SUPERVISOR_SOCK" ]; then
    status -p "$SUPERVISOR_PIDFILE" "$PROG"
    RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
      printf '%s' "Datadog Agent (supervisor) is running."; success; echo
    else
      printf '%s' "Datadog Agent (supervisor) is NOT running."; failure; echo
    fi
    return "$RETVAL"
  fi

  s=$("$SUPERVISORCTL_PATH" -c "$SUPERVISOR_CONF" status)

  # Number of RUNNING supervisord programs (ignoring optional procs captured
  # by $DD_OPT_PROC_REGEX).
  p=$(printf '%s\n' "$s" | grep -Ev "$DD_OPT_PROC_REGEX" | grep -c RUNNING)
  # Number of non-failed OPTIONAL supervisord programs.
  p_o=$(printf '%s\n' "$s" | grep -E "$DD_OPT_PROC_REGEX" | grep -cv FATAL)
  # Number of expected running supervisord programs (ignoring optional procs).
  c=$(grep -Ev "$DD_OPT_PROC_REGEX" "$SUPERVISOR_CONF" | grep -c '\[program:')
  # Number of expected optional supervisord programs.
  c_o=$(grep -E "$DD_OPT_PROC_REGEX" "$SUPERVISOR_CONF" | grep -c '\[program:')

  # A group is flagged (1) when its observed count differs from the count
  # declared in the supervisor configuration.
  if [ "$p" -ne "$c" ]; then
    ESSENTIAL=1
  fi
  if [ "$p_o" -ne "$c_o" ]; then
    OPTIONAL=1
  fi

  case "$QUERY" in
    essential)
      [ "$ESSENTIAL" -eq 0 ] && FAIL=0
      ;;
    optional)
      [ "$OPTIONAL" -eq 0 ] && FAIL=0
      ;;
    all)
      # Both groups must be healthy.
      [ "$ESSENTIAL" -eq 0 ] && [ "$OPTIONAL" -eq 0 ] && FAIL=0
      ;;
  esac

  if [ "$FAIL" -gt 0 ]; then
    printf '%s\n' "$s"
    printf '%s' "Datadog Agent (supervisor) is NOT running all child processes"; failure; echo
    return 1
  fi

  printf '%s' "Datadog Agent (supervisor) is running all child processes"; success; echo
  return 0
}
#######################################
# Quietly test whether the essential agent processes are up.
# Globals:   GRABSTATUS (written; always empty because output is discarded)
# Arguments: none
# Outputs:   nothing (stdout and stderr of check_status are dropped)
# Returns:   the exit status of `check_status essential`
#######################################
grab_status() {
  # `&>` is a bash extension; a POSIX sh parses it as "run in background,
  # then redirect", which would make this always succeed. Use the portable
  # form. The command substitution keeps check_status in a subshell so its
  # globals do not leak into the caller.
  GRABSTATUS=$(check_status essential >/dev/null 2>&1)
}
#######################################
# Start the agent under supervisord and wait for it to come up.
# Globals:
#   AGENTCONF, AGENTSYSCONFIG, DATADOG_ENABLED, AGENTUSER, AGENTPATH,
#   SUPERVISOR_PIDFILE, SUPERVISORD_PATH, SUPERVISOR_CONF, LOCKFILE (read)
#   retries (written)
# Arguments: none
# Outputs:   progress and diagnostic messages on stdout
# Returns:
#   0 once grab_status succeeds (the lock file is then touched);
#   1 if the agent is still not up after 10 checks (stop is called first).
# Exits:
#   3 when $AGENTCONF is missing; 0 when DATADOG_ENABLED is "no".
#######################################
start() {
  if [ ! -f "$AGENTCONF" ]; then
    echo "$AGENTCONF not found. Exiting."
    exit 3
  fi

  if [ "$DATADOG_ENABLED" = "no" ]; then
    echo "Disabled via $AGENTSYSCONFIG. Exiting."
    exit 0
  fi

  # A failing configcheck is only reported; it does not abort the start.
  if ! su "$AGENTUSER" -c "$AGENTPATH configcheck" > /dev/null 2>&1; then
    # printf rather than $'\n', which is not available in POSIX sh.
    printf '\n%s\n%s\n' \
      "Invalid check configuration. Please run sudo /etc/init.d/datadog-agent configtest for more details." \
      "Resuming starting process."
  fi

  # no need to test for status before daemon,
  # the daemon function does the right thing
  printf '%s' "Starting Datadog Agent (using supervisord):"
  # $SUPERVISORD_PATH is intentionally unquoted: it holds "PATH=... <binary>"
  # and must split into separate words.
  daemon --pidfile="$SUPERVISOR_PIDFILE" $SUPERVISORD_PATH -c "$SUPERVISOR_CONF" > /dev/null

  # check if the agent is running once per second, 10 times in total
  retries=10
  while [ "$retries" -gt 0 ]; do
    if grab_status; then
      touch "$LOCKFILE"
      success; echo
      return 0
    fi
    retries=$((retries - 1))
    sleep 1
  done

  # after 10 tries the agent didn't start. Report the error and stop
  echo; check_status # check status will show us the error and take care of calling `failure`
  stop
  return 1
}
#######################################
# Stop supervisord (and with it the agent) and clear the lock file.
# Globals:   SUPERVISOR_PIDFILE, LOCKFILE (read)
# Arguments: none
# Outputs:   a progress line on stdout
# Returns:   always 0, regardless of what killproc reports
#######################################
stop() {
  # no need to test for status before killproc,
  # it does the right thing. and testing supervisorctl status
  # before killproc can lead to states where you cannot stop!
  printf '%s' 'Stopping Datadog Agent (using killproc on supervisord): '
  killproc -p "$SUPERVISOR_PIDFILE" -d 30
  rm -f "$LOCKFILE"
  echo
  return 0
}
#######################################
# Restart the agent: stop it, then start it again.
# Arguments: none
# Returns:   the exit status of start
#######################################
restart() {
  stop
  start
}
#######################################
# Print the `info` report of each agent component, then exit.
# Globals:
#   AGENTUSER, AGENTPATH, DOGSTATSDPATH, FORWARDERPATH, TRACEAGENTPATH (read)
#   COLLECTOR_RETURN, DOGSTATSD_RETURN, FORWARDER_RETURN,
#   TRACEAGENT_RETURN (written)
# Arguments:
#   $1   - the literal action name ('info'); discarded
#   $2.. - extra flags forwarded to the collector's info command only
# Exits:
#   with the sum of the collector, dogstatsd and forwarder exit codes
#   (0 only when all three succeeded). The trace agent's code is ignored.
#######################################
info() {
  shift # Shift 'info' out of the args so we can pass any
        # additional options to the real command
        # (right now only dd-agent supports additional flags)

  # "$*" (not "$@") so the whole command stays ONE argument to `su -c`;
  # with "$@" every extra flag became a separate argument to su itself.
  su "$AGENTUSER" -c "$AGENTPATH info $*"
  COLLECTOR_RETURN=$?
  su "$AGENTUSER" -c "$DOGSTATSDPATH info"
  DOGSTATSD_RETURN=$?
  su "$AGENTUSER" -c "$FORWARDERPATH info"
  FORWARDER_RETURN=$?
  su "$AGENTUSER" -c "$TRACEAGENTPATH -info"
  TRACEAGENT_RETURN=$? # not used for now, Trace/APM not a core feature yet

  exit $((FORWARDER_RETURN + COLLECTOR_RETURN + DOGSTATSD_RETURN))
}
#######################################
# Send SIGHUP to the collector process named in its pid file, then exit.
# Globals:   KILL_PATH, COLLECTOR_PIDFILE (read); collector_pid (written)
# Arguments: none
# Outputs:   an error on stderr when the pid file is unusable
# Exits:
#   with the exit status of the kill command, or 1 when the pid file is
#   missing, unreadable or empty (kill is not invoked in that case).
#######################################
reload() {
  if [ ! -r "$COLLECTOR_PIDFILE" ]; then
    echo "Cannot reload: pid file $COLLECTOR_PIDFILE not found or not readable." >&2
    exit 1
  fi

  collector_pid=$(cat "$COLLECTOR_PIDFILE")
  if [ -z "$collector_pid" ]; then
    echo "Cannot reload: pid file $COLLECTOR_PIDFILE is empty." >&2
    exit 1
  fi

  "$KILL_PATH" -HUP "$collector_pid"
  exit $?
}
#######################################
# Run the agent's `configcheck` command as the agent user, then exit.
# Globals:   AGENTUSER, AGENTPATH (read)
# Arguments: none
# Exits:     with the exit status of the su invocation
#######################################
configcheck() {
  su "$AGENTUSER" -c "$AGENTPATH configcheck"
  exit $?
}
# ---------------------------------------------------------------------------
# Entry point: dispatch on the requested action ($1).
# Unknown actions print the usage line and exit 2.
# ---------------------------------------------------------------------------
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    restart
    ;;
  status)
    # Note: sh does not support arrays
    # Check for kernel version 3.18+ - overlayfs has known bug affecting unix domain sockets
    major=$(uname -r | cut -d"." -f1)
    minor=$(uname -r | cut -d"." -f2)
    # If major version 3, and minor version 18+, OR major version 4+
    if { [ "$major" -eq 3 ] && [ "$minor" -ge 18 ]; } || [ "$major" -gt 3 ]; then
      RED='\033[0;31m' # Red Text
      NC='\033[0m'     # No Color
      # %b makes printf interpret the backslash escapes in every shell;
      # plain `echo` only does so in some of them.
      printf '%b%s%b\n' "$RED" "Warning: Known bug in Linux Kernel 3.18+ causes 'status' to fail." "$NC"
      echo "Calling 'info', instead..."
      if ! service datadog-agent info; then
        exit 1
      fi
    else
      check_status
    fi
    ;;
  info)
    info "$@"
    ;;
  reload)
    reload
    ;;
  configcheck)
    configcheck
    ;;
  configtest)
    configcheck
    ;;
  jmx)
    shift
    # "$*" keeps the whole command as ONE argument to `su -c`.
    su "$AGENTUSER" -c "$AGENTPATH jmx $*"
    exit $?
    ;;
  flare)
    shift
    "$AGENTPATH" flare "$@"
    exit $?
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|info|status|reload|configcheck|configtest|jmx|flare}"
    exit 2
    ;;
esac
exit $?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment