Created
December 8, 2021 23:15
-
-
Save blockpane/17392df025bc89245518870e5cd62413 to your computer and use it in GitHub Desktop.
Tendermint Monit Checks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Used by monit; place in: /etc/monit/scripts/listening.sh
#
# Usage: listening.sh <pattern>
#
# As simple as it gets: exits 0 when at least one line of `ss -lntp` output
# matches <pattern> (e.g. the daemon name), non-zero otherwise. The matching
# lines are printed on purpose, which makes it easy to see what ports are
# used in m/monit.

# An empty pattern would make grep match every line (including the ss header),
# so the check could never fail. Treat a missing argument as a failure instead.
if [ -z "${1:-}" ]; then
  echo "usage: ${0##*/} <pattern>" >&2
  exit 2
fi

# `--` keeps a pattern that starts with '-' from being parsed as a grep option.
# The exit status of the script is grep's exit status.
/usr/bin/ss -lntp | grep -- "${1}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Used by monit; place in: /etc/monit/scripts/no-peers.sh
#
# Usage: no-peers.sh <host:port>
#
# Checks if the node has no peers via prometheus.
#   exit 0 - at least one peer is connected, or the metric could not be read
#   exit 1 - the endpoint reports zero connected peers

PROMETHEUS="http://${1:-}"

# Reduce the scrape to a single integer: sum every sample line containing
# 'p2p_peers{' whose last field is a plain integer. Nothing is printed when no
# such sample exists, so NUM_PEERS is either empty or one number -- never a
# multi-line or non-numeric value that would break the tests below.
# --max-time keeps a hung endpoint from stalling the check.
NUM_PEERS=$(
  curl -s --max-time 10 "${PROMETHEUS}/stats" |
    awk 'index($0, "p2p_peers{") && $NF ~ /^[0-9]+$/ { total += $NF; seen = 1 }
         END { if (seen) printf "%d\n", total }'
)

# If we can't connect don't alarm, that is done in another check.
[ -z "${NUM_PEERS}" ] && exit 0

if [ "${NUM_PEERS}" -eq 0 ]; then
  echo NO PEERS ARE CONNECTED
  exit 1
fi

echo "${NUM_PEERS} peers are connected."
exit 0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace aaa with the appropriate daemon name.
# Place in: /etc/monit/conf.d/tendermint
#
# This check ensures the daemon is running; it also allows using m/monit to
# remotely stop / start / restart the service.
check process aaa matching aaa
  start program = "/usr/bin/systemctl start aaa.service"
  restart program = "/usr/bin/systemctl restart aaa.service"
  stop program = "/usr/bin/systemctl stop aaa.service"
# Alerts if the node falls behind: the HTTP response to /status on port 26657
# must contain "catching_up" set to false.
# The pattern allows any amount of whitespace (including none) after the colon,
# so it matches both `"catching_up": false` and the compact `"catching_up":false`.
check host aaa-syncing with address 127.0.0.1
  if failed
    port 26657 protocol http
    request /status with content = '"catching_up":[[:space:]]*false'
  then alert
  depends on aaa
# Ensure the daemon has ports listening. Having none can happen if cosmovisor
# locks up, or is performing a backup at upgrade time.
# Alerts on any failure; after 3 consecutive failing cycles the check is
# unmonitored.
check program aaa-listening with path "/bin/bash /etc/monit/scripts/listening.sh aaa"
  if status != 0 then alert
  if status != 0 for 3 cycles then unmonitor
  depends on aaa
# Uses the prometheus endpoint to watch for connected peers. If no peers are
# present after 3 cycles (I use 3 minutes), it will restart the service.
# The restart program is declared here as well because the restart action
# below applies to this check.
check program aaa-peers with path "/bin/bash /etc/monit/scripts/no-peers.sh 127.0.0.1:26660"
  restart program = "/usr/bin/systemctl restart aaa.service"
  if status != 0 then alert
  if status != 0 for 3 cycles then restart
  depends on aaa
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment