Last active
July 17, 2018 19:25
-
-
Save pbostrom/fe8cb8e7a27f53d687b2110d06cd0567 to your computer and use it in GitHub Desktop.
Monitor your gaiad validator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Publish a single data point to the CloudWatch metric "UnhealthyValidator"
# in the "Cosmos" namespace (region us-east-1) using the AWS CLI.
#
# Usage: cloudwatch_health.sh <value>
#   value - the metric value to publish; handed unchanged to --value.
# Exit status: 2 when called without exactly one argument, otherwise the
# exit status of the aws command.
if [ "$#" -ne 1 ]; then
  echo "usage: ${0##*/} <value>" >&2
  exit 2
fi

# Keep a space before the line-continuation backslash: a backslash glued to
# the metric name joins it with the next line when that line is not indented.
aws cloudwatch put-metric-data --metric-name UnhealthyValidator \
  --value "$1" --namespace "Cosmos" --region us-east-1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#######################################
# Print how many whole seconds have elapsed since a given timestamp.
# Arguments: $1 - timestamp in any format understood by `date -d`
# Outputs:   (current epoch seconds - timestamp epoch seconds) on stdout
# Returns:   0 on success; 1, with nothing on stdout, when $1 is empty or
#            cannot be parsed by `date -d`
#######################################
block_age() {
  local block_epoch now_epoch

  # GNU date parses an empty string as "start of today", which would yield
  # a plausible-looking but meaningless age, so reject it up front.
  [ -n "$1" ] || return 1

  # Assigned separately from `local` so a date failure is not masked.
  block_epoch=$(date -d "$1" +%s) || return 1
  now_epoch=$(date +%s)
  echo $(( now_epoch - block_epoch ))
}
# Make sure the external tools this script relies on are available before
# doing any work. `command -v` only looks the name up; it does not execute
# the tool, and it stays silent when the tool is missing.
if ! command -v jq > /dev/null 2>&1; then
  echo "jq not installed. Please use your favorite package manager to install jq. More info at https://stedolan.github.io/jq/download/"
  exit 1
fi
if ! command -v curl > /dev/null 2>&1; then
  echo "curl not installed. Please use your favorite package manager to install curl. More info at https://curl.haxx.se/download.html"
  exit 1
fi
# Probe the local RPC /status endpoint. On failure, report the value 1 via
# /usr/bin/cloudwatch_health.sh and stop.
#   --fail        treat an HTTP error response as a failure
#   --max-time 10 give up after 10 seconds instead of hanging when the port
#                 accepts the connection but never answers
if ! curl -s --fail --max-time 10 http://localhost:26657/status > /dev/null 2>&1; then
  echo "gaiad RPC unavailable"
  /usr/bin/cloudwatch_health.sh 1
  exit 1
fi
# Evaluate validator health and report it through
# /usr/bin/cloudwatch_health.sh: 1 when a problem is detected, 0 otherwise.
threshold=90 # maximum tolerated age of the latest block, in seconds
error=false
msg=""

# Fetch /status once so the address, height and time all come from the same
# response; separate requests could each describe a different block.
status=$(curl -s http://localhost:26657/status)
addr=$(jq -r ".result.validator_info.address" <<< "$status")
lbh=$(jq -r ".result.sync_info.latest_block_height" <<< "$status")
lbt=$(jq -r ".result.sync_info.latest_block_time" <<< "$status")
ba=$(block_age "$lbt")

echo "Latest block height: $lbh"
echo "Latest block age: $ba seconds"
echo "Validator address: $addr"

# The URL is quoted because an unquoted '?' is a shell glob character.
# The address is handed to jq with --arg rather than spliced into the filter,
# so its content can never be interpreted as jq syntax.
precommit=$(curl -s "http://localhost:26657/block?height=$lbh" \
  | jq -r --arg addr "$addr" \
    '.result.block.last_commit.precommits | .[] | select(.validator_address == $addr)')

if ! [[ "$ba" =~ ^-?[0-9]+$ ]]; then
  # Without a numeric age the staleness comparison below cannot be trusted,
  # so treat an unparsable block time as a failure of its own.
  msg="Unable to determine latest block age from block time \"$lbt\""
  error=true
elif (( ba > threshold )); then
  msg="Latest block age is over $threshold seconds; Validator or network has halted"
  error=true
elif [[ -z "$precommit" ]]; then
  msg="Validator not active; latest block does not contain a precommit from this validator"
  error=true
fi

if [[ "$error" == true ]]; then
  # configure an alert here: email, SNS, PagerDuty, etc.
  echo "$msg"
  /usr/bin/cloudwatch_health.sh 1
  exit 1
fi

echo "Validator active"
/usr/bin/cloudwatch_health.sh 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment