Last active
July 2, 2019 00:00
-
-
Save robskillington/8de62dbe631a1c5b81b6401204d96d77 to your computer and use it in GitHub Desktop.
m3db-nodes-remove.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Removes M3DB nodes from a placement one host at a time, waiting for the
# cluster to report healthy before and after each removal.
#
# Required environment variables (all must be non-empty):
#   HOSTS, ISOLATION_GROUP, KV_ZONE, KV_ENV, COORD_HTTP_HOST, COORD_HTTP_PORT

echo "Example usage: KV_ZONE=embedded KV_ENV=default_env COORD_HTTP_HOST=\"host1\" COORD_HTTP_PORT=\"7201\" HOSTS=\"host1 host2 host3\" ISOLATION_GROUP=\"group\" ./m3dbops-nodes-remove.sh"

# Abort on the first required variable that is unset or empty. The order
# below determines which variable is reported when several are missing.
for required_var in HOSTS ISOLATION_GROUP KV_ZONE KV_ENV COORD_HTTP_HOST COORD_HTTP_PORT; do
  # ${!name} is bash indirect expansion: the value of the variable named by
  # $required_var.
  if [[ -z "${!required_var}" ]]; then
    echo "must set ${required_var}"
    exit 1
  fi
done
unset required_var
# Can override the node RPC port with DBNODE_RPC_PORT
NODE_RPC_PORT=${DBNODE_RPC_PORT:-9000}
# Can override the node HTTP port with DBNODE_HTTP_PORT
NODE_HTTP_PORT=${DBNODE_HTTP_PORT:-9002}
# Use following file for placement locally
placement_file="placement-${KV_ENV}-${KV_ZONE}.json"
# The helper functions in this script are exported and the per-node health
# check is executed in child `bash -c` processes (via xargs). Plain shell
# variables are not visible there, so everything those functions read must be
# exported as well.
export NODE_RPC_PORT NODE_HTTP_PORT placement_file

set -ex

# Verbose log destination; kept after the run so it can be inspected.
LOGFILE=$(mktemp)
echo "Logging verbose output: ${LOGFILE}"
export LOGFILE
#######################################
# Appends a timestamped message to the verbose log file.
# Globals:   LOGFILE (read) - path of the file appended to
# Arguments: message words; joined with single spaces
# Outputs:   nothing on stdout; one line appended to LOGFILE
#######################################
function log()
{
  # `date --rfc-3339` is a GNU date option.
  printf '[%s]: %s\n' "$(date --rfc-3339=seconds)" "$*" >> "${LOGFILE}"
}
export -f log
#######################################
# Prints a timestamped message to stdout and also records it via log().
# Arguments: message words; joined with single spaces
# Outputs:   one timestamped line on stdout
#######################################
function log_stdout()
{
  printf '[%s]: %s\n' "$(date --rfc-3339=seconds)" "$*"
  log "$*" # duplicate to make the log easier to follow
}
export -f log_stdout
#######################################
# Checks a single node's /health endpoint.
# Globals:   NODE_HTTP_PORT (read)
# Arguments: $1 - hostname of the node to probe
# Outputs:   "OK" on stdout when the endpoint answers and its JSON field
#            .bootstrapped is true, otherwise "BAD"; details go to log()
# Returns:   1 when no hostname was given, 0 otherwise (including "BAD")
#######################################
function health_check_m3dbnode()
{
  local host=$1
  if [[ -z "$host" ]]; then
    log "invalid hostname: $host"
    echo "BAD"
    # return (not exit) so a direct caller is not terminated; when run as
    # `bash -c 'health_check_m3dbnode ...'` the process still exits with 1.
    return 1
  fi

  # Declaration is separate from assignment so `local` does not mask the
  # command status. A failed request is not fatal: it simply leaves the
  # response empty, which is reported as "BAD" below.
  local health
  health=$(curl -s "http://${host}:${NODE_HTTP_PORT}/health" 2>/dev/null) || true
  if [[ -z "$health" ]]; then
    log "${host}: not responding to health check"
    echo "BAD"
    return 0
  fi

  # The response is fed to jq verbatim (quoted) so that whitespace or glob
  # characters in the body cannot be altered by the shell.
  local bootstrapped
  bootstrapped=$(jq .bootstrapped <<<"$health") || true
  if [[ "$bootstrapped" != "true" ]]; then
    log "${host}: health check responding not bootstrapped"
    echo "BAD"
    return 0
  fi

  log "${host}: health check responding bootstrapped"
  echo "OK"
}
export -f health_check_m3dbnode
#######################################
# Downloads the current m3db placement from the coordinator into the local
# placement file, overwriting any previous copy.
# Globals:   KV_ZONE, KV_ENV, COORD_HTTP_HOST, COORD_HTTP_PORT,
#            placement_file (all read)
# Outputs:   response body written to ${placement_file}
# Returns:   curl's exit status
#######################################
function get_placement_file()
{
  curl -s \
    -H "Cluster-Zone-Name: ${KV_ZONE}" \
    -H "Cluster-Environment-Name: ${KV_ENV}" \
    "http://${COORD_HTTP_HOST}:${COORD_HTTP_PORT}/api/v1/services/m3db/placement" \
    > "${placement_file}"
}
# Export the function under its real name; exporting a non-existent function
# name fails, which aborts the script when `set -e` is active.
export -f get_placement_file
#######################################
# Lists every host of interest: the instance keys of the freshly downloaded
# placement plus the hosts named in HOSTS, sorted and de-duplicated.
# Globals:   HOSTS (read, whitespace-separated), placement_file (read)
# Outputs:   one hostname per line on stdout
#######################################
function all_hosts()
{
  get_placement_file

  # Split HOSTS on whitespace into an array. Unlike an unquoted expansion,
  # `read -a` does not glob-expand the words. `read -d ''` reports non-zero
  # at end of input, hence the `|| true`.
  local -a requested=()
  read -r -d '' -a requested <<<"${HOSTS}" || true

  {
    # A jq failure (e.g. unparsable placement) must not suppress the
    # requested hosts that follow.
    jq -r '.placement.instances | keys[]' "${placement_file}" || true
    if ((${#requested[@]} > 0)); then
      printf '%s\n' "${requested[@]}"
    fi
  } | sort -u
}
export -f all_hosts
#######################################
# Decides whether the whole cluster is healthy.
# Healthy means: every shard state in the placement is AVAILABLE and every
# host listed by all_hosts answers health_check_m3dbnode with "OK".
# Globals:   MIN_CLUSTER_HOSTS (read, optional, default 10),
#            placement_file (read)
# Outputs:   "OK" or "BAD" on stdout; details go to log()
# Returns:   1 when fewer than MIN_CLUSTER_HOSTS hosts were found,
#            0 otherwise (including "BAD")
#######################################
function health_check_cluster()
{
  # paranoia: refuse to act on an implausibly small host list.
  local min_hosts=${MIN_CLUSTER_HOSTS:-10}

  # all_hosts also refreshes ${placement_file}. The list is captured once so
  # the expected count and the hosts actually probed cannot disagree.
  local hosts num_cluster_hosts
  hosts=$(all_hosts) || true
  # grep -c exits non-zero when it counts 0 lines but still prints the count.
  num_cluster_hosts=$(grep -c . <<<"${hosts}") || true
  if [[ "$num_cluster_hosts" -lt "$min_hosts" ]]; then
    log "invalid num cluster hosts: $num_cluster_hosts"
    echo "BAD"
    return 1
  fi

  # If it's not the case that all shards are available, don't continue.
  # Instances are read from .placement.instances, the same path all_hosts
  # uses on this file.
  local shards_avail
  shards_avail=$(jq '.placement.instances | to_entries | map(.value.shards | map(.state)) | flatten | sort | unique == ["AVAILABLE"]' < "${placement_file}") || true
  if [[ "$shards_avail" != "true" ]]; then
    log "observed non-AVAILABLE shard states"
    echo "BAD"
    return 0
  fi

  # Probe up to 30 hosts in parallel. The hostname is handed to the child
  # shell as a positional argument instead of being spliced into the command
  # string, so it is never interpreted as shell code.
  local num_ok
  num_ok=$(
    printf '%s\n' "${hosts}" \
      | xargs -I{} -P 30 bash -c 'health_check_m3dbnode "$1"' _ {} \
      | grep -cx OK
  ) || true
  if [[ "$num_ok" -eq "$num_cluster_hosts" ]]; then
    log "all ${num_cluster_hosts} returned healthy status"
    echo "OK"
    return 0
  fi

  log "${num_ok} of ${num_cluster_hosts} are healthy, waiting till all return healthy"
  echo "BAD"
}
export -f health_check_cluster
#######################################
# Blocks until health_check_cluster reports "OK", polling every 30 seconds.
# Returns: 0 once healthy; 1 if the health check itself exits non-zero
#######################################
function wait_for_healthy_cluster()
{
  local status
  while true; do
    if ! status=$(health_check_cluster); then
      log_stdout "cluster health check failed, aborting"
      return 1
    fi
    if [[ "$status" == "OK" ]]; then
      return 0
    fi
    sleep 30
  done
}

#######################################
# Removes each host named in HOSTS from the m3db placement, one at a time.
# For every host: wait for a healthy cluster, issue the DELETE, wait for the
# cluster to become healthy again, then pause before the next host.
# Globals:   HOSTS (read, whitespace-separated), KV_ZONE, KV_ENV,
#            COORD_HTTP_HOST, COORD_HTTP_PORT (read),
#            SLEEP_SEC (read, optional, default 600)
# Returns:   0 when all hosts were processed; 1 on a failed health check or
#            a failed DELETE request
#######################################
function remove_hosts()
{
  # Split on whitespace without glob-expanding the words; `read -d ''`
  # reports non-zero at end of input, hence the `|| true`.
  local -a hosts=()
  read -r -d '' -a hosts <<<"${HOSTS}" || true

  local sleep_sec=${SLEEP_SEC:-600}
  local host
  for host in "${hosts[@]}"; do
    log_stdout "###################"
    log_stdout "removing $host"

    log_stdout "ensuring cluster is healthy before removing"
    wait_for_healthy_cluster || return 1

    log_stdout "removing $host from placement"
    # Send the same zone/environment headers used when reading the placement
    # so the DELETE targets the placement that is being health-checked.
    # --fail turns an HTTP error response into a non-zero exit status.
    if ! curl -sS --fail -X DELETE \
      -H "Cluster-Zone-Name: ${KV_ZONE}" \
      -H "Cluster-Environment-Name: ${KV_ENV}" \
      "http://${COORD_HTTP_HOST}:${COORD_HTTP_PORT}/api/v1/services/m3db/placement/${host}"; then
      log_stdout "failed to remove $host from placement, aborting"
      return 1
    fi

    log_stdout "ensuring cluster is healthy before continuing"
    wait_for_healthy_cluster || return 1

    log_stdout "$host removed, and cluster is back to being healthy."
    log_stdout "sleeping for ${sleep_sec}s before continuing"
    sleep "${sleep_sec}"
  done
}

# Invoked as a plain command (not in a `||` or `if` context) so that, with
# `set -e` active, a non-zero return terminates the script.
remove_hosts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment