Created
June 29, 2016 10:30
-
-
Save gehel/3fd17d61ac8eec0326113a33f04340da to your computer and use it in GitHub Desktop.
elasticsearch restart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Rolling restart of an elasticsearch cluster, one server at a time.
#
# For every server in the configured range this script:
#   1. skips the server if it already carries the current execution id,
#   2. schedules an icinga downtime for it,
#   3. stops shard replication,
#   4. waits for the operator to reboot the server manually,
#   5. waits for ssh and then elasticsearch to answer again,
#   6. re-enables replication and waits for the cluster to report green,
#   7. writes the execution id to a marker file on the server.
# Once all servers are done, the marker files are removed.
#
# Note: ssh joins its command arguments with spaces and hands the result to
# the remote shell, so every remote command below is passed as ONE string and
# anything that must stay a single remote argument is quoted inside it.
set -euo pipefail

es_server_prefix=elastic10
es_server_suffix=.eqiad.wmnet
first_server_index=1
# Passed to seq as the LAST index of the range (zero-padded by seq -w).
nb_of_servers_in_cluster=47

# used to keep track on which server this script has already been executed
# this enables this script to be mostly idempotent, so that it can be
# relaunched as is in case of error
# WARN: needs to be changed manually
execution_id=ZplljWNP9hNobookM8fUhFuivkAKm8w3mVlQawXBg5c4JxzuEPcJcLgtY8ms0Pg

# Marker file holding the execution id on each processed server.
marker_file=/var/lib/elasticsearch/script_execution_id

# Name recorded in the downtime reason; fall back to id(1) so that `set -u`
# does not abort when USER is not exported.
operator=${USER:-$(id -un)}

for i in $(seq -w "${first_server_index}" "${nb_of_servers_in_cluster}"); do
  hostname="${es_server_prefix}${i}"
  server="${hostname}${es_server_suffix}"

  # A non-zero status (no match, missing file, ssh failure) means "not done".
  if ssh "${server}" "grep -qF -- '${execution_id}' '${marker_file}'"; then
    echo "restart already executed on ${hostname}, skipping..."
    continue
  fi

  echo "disabling alerts for ${hostname}"
  # Quote the reason for the remote shell; otherwise it is re-split into
  # several words and only the first one is taken as the -r value.
  downtime_reason=$(printf '%q' "restarting for config change - ${operator}")
  ssh neon.wikimedia.org \
    "sudo icinga-downtime -h '${hostname}' -d 1800 -r ${downtime_reason}"

  echo "disabling replication"
  until ssh "${server}" "es-tool stop-replication"; do
    echo "failed to stop replication, trying again"
    sleep 1 # avoid a tight retry loop
  done

  echo "ready to start restart ${hostname}"
  # echo "rebooting ${hostname}"
  # ssh neodymium.eqiad.wmnet sudo salt ${server} system.reboot
  # For some reason, rebooting as above does not work, host never completes shutdown
  echo "You can now reboot ${server}"
  echo "Press [enter] when done"
  read -r

  echo "waiting for server to be up"
  until ssh "${server}" true &> /dev/null; do
    printf '.'
    sleep 1
  done
  echo "server is up"

  echo "waiting for elasticsearch to be started"
  until ssh "${server}" "curl -s 127.0.0.1:9200/_cat/health"; do
    printf '.'
    sleep 1
  done
  echo "elasticsearch is started"

  echo "enabling replication"
  until ssh "${server}" "es-tool start-replication"; do
    echo "failed to start replication, trying again"
    sleep 1 # avoid a tight retry loop
  done

  echo "waiting for cluster recovery"
  # The polling loop runs entirely on the remote host.
  ssh "${server}" "until curl -s 127.0.0.1:9200/_cat/health | grep green; do echo -n .; sleep 10; done"
  echo "cluster is green"

  echo "creating file to keep track of script execution"
  ssh "${server}" "echo '${execution_id}' | sudo tee '${marker_file}'"

  echo "Done for ${hostname}"
  echo "=============================================="
done

echo "Cluster restart completed"

echo "Cleaning up..."
for i in $(seq -w "${first_server_index}" "${nb_of_servers_in_cluster}"); do
  server="${es_server_prefix}${i}${es_server_suffix}"
  # -f: a marker that is already gone (e.g. cleanup relaunched after a
  # partial run) must not abort the remaining cleanup under `set -e`.
  ssh "${server}" "sudo rm -f -- '${marker_file}'"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment