Created
July 16, 2020 01:38
-
-
Save yteraoka/cb91a7c38724c302ded7a58dc91fde25 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Replace the EC2 instances behind an EKS node's Auto Scaling group:
#   1. look up the Auto Scaling group of the node given as $1,
#   2. double the group's desired capacity and wait for the new nodes,
#   3. drain and delete every node that was in the group beforehand,
#   4. restore the group's original size and termination policies.
#
# Requires: kubectl, aws (CLI) and jq.

# Termination policies applied while the group is scaled out. Kept as one
# space-separated string because it is word-split into separate CLI arguments.
# NOTE(review): presumably chosen so that the later scale-in removes the old
# instances first - confirm against the Auto Scaling documentation.
readonly termination_policy="OldestLaunchTemplate OldestInstance"
#######################################
# Print a timestamped progress message in green.
# Arguments: $@ - message words, joined with single spaces
# Outputs:   one line on stdout: ESC[32m<YYYY-MM-DDTHH:MM:SS> <message>ESC[m
#######################################
info() {
  # The message is passed through %s so that backslashes in it are printed
  # literally instead of being interpreted as escape sequences.
  printf '\033[32m%s %s\033[m\n' "$(date +%Y-%m-%dT%H:%M:%S)" "$*"
}
#######################################
# Print an optional error message followed by the usage line.
# Arguments: $@ - optional error message (omitted when no arguments are given)
# Outputs:   everything is written to stderr
# Returns:   does not exit; the caller decides the exit status
#######################################
usage() {
  if (( $# >= 1 )); then
    # Quoted so the message is printed verbatim (no globbing or re-splitting),
    # followed by a blank separator line.
    printf '%s\n\n' "$*" >&2
  fi
  printf 'Usage: %s ip-111-222-333-444.ap-northeast-1.compute.internal\n' "$0" >&2
}
# --- Validate the node name given as the first argument ---------------------
nodename=${1:-}

# Only names of the form "ip-..." are accepted (see the usage example).
if [[ "${nodename}" != ip-* ]]; then
  usage
  exit 1
fi

# The node must exist in the cluster kubectl is currently pointed at.
if ! kubectl get node "${nodename}"; then
  usage "Unknown node: ${nodename}"
  exit 1
fi
# --- Resolve the node's EC2 instance ID and its Auto Scaling group ----------
# providerID looks like: aws:///ap-northeast-1a/i-0fc00637cc970e6f5
provider_id=$(kubectl get node "${nodename}" -o jsonpath='{.spec.providerID}')

# Keep only the last path component, e.g. i-0fc00637cc970e6f5
instance_id=${provider_id##*/}
if [[ -z "${instance_id}" ]]; then
  echo "Could not determine the EC2 instance ID of node ${nodename}" >&2
  exit 1
fi

# The group name is read from the instance's aws:autoscaling:groupName tag.
asgname=$(aws --output json ec2 describe-instances --instance-ids "${instance_id}" \
  | jq -r '.Reservations[0].Instances[0].Tags[]? | select(.Key == "aws:autoscaling:groupName") | .Value')
if [[ -z "${asgname}" ]]; then
  # Continuing with an empty name would make the later ASG calls act on the
  # wrong target or fail in confusing ways.
  echo "Instance ${instance_id} has no aws:autoscaling:groupName tag" >&2
  exit 1
fi
info "Autoscaling group name: ${asgname}"
# --- Snapshot the Auto Scaling group's current settings ---------------------
# The values captured here are used later to restore the group and to know
# which instances were present before scaling out.
tmpfile1=$(mktemp) || { echo "mktemp failed" >&2; exit 1; }
echo "${tmpfile1}"
# Single quotes: the path is expanded (and safely quoted) when the trap fires.
trap 'rm -f -- "${tmpfile1}"' EXIT

if ! aws --output json autoscaling describe-auto-scaling-groups \
  --auto-scaling-group-names "${asgname}" > "${tmpfile1}"; then
  echo "Failed to describe Auto Scaling group: ${asgname}" >&2
  exit 1
fi

orig_asgmax=$(jq -r '.AutoScalingGroups[0].MaxSize' "${tmpfile1}")
orig_asgdesired=$(jq -r '.AutoScalingGroups[0].DesiredCapacity' "${tmpfile1}")
# Space-separated list, e.g. "OldestInstance Default".
orig_termination_policy=$(jq -r '.AutoScalingGroups[0].TerminationPolicies | join(" ")' "${tmpfile1}")
# One instance ID per line.
old_instances=$(jq -r '.AutoScalingGroups[0].Instances[] | .InstanceId' "${tmpfile1}")

info "asgmax: ${orig_asgmax}"
info "asgdesired: ${orig_asgdesired}"
info "termination_policy: ${orig_termination_policy}"
# Show the instance IDs on a single line.
info "asg_instances: ${old_instances//$'\n'/ }"
# --- Compute the temporary (scaled-out) capacity ----------------------------
# Refuse to continue on non-numeric values (e.g. "null" from jq); in shell
# arithmetic they would silently evaluate to 0.
if ! [[ "${orig_asgdesired}" =~ ^[0-9]+$ && "${orig_asgmax}" =~ ^[0-9]+$ ]]; then
  echo "Unexpected ASG capacity values: desired='${orig_asgdesired}' max='${orig_asgmax}'" >&2
  exit 1
fi

# Double the desired capacity; raise the maximum only when the doubled value
# would not fit under the current maximum.
new_asgdesired=$(( orig_asgdesired * 2 ))
if (( orig_asgmax < new_asgdesired )); then
  new_asgmax=${new_asgdesired}
else
  new_asgmax=${orig_asgmax}
fi
# --- Find the target groups the instance is registered in -------------------
info "Searching ${instance_id} in target groups"
# Space-separated list of matching target group ARNs (consumed by word
# splitting further down in the script).
target_group_arns=""
# Automatic pagination is left enabled so that every target group is examined,
# not only the first page of results.
for target_group_arn in $(aws --output json elbv2 describe-target-groups \
  | jq -r '.TargetGroups[].TargetGroupArn'); do
  # Empty when the instance is not a target of this group.
  state=$(aws --output json elbv2 describe-target-health --target-group-arn "${target_group_arn}" \
    | jq -r --arg id "${instance_id}" \
      '.TargetHealthDescriptions[] | select(.Target.Id == $id) | .TargetHealth.State')
  if [[ -n "${state}" ]]; then
    info "${instance_id} registerd in ${target_group_arn}, state: ${state}"
    target_group_arns="${target_group_arns} ${target_group_arn}"
  fi
done
# --- Increase number of instances -------------------------------------------
info "Updating ASG: TerminationPolicy=${termination_policy}, maxSize: ${new_asgmax}, desired: ${new_asgdesired}"
# termination_policy is intentionally unquoted: each policy in the
# space-separated list must become its own argument.
# shellcheck disable=SC2086
if ! aws autoscaling update-auto-scaling-group --auto-scaling-group-name "${asgname}" \
  --termination-policies ${termination_policy} \
  --max-size "${new_asgmax}" \
  --desired-capacity "${new_asgdesired}"; then
  # Without this check a failed update would leave the following wait loop
  # polling for capacity that was never requested.
  echo "Failed to update Auto Scaling group: ${asgname}" >&2
  exit 1
fi
# --- Wait until the scaled-out group is fully in service --------------------
info "Waiting for all instances getting ready"
while :; do
  # One compact JSON array per instance:
  #   ["<id>","<type>","<lifecycle state>","<health status>"]
  aws --output json autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${asgname}" \
    | jq -M -c '.AutoScalingGroups[0].Instances[] | [.InstanceId, .InstanceType, .LifecycleState, .HealthStatus]' \
    > "${tmpfile1}"
  num_ready_nodes=$(grep -c '"InService","Healthy"' "${tmpfile1}")
  # ">=" rather than "==": do not stall if more instances than requested are
  # already in service.
  if (( num_ready_nodes >= new_asgdesired )); then
    echo
    info "All instances is ready"
    cat "${tmpfile1}"
    break
  fi
  # Progress indicator; poll every 5 seconds.
  printf '.'
  sleep 5
done
# --- Wait until every Kubernetes node reports Ready -------------------------
info "Waiting for all kubernetes nodes getting ready"
kubectl get node
while :; do
  # Count node lines that do not contain " Ready " (surrounded by spaces), so
  # statuses such as NotReady or Ready,SchedulingDisabled count as not ready.
  not_ready_nodes=$(kubectl get node --no-headers | grep -cv ' Ready ')
  if (( not_ready_nodes == 0 )); then
    break
  fi
  # Pause between polls instead of querying the API server in a tight loop.
  sleep 5
done
kubectl get node
# --- Drain and remove every instance that was in the group beforehand -------
info "Drain and delete all old instances"
# old_instances and target_group_arns are whitespace-separated lists; the
# unquoted expansions below split them on purpose.
for instance_id in ${old_instances}; do
  for target_group_arn in ${target_group_arns}; do
    info "Deregistering ${instance_id} from target group ${target_group_arn}"
    aws elbv2 deregister-targets --target-group-arn "${target_group_arn}" --targets "Id=${instance_id}"
    info "Waiting to be finished draining (300 seconds)"
    # Poll every 30 seconds until the instance no longer appears in the
    # target group's health descriptions.
    while :; do
      state=$(aws --output json elbv2 describe-target-health --target-group-arn "${target_group_arn}" \
        | jq -r --arg id "${instance_id}" \
          '.TargetHealthDescriptions[] | select(.Target.Id == $id) | .TargetHealth.State')
      if [[ -z "${state}" ]]; then
        break
      fi
      echo "${state}"
      sleep 30
    done
  done

  # The Kubernetes node name is taken from the instance's private DNS name.
  nodename=$(aws --output json ec2 describe-instances --instance-ids "${instance_id}" \
    | jq -r '.Reservations[0].Instances[0].PrivateDnsName // empty')
  if [[ -z "${nodename}" ]]; then
    echo "Could not resolve the node name of ${instance_id}; skipping drain/delete" >&2
    continue
  fi

  info "Draining $nodename (${instance_id})"
  if ! kubectl drain "${nodename}" --ignore-daemonsets --delete-local-data; then
    # Deleting a node that failed to drain would kill its remaining pods
    # without eviction, so stop here and leave the group scaled out.
    echo "Failed to drain ${nodename} (${instance_id}); aborting before node deletion" >&2
    exit 1
  fi
  info "Deleting $nodename (${instance_id}) from EKS cluster"
  kubectl delete node "${nodename}"
done
# Give the pods evicted by the drain time to start up again.
# Some pods may already have been unhealthy before the drain, so this only
# sleeps for a fixed time instead of waiting for every pod to be Running.
sleep 60
info "kubectl get pods -A | grep -v Running"
# List every pod line that does not contain "Running" (the header line is
# shown as well).
kubectl get pods -A | grep -v Running
# --- Revert autoscaling group changes ---------------------------------------
info "Updating ASG: TerminationPolicy=${orig_termination_policy}, maxSize: ${orig_asgmax}, desired: ${orig_asgdesired}"
# orig_termination_policy is intentionally unquoted: each policy in the
# space-separated list must become its own argument.
# shellcheck disable=SC2086
if ! aws autoscaling update-auto-scaling-group --auto-scaling-group-name "${asgname}" \
  --termination-policies ${orig_termination_policy} \
  --max-size "${orig_asgmax}" \
  --desired-capacity "${orig_asgdesired}"; then
  # Report the failure explicitly: the group is still scaled out and still
  # carries the temporary termination policies.
  echo "Failed to restore Auto Scaling group ${asgname}" \
    "(TerminationPolicy=${orig_termination_policy}, maxSize=${orig_asgmax}, desired=${orig_asgdesired})" >&2
  exit 1
fi

# The script does not wait for the surplus instances to terminate; it prints
# the current state once and tells the operator how to re-check.
info "Do not wait terminate instance completed"
aws --output json autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${asgname}" \
  | jq -M -c '.AutoScalingGroups[0].Instances[] | [.InstanceId, .InstanceType, .LifecycleState, .HealthStatus]'
info "Try following command to check termination process"
echo
# Unquoted here-document: ${asgname} is expanded and the backslash-newline is
# joined, so the command is printed as a single line.
cat <<EOF
aws --output json autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${asgname} \
| jq -M -c '.AutoScalingGroups[0].Instances[] | [.InstanceId, .InstanceType, .LifecycleState, .HealthStatus]'
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment