Created
April 19, 2019 23:16
-
-
Save mrballcb/061c81ca929b22c09650ea095ed5c9f2 to your computer and use it in GitHub Desktop.
kops 2X IG rolling-update (from ReactiveOps medium.com article)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I don't advise just blindly copying and pasting. Try things one line at a time,
and make sure you understand what each command is supposed to do.
# 0. Set versions: pick the cluster, bump the Kubernetes version and (optionally)
#    the AMI on each instance group, then push the new spec to the state store.
kops get cluster
CLUSTER=full_cluster_name   # TODO: replace with the real cluster name from the line above
kops edit cluster --name "$CLUSTER"            # set kube version here
kops get ig --name "$CLUSTER"                  # list instance groups to edit below
kops edit ig --name "$CLUSTER" first_master    # if updating ami
kops edit ig --name "$CLUSTER" second_master   # if updating ami
kops edit ig --name "$CLUSTER" third_master    # if updating ami
kops edit ig --name "$CLUSTER" nodes           # if updating ami
kops edit ig --name "$CLUSTER" recorder        # if exists and updating ami
# Apply the spec changes to AWS; nodes are NOT replaced yet (that's rolling-update).
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# Useful monitoring commands; CTX is the kubectl context to look at.
## Node status plus kube-system pods, refreshed every second
CTX=apac; watch -n 1 "kubectl get nodes --context $CTX; echo; kubectl get pods -n kube-system --context $CTX -o wide"
## Pods in any state other than "Running" (shows progress of rolling updates)
CTX=apac; watch -n 1 "kubectl get pods --all-namespaces --context $CTX -o wide | grep -v NAME | grep -v Running"
## Cassandra-specific watch; this namespace is part of a global cluster
CTX=apac; watch -n 1 "kubectl get pods -n cassandra --context $CTX -o wide"
# 1. Normal kops rolling update of the masters only.
#    Build a comma-separated list of all master instance groups, then roll them.
MASTERS=$(kops get ig --name "$CLUSTER" | grep master | awk '{print $1}')
MASTERS=$(echo "$MASTERS" | tr ' \n' ',,' | sed -e 's/,$//')
kops rolling-update cluster --name "$CLUSTER" --instance-group "$MASTERS" --yes
# 2. Scale the cluster autoscaler to zero so it doesn't fight the manual
#    node doubling in step 4. Save the current deployment first so it can
#    be restored in step 8.
kubectl get deployment -n kube-system --context "$CLUSTER" cluster-autoscaler \
  -o yaml > cluster_autoscaler_normal.yaml
# Produce a zero-replica copy via jq, then convert JSON back to YAML.
# NOTE(review): 'yq r - ' is yq v2/v3 syntax — confirm the installed yq version.
kubectl get deployment -n kube-system --context "$CLUSTER" cluster-autoscaler \
  -o json \
  | jq '.spec += {"replicas": 0}' \
  | yq r - > cluster_autoscaler_zero.yaml
# Somehow validate the yaml before applying it!
kubectl apply -n kube-system --context "$CLUSTER" -f cluster_autoscaler_zero.yaml
sleep 15   # give the autoscaler pod time to terminate
# 3. Record the names of the current worker nodes — these are the ones we
#    will cordon (step 6) and drain (step 7) after the replacements come up.
OLD_NODES=$(kubectl get nodes --context "$CLUSTER" -l kubernetes.io/role=node -o name | sed -e 's|nodes/||g')
# 4. Double the "nodes" instance group so a full set of fresh nodes comes up
#    alongside the old ones. Save the original IG spec for restoration in step 8.
NODE_COUNT=$(kubectl get nodes --context "$CLUSTER" -l kubernetes.io/role=node | grep -v NAME | wc -l)
DOUBLE=$(( NODE_COUNT * 2 ))
kops get ig nodes --name "$CLUSTER" -o yaml > nodes_ig_orig.yaml
# NOTE(review): 'yq r - ' is yq v2/v3 syntax — confirm the installed yq version.
kops get ig nodes --name "$CLUSTER" -o json \
  | jq --argjson n "$DOUBLE" '.spec += {"minSize": $n, "maxSize": $n}' \
  | yq r - > nodes_ig_x2.yaml
# Somehow validate the yaml before applying it!
kops replace --name "$CLUSTER" -f nodes_ig_x2.yaml
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# 5. Check that the new nodes are Healthy in the ELBs before continuing!!
# 6. Cordon every node we're replacing so no new pods land on them.
for J in $OLD_NODES; do   # intentional word-splitting: $OLD_NODES is a space-separated list
  kubectl cordon --context "$CLUSTER" "$J"
done
# 7. Drain the old nodes one at a time. After each drain, poll until every pod
#    is back in Running state (or we run out of retries) before moving on.
SLEEP=15
for J in $OLD_NODES; do   # intentional word-splitting: $OLD_NODES is a space-separated list
  kubectl drain --ignore-daemonsets --delete-local-data --force --context "$CLUSTER" "$J"
  # Names of pods in any non-Running state, in any namespace.
  FAILS=$(kubectl get pods --all-namespaces --context "$CLUSTER" \
    | grep -v NAME | grep -v Running | awk '{print $2}')
  COUNT=20
  while [[ -n "$FAILS" && "$COUNT" -gt 0 ]]; do
    COUNT=$(( COUNT - 1 ))
    # wc -w | awk strips the leading padding BSD wc emits.
    echo "There were $(echo "$FAILS" | wc -w | awk '{print $1}') non-Running processes, $COUNT tries left"
    echo "Sleeping $SLEEP seconds"
    sleep "$SLEEP"
    FAILS=$(kubectl get pods --all-namespaces --context "$CLUSTER" \
      | grep -v NAME | grep -v Running | awk '{print $2}')
  done
  if [[ "$COUNT" -eq 0 ]]; then
    echo "Stopping"
    #exit 1   # NOTE(review): exit is commented out, so draining continues anyway; uncomment to really stop
  else
    echo "Completed $J, moving to next node"
  fi
done
# 8. Restore the original node instance group size and re-enable the cluster
#    autoscaler; the autoscaler will then retire the now-empty old nodes.
kops replace --name "$CLUSTER" -f nodes_ig_orig.yaml
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# Strip server-generated fields from the saved deployment so it can be re-applied.
# NOTE(review): 'yq r' is yq v2/v3 syntax — confirm the installed yq version.
yq r cluster_autoscaler_normal.yaml -j \
  | jq 'del(.status, .metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' \
  > cluster_autoscaler_new.yaml
kubectl apply -n kube-system --context "$CLUSTER" -f cluster_autoscaler_new.yaml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the article referenced above: https://medium.com/@reactiveops/the-reactiveops-bestest-kubernetes-cluster-upgrade-f7a7589b21fb