Created
April 19, 2019 23:16
-
-
Save mrballcb/061c81ca929b22c09650ea095ed5c9f2 to your computer and use it in GitHub Desktop.
kops 2X IG rolling-update (from ReactiveOps medium.com article)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I don't advise just blindly copying and pasting. Try things one line at a time,
and make sure you understand what each command is supposed to do.
# 0. Set versions: pick the cluster, bump the Kubernetes version and (optionally)
#    the AMI on each instance group, then push the new spec to the state store.
kops get cluster
CLUSTER=full_cluster_name   # TODO: replace with the real cluster name from the line above
kops edit cluster --name "$CLUSTER"            # set kube version here
kops get ig --name "$CLUSTER"                  # list instance groups to edit below
kops edit ig --name "$CLUSTER" first_master    # if updating ami
kops edit ig --name "$CLUSTER" second_master   # if updating ami
kops edit ig --name "$CLUSTER" third_master    # if updating ami
kops edit ig --name "$CLUSTER" nodes           # if updating ami
kops edit ig --name "$CLUSTER" recorder        # if exists and updating ami
# Apply the spec changes to AWS; nodes are NOT replaced yet (that's rolling-update).
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# Useful monitoring commands; CTX is the kubectl context to look at.
## Node status plus kube-system pods, refreshed every second
CTX=apac; watch -n 1 "kubectl get nodes --context $CTX; echo; kubectl get pods -n kube-system --context $CTX -o wide"
## Pods in any state other than "Running" (shows progress of rolling updates)
CTX=apac; watch -n 1 "kubectl get pods --all-namespaces --context $CTX -o wide | grep -v NAME | grep -v Running"
## Cassandra-specific watch; this namespace is part of a global cluster
CTX=apac; watch -n 1 "kubectl get pods -n cassandra --context $CTX -o wide"
# 1. Normal kops rolling update of the masters only.
#    Build a comma-separated list of all master instance groups, then roll them.
MASTERS=$(kops get ig --name "$CLUSTER" | grep master | awk '{print $1}')
MASTERS=$(echo "$MASTERS" | tr ' \n' ',,' | sed -e 's/,$//')
kops rolling-update cluster --name "$CLUSTER" --instance-group "$MASTERS" --yes
# 2. Scale the cluster autoscaler to zero so it doesn't fight the manual
#    node doubling in step 4. Save the current deployment first so it can
#    be restored in step 8.
kubectl get deployment -n kube-system --context "$CLUSTER" cluster-autoscaler \
  -o yaml > cluster_autoscaler_normal.yaml
# Produce a zero-replica copy via jq, then convert JSON back to YAML.
# NOTE(review): 'yq r - ' is yq v2/v3 syntax — confirm the installed yq version.
kubectl get deployment -n kube-system --context "$CLUSTER" cluster-autoscaler \
  -o json \
  | jq '.spec += {"replicas": 0}' \
  | yq r - > cluster_autoscaler_zero.yaml
# Somehow validate the yaml before applying it!
kubectl apply -n kube-system --context "$CLUSTER" -f cluster_autoscaler_zero.yaml
sleep 15   # give the autoscaler pod time to terminate
# 3. Record the names of the current worker nodes — these are the ones we
#    will cordon (step 6) and drain (step 7) after the replacements come up.
OLD_NODES=$(kubectl get nodes --context "$CLUSTER" -l kubernetes.io/role=node -o name | sed -e 's|nodes/||g')
# 4. Double the "nodes" instance group so a full set of fresh nodes comes up
#    alongside the old ones. Save the original IG spec for restoration in step 8.
NODE_COUNT=$(kubectl get nodes --context "$CLUSTER" -l kubernetes.io/role=node | grep -v NAME | wc -l)
DOUBLE=$(( NODE_COUNT * 2 ))
kops get ig nodes --name "$CLUSTER" -o yaml > nodes_ig_orig.yaml
# NOTE(review): 'yq r - ' is yq v2/v3 syntax — confirm the installed yq version.
kops get ig nodes --name "$CLUSTER" -o json \
  | jq --argjson n "$DOUBLE" '.spec += {"minSize": $n, "maxSize": $n}' \
  | yq r - > nodes_ig_x2.yaml
# Somehow validate the yaml before applying it!
kops replace --name "$CLUSTER" -f nodes_ig_x2.yaml
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# 5. Check that the new nodes are Healthy in the ELBs before continuing!!
# 6. Cordon every node we're replacing so no new pods land on them.
for J in $OLD_NODES; do   # intentional word-splitting: $OLD_NODES is a space-separated list
  kubectl cordon --context "$CLUSTER" "$J"
done
# 7. Drain the old nodes one at a time. After each drain, poll until every pod
#    is back in Running state (or we run out of retries) before moving on.
SLEEP=15
for J in $OLD_NODES; do   # intentional word-splitting: $OLD_NODES is a space-separated list
  kubectl drain --ignore-daemonsets --delete-local-data --force --context "$CLUSTER" "$J"
  # Names of pods in any non-Running state, in any namespace.
  FAILS=$(kubectl get pods --all-namespaces --context "$CLUSTER" \
    | grep -v NAME | grep -v Running | awk '{print $2}')
  COUNT=20
  while [[ -n "$FAILS" && "$COUNT" -gt 0 ]]; do
    COUNT=$(( COUNT - 1 ))
    # wc -w | awk strips the leading padding BSD wc emits.
    echo "There were $(echo "$FAILS" | wc -w | awk '{print $1}') non-Running processes, $COUNT tries left"
    echo "Sleeping $SLEEP seconds"
    sleep "$SLEEP"
    FAILS=$(kubectl get pods --all-namespaces --context "$CLUSTER" \
      | grep -v NAME | grep -v Running | awk '{print $2}')
  done
  if [[ "$COUNT" -eq 0 ]]; then
    echo "Stopping"
    #exit 1   # NOTE(review): exit is commented out, so draining continues anyway; uncomment to really stop
  else
    echo "Completed $J, moving to next node"
  fi
done
# 8. Restore the original node instance group size and re-enable the cluster
#    autoscaler; the autoscaler will then retire the now-empty old nodes.
kops replace --name "$CLUSTER" -f nodes_ig_orig.yaml
kops update cluster --name "$CLUSTER" --create-kube-config=false --yes
# Strip server-generated fields from the saved deployment so it can be re-applied.
# NOTE(review): 'yq r' is yq v2/v3 syntax — confirm the installed yq version.
yq r cluster_autoscaler_normal.yaml -j \
  | jq 'del(.status, .metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' \
  > cluster_autoscaler_new.yaml
kubectl apply -n kube-system --context "$CLUSTER" -f cluster_autoscaler_new.yaml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the article referenced above: https://medium.com/@reactiveops/the-reactiveops-bestest-kubernetes-cluster-upgrade-f7a7589b21fb