#!/bin/bash
set -e

# Defaults; can be overridden via environment variables or the flags parsed below.
resourceGroupDefault='<set your default here, to avoid having to specify in the common case>'
resourceGroup=${RESOURCE_GROUP:-$resourceGroupDefault}

clusterNameDefault='<set your default here>'
clusterName=${CLUSTER_NAME:-$clusterNameDefault}

regionDefault='<set your default here>'
region=${REGION:-$regionDefault}

force=false
dryrun=false
nodes=''
function print_usage() {
  echo "Usage: $0 [<options>]"
  echo ""
  echo "-n|--node <node>               The name of a node to restart."
  echo "                               By default, a rolling restart of all nodes"
  echo "                               is performed."
  echo ""
  echo "--resource-group <group-name>  The resource group of the cluster."
  echo "                               Can also be set by RESOURCE_GROUP"
  echo "                               Default: $resourceGroupDefault"
  echo ""
  echo "--cluster-name <cluster-name>  The name of the cluster."
  echo "                               Can also be set by CLUSTER_NAME"
  echo "                               Default: $clusterNameDefault"
  echo ""
  echo "--region <azure-region>        The Azure region the cluster is in."
  echo "                               Can also be set by REGION"
  echo "                               Default: $regionDefault"
  echo ""
  echo "-f|--force                     Restart node(s) without first draining."
  echo "                               Useful if draining a node fails."
  echo ""
  echo "-d|--dry-run                   Just print what would be done; don't actually do it."
  echo ""
  echo "-h|--help                      Print usage and exit."
}
# Parse command-line arguments.
while [[ $# -gt 0 ]]; do
  key="$1"
  case $key in
    -n|--node)
      node="$2"
      shift
      shift
      ;;
    --resource-group)
      resourceGroup="$2"
      shift
      shift
      ;;
    --cluster-name)
      clusterName="$2"
      shift
      shift
      ;;
    --region)
      region="$2"
      shift
      shift
      ;;
    -f|--force)
      force=true
      shift
      ;;
    -d|--dry-run)
      dryrun=true
      shift
      ;;
    -h|--help)
      print_usage
      exit 0
      ;;
    *)
      print_usage
      exit 1
      ;;
  esac
done
group="MC_${resourceGroup}_${clusterName}_$region" | |
function wait_for_status() { | |
node=$1 | |
reason=$2 | |
i=0 | |
while [[ $i -lt 30 ]]; do | |
status=$(kubectl get node $node -o "jsonpath={.status.conditions[?(.reason==\"$reason\")].type}") | |
if [[ "$status" == "Ready" ]]; then | |
echo "$reason after $((i*2)) seconds" | |
break; | |
else | |
sleep 2s | |
i=$(($i+1)) | |
fi | |
done | |
if [[ $i == 30 ]]; then | |
echo "Error: Did not reach $reason state within 1 minute" | |
exit 1 | |
fi | |
} | |
# If no specific node was given, do a rolling restart of all nodes in the cluster.
if [ -z "$node" ]; then
  nodes=$(kubectl get nodes -o jsonpath={.items[*].metadata.name})
else
  nodes="$node"
fi

for node in $nodes; do
  if $force; then
    echo "WARNING: --force specified, restarting node $node without draining first"
    if $dryrun; then
      echo "kubectl cordon $node"
    else
      kubectl cordon "$node"
    fi
  else
    echo "Draining $node..."
    if $dryrun; then
      echo "kubectl drain $node --ignore-daemonsets --delete-local-data"
    else
      kubectl drain "$node" --ignore-daemonsets --delete-local-data
    fi
  fi

  echo "Initiating VM restart for $node..."
  if $dryrun; then
    echo "az vm restart --resource-group $group --name $node"
  else
    az vm restart --resource-group "$group" --name "$node"
  fi

  if ! $dryrun; then
    echo "Waiting for $node to start back up..."
    # Wait for the kubelet to go away (NotReady) and then come back (Ready).
    wait_for_status "$node" KubeletNotReady
    wait_for_status "$node" KubeletReady
  fi

  echo "Re-enabling $node for scheduling"
  if $dryrun; then
    echo "kubectl uncordon $node"
  else
    kubectl uncordon "$node"
  fi
done
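For reference, a typical invocation might look like this; the filename restart-nodes.sh and the resource and node names are just placeholders:

# Preview a rolling restart of all nodes without executing anything:
RESOURCE_GROUP=my-aks-rg CLUSTER_NAME=my-aks-cluster REGION=westeurope \
  ./restart-nodes.sh --dry-run

# Restart a single node, skipping the drain step (e.g. if draining hangs):
./restart-nodes.sh --node aks-nodepool1-12345678-0 --force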
@bradwilson Yeah, this is a pretty early sketch of what we eventually ended up with - if I remember, I'll check back on Monday when I'm back from vacation and see if I can find the current version of the script and update this gist. Ping me if you want it and haven't heard anything by mid-week!
@bradwilson didn't ping you, but I am :-). May I kindly ask for an update, @tomasaschan?
@AmanG2205 I've updated the gist with the latest version of the script checked into that project's git repo. However, it's been almost a year since I did any work on that project - and therefore since I last ran the script - so I make no guarantees that it still works without modification.
Also, since the script does a rolling restart, note that you need one extra node's worth of spare capacity to pull it off. Room for improvement for the script would be to do something like this (see the sketch after the list):
- detect whether auto-scaling is enabled, and turn it off if it is
- scale up by one node
- do the rolling restart as currently implemented
- turn on auto-scaling again if it was on before (it will scale down again appropriately on its own) or scale down by one node if autoscaling was not enabled
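A rough, untested sketch of that with the Azure CLI could look like the following. It assumes a single node pool named nodepool1 (a placeholder), reuses the $resourceGroup and $clusterName variables defined at the top of the script, and relies on the az aks nodepool commands, which require a newer Azure CLI than this script was originally written against:

# Hypothetical sketch: temporarily add one node's worth of capacity before the
# rolling restart, and undo it afterwards. Untested; pool name is a placeholder.
pool='nodepool1'

# Remember the current autoscaler setting and node count.
autoscaling=$(az aks nodepool show --resource-group "$resourceGroup" --cluster-name "$clusterName" \
  --name "$pool" --query enableAutoScaling -o tsv)
count=$(az aks nodepool show --resource-group "$resourceGroup" --cluster-name "$clusterName" \
  --name "$pool" --query count -o tsv)

if [[ "$autoscaling" == "true" ]]; then
  minCount=$(az aks nodepool show --resource-group "$resourceGroup" --cluster-name "$clusterName" \
    --name "$pool" --query minCount -o tsv)
  maxCount=$(az aks nodepool show --resource-group "$resourceGroup" --cluster-name "$clusterName" \
    --name "$pool" --query maxCount -o tsv)
  az aks nodepool update --resource-group "$resourceGroup" --cluster-name "$clusterName" \
    --name "$pool" --disable-cluster-autoscaler
fi

# Scale out by one node to make room for the drained workloads.
az aks nodepool scale --resource-group "$resourceGroup" --cluster-name "$clusterName" \
  --name "$pool" --node-count $((count + 1))

# ... do the rolling restart as implemented above ...

if [[ "$autoscaling" == "true" ]]; then
  # Re-enable the autoscaler; it will scale back down on its own.
  az aks nodepool update --resource-group "$resourceGroup" --cluster-name "$clusterName" \
    --name "$pool" --enable-cluster-autoscaler --min-count "$minCount" --max-count "$maxCount"
else
  # Autoscaling was off, so scale back down to the original node count.
  az aks nodepool scale --resource-group "$resourceGroup" --cluster-name "$clusterName" \
    --name "$pool" --node-count "$count"
fi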
I tried this and it seems KubeletNotReady either never happens or goes by too quickly in recent AKS. Commenting that wait_for_status line out made this gold. Thanks!
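In other words, an untested sketch of that tweak to the waiting step in the main loop:

if ! $dryrun; then
  echo "Waiting for $node to start back up..."
  # On recent AKS the node may never be observed as NotReady after the VM
  # restart, so skip that wait and only wait for it to come back Ready.
  # wait_for_status "$node" KubeletNotReady
  wait_for_status "$node" KubeletReady
fi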
Do not do this as it's not supported by MS if something goes wrong. MS now has a start/stop feature. Utilize az aks start/stop.
https://docs.microsoft.com/en-us/azure/aks/start-stop-cluster
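For reference, stopping and starting an AKS cluster with the Azure CLI looks roughly like this (cluster and resource group names are placeholders):

# Stop the cluster (control plane and node pools):
az aks stop --resource-group my-aks-rg --name my-aks-cluster

# Bring it back up later:
az aks start --resource-group my-aks-rg --name my-aks-cluster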
@gvanriper Note that this script does a rolling restart, i.e. with zero downtime. I assume az aks stop followed by az aks start would result in downtime while the cluster is stopped?
Yes, this is a hack, and it's by no means supported in any way by Microsoft. It's also 18 months old, hasn't been used (at least by me) since, and was created to work around an instability in AKS at the time, which I sincerely hope is no longer a problem (my nodes were forgetting to share networking capabilities with my containers, so the containers became impossible to reach, but the cluster did not detect them as unhealthy).
@tomasaschan, you're correct that it would result in downtime. I just wanted anyone following this that hadn't noticed to know that AKS start/stop is now an option and supported by MS.
(BTW, thanks for this. It's a great kick start!)