Skip to content

Instantly share code, notes, and snippets.

@galexrt
Last active March 2, 2018 09:58
Show Gist options
  • Save galexrt/f9c3f017e950487470c1b87caf44ab0f to your computer and use it in GitHub Desktop.
Save galexrt/f9c3f017e950487470c1b87caf44ab0f to your computer and use it in GitHub Desktop.
Improved version of the ceph reboot status check script from [rook/rook - Pull Request: Added ceph-reboot-script using the Container Linux Update Operator #1492](https://github.com/rook/rook/pull/1492).
#!/bin/bash
# preflightCheck checks for existence of "dependencies"
preflightCheck() {
if [ ! -f "/var/run/secrets/kubernetes.io/serviceaccount/token" ]; then
echo "$(date) | No Kubernetes ServiceAccount token found."
exit 1
fi
}
# updateNodeRebootAnnotation sets the `ceph-reboot-check` annotation to `true` on `$NODE`
updateNodeRebootAnnotation(){
export PATCH="[{ \"op\": \"add\", \"path\": \"/metadata/annotations/ceph-reboot-check\", \"value\": \"true\" }]"
KUBE_TOKEN="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
export KUBE_TOKEN
TRIES=0
until [ $TRIES -eq 10 ]; do
if curl -sSk \
--fail \
-XPATCH \
-H "Authorization: Bearer $KUBE_TOKEN" \
-H "Accept: application/json" \
-H "Content-Type:application/json-patch+json" \
--data "$PATCH" \
"https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT/api/v1/nodes/$NODE"; then
echo "$(date) | Annotation \"ceph-reboot-check\" from node $NODE updated to \"true\". Reboot confirmed!"
return 0
else
echo "$(date) | Kubernetes API server connection error, will retry in 5 seconds..."
/bin/sleep 5
fi
done
return 1
}
# checkCephClusterHealth checks `ceph health` command for `HEALTH_OK`
checkCephClusterHealth(){
echo "$(date) | Running ceph health command"
if /usr/bin/ceph health | grep -q "HEALTH_OK"; then
echo "$(date) | Ceph cluster health is: OKAY"
return 0
fi
return 1
}
preflightCheck
echo "$(date) | Running the rook toolbox config initiation script..."
/usr/local/bin/toolbox.sh &
TRIES=0
until [ -f /etc/ceph/ceph.conf ]; do
[ $TRIES -eq 10 ] && { echo "$(date) | No Ceph config found after 10 tries. Exiting ..."; exit 1; }
echo "$(date) | Waiting for Ceph config (try $TRIES from 10) ..."
(( TRIES++ ))
sleep 3
done
while true; do
if checkCephClusterHealth; then
if updateNodeRebootAnnotation; then
while true; do
echo "$(date) | Waiting for $NODE to reboot ..."
/bin/sleep 30
done
exit 0
else
echo "$(date) | Failed updating annotation for $NODE. Exiting."
exit 1
fi
fi
echo "$(date) | Ceph cluster Health not HEALTH_OK currently. Checking again in 20 seconds ..."
/bin/sleep 20
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment