Last active
March 2, 2018 09:58
-
-
Save galexrt/f9c3f017e950487470c1b87caf44ab0f to your computer and use it in GitHub Desktop.
Improved version of the ceph reboot status check script from [rook/rook - Pull Request: Added ceph-reboot-script using the Container Linux Update Operator #1492](https://github.com/rook/rook/pull/1492).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# preflightCheck checks for existence of "dependencies"
#
# The only dependency verified is the Kubernetes ServiceAccount token file,
# which updateNodeRebootAnnotation reads to authenticate against the API.
#
# Outputs: an error line on stdout when the token file is missing.
# Returns: 0 when the token file exists; otherwise terminates the calling
#          shell with exit status 1 (this function uses `exit`, not `return`).
preflightCheck() {
  if [ ! -f "/var/run/secrets/kubernetes.io/serviceaccount/token" ]; then
    echo "$(date) | No Kubernetes ServiceAccount token found."
    exit 1
  fi
}
# updateNodeRebootAnnotation sets the `ceph-reboot-check` annotation to `true` on `$NODE`
#
# Sends a JSON-Patch request to the Kubernetes API server and retries on
# failure. The attempt counter is incremented after every failed request, so
# the function gives up after `max_tries` attempts instead of looping forever.
#
# Globals:
#   NODE                          (read)    name of the node to annotate
#   KUBERNETES_SERVICE_HOST       (read)    API server host
#   KUBERNETES_PORT_443_TCP_PORT  (read)    API server port
#   PATCH, KUBE_TOKEN             (written) exported, as before
# Arguments:
#   $1 - optional maximum number of attempts (default: 10)
#   $2 - optional delay in seconds between attempts (default: 5)
# Outputs: curl's response body and progress lines on stdout.
# Returns: 0 once the annotation was patched, 1 when all attempts failed.
updateNodeRebootAnnotation() {
  local -r max_tries="${1:-10}"
  local -r retry_delay="${2:-5}"
  # Local counter: keeps the retry state out of the global TRIES variable.
  local attempt=0

  export PATCH="[{ \"op\": \"add\", \"path\": \"/metadata/annotations/ceph-reboot-check\", \"value\": \"true\" }]"
  KUBE_TOKEN="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
  export KUBE_TOKEN

  until [ "$attempt" -ge "$max_tries" ]; do
    # --fail makes curl exit non-zero on HTTP error responses as well, so
    # those are retried just like connection failures.
    # NOTE(review): -k disables TLS certificate verification and the token is
    # passed on the command line; kept as-is to preserve existing behavior.
    if curl -sSk \
      --fail \
      -XPATCH \
      -H "Authorization: Bearer $KUBE_TOKEN" \
      -H "Accept: application/json" \
      -H "Content-Type:application/json-patch+json" \
      --data "$PATCH" \
      "https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT/api/v1/nodes/$NODE"; then
      echo "$(date) | Annotation \"ceph-reboot-check\" from node $NODE updated to \"true\". Reboot confirmed!"
      return 0
    fi
    attempt=$((attempt + 1))
    echo "$(date) | Kubernetes API server connection error, will retry in ${retry_delay} seconds..."
    /bin/sleep "$retry_delay"
  done
  return 1
}
# checkCephClusterHealth checks `ceph health` command for `HEALTH_OK`
#
# Outputs: progress lines on stdout.
# Returns: 0 when the output of `/usr/bin/ceph health` contains `HEALTH_OK`,
#          1 otherwise (this includes the case where the ceph command
#          produces no matching output at all, e.g. because it failed).
checkCephClusterHealth() {
  echo "$(date) | Running ceph health command"
  if /usr/bin/ceph health | grep -q "HEALTH_OK"; then
    echo "$(date) | Ceph cluster health is: OKAY"
    return 0
  fi
  return 1
}
# Main flow:
#   1. verify the ServiceAccount token exists,
#   2. start the toolbox script in the background and wait for the Ceph config,
#   3. poll Ceph health until it is HEALTH_OK, then annotate the node and idle
#      until the node is rebooted.
preflightCheck

echo "$(date) | Running the rook toolbox config initiation script..."
# Started in the background; presumably it is what creates
# /etc/ceph/ceph.conf, which the loop below waits for.
/usr/local/bin/toolbox.sh &

# Wait for the Ceph config file, checking every 3 seconds and giving up once
# the counter reaches 10.
TRIES=0
until [ -f /etc/ceph/ceph.conf ]; do
  if [ "$TRIES" -eq 10 ]; then
    echo "$(date) | No Ceph config found after 10 tries. Exiting ..."
    exit 1
  fi
  echo "$(date) | Waiting for Ceph config (try $TRIES from 10) ..."
  TRIES=$((TRIES + 1))
  sleep 3
done

while true; do
  if checkCephClusterHealth; then
    if updateNodeRebootAnnotation; then
      # The annotation is set; nothing left to do but stay alive until the
      # node goes down. This loop never ends on its own, so no statement
      # after it can be reached.
      while true; do
        echo "$(date) | Waiting for $NODE to reboot ..."
        /bin/sleep 30
      done
    else
      echo "$(date) | Failed updating annotation for $NODE. Exiting."
      exit 1
    fi
  fi
  echo "$(date) | Ceph cluster Health not HEALTH_OK currently. Checking again in 20 seconds ..."
  /bin/sleep 20
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment