Last active
April 16, 2023 19:09
-
-
Save EvanBalster/87d0ac9153587c4dce6d260ee49bd64d to your computer and use it in GitHub Desktop.
WIP: A Google Cloud startup-script to automatically revive preemptible compute instances.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#
# GCloud startup script to auto-restart any instances with the 'revive' tag.
# The calling machine must have read/write access to the Compute API!
# I use this to reboot preemptible instances.
# Output is logged to /tmp/revive.log
# Prefix every line read from stdin with one space and write the result
# to stdout. Used to nest helper output beneath a timestamped log line.
indent() {
  sed 's/^/ /'
}
revive_instances() { | |
# Go through lines in the provided string | |
for line in "$1"; do | |
echo "$line" | |
# Instance name is the first word in the line. | |
instance_name=`echo "$line" | head -n1 | awk '{print $1}'` | |
instance_zone=`echo "$line" | head -n1 | awk '{print $2}'` | |
# Attempt to reboot the instance | |
echo "Rebooting '$instance_name' in zone '$instance_zone'..." | |
gcloud compute instances start "--zone=$instance_zone" "$instance_name" | |
done | |
} | |
# Poll the instance listing forever and restart instances that are down.
#
# Arguments:
#   $1 - text to look for in a listing line (matches the name, zone,
#        status or tags columns)
#   $2 - delay between checks, passed to sleep
#   $3 - file that log lines are appended to
# Calls revive_instances and indent, which must already be defined.
# Never returns: the loop runs until the process is terminated.
auto_reviver() {
  local REVIVE_TAG="$1"
  local CHECK_INTERVAL="$2"
  local LOG_FILE="$3"
  local listing offline

  # The timestamp is passed to printf as data rather than putting the
  # message inside the date format, so a '%' in the tag is logged as-is.
  printf "%s: monitoring instances with revive tag '%s', interval %s\n" \
    "$(date +'%F %T')" "$REVIVE_TAG" "$CHECK_INTERVAL" >> "$LOG_FILE"

  while :; do
    # Look for instances with the tag text in their listing line and
    # TERMINATED status. The table header line never matches TERMINATED.
    if listing=$(gcloud compute instances list \
      --format='table(name,zone,status,tags.list())'); then
      offline=$(grep -F -- "$REVIVE_TAG" <<< "$listing" | grep "TERMINATED")
      if [[ -n "$offline" ]]; then
        # If we found some, reboot them
        printf '%s: some instances are down.\n' "$(date +'%F %T')" >> "$LOG_FILE"
        revive_instances "$offline" | indent >> "$LOG_FILE"
      fi
    else
      # A failed listing is logged instead of being read as "nothing down".
      printf '%s: could not list instances; will retry.\n' \
        "$(date +'%F %T')" >> "$LOG_FILE"
    fi

    # Sleep for the check interval
    sleep "$CHECK_INTERVAL"
  done
}
# Create the log file if it does not exist yet (existing content is kept)
# and make it readable by general users.
: >> "/tmp/revive.log"
chmod 644 "/tmp/revive.log"

# Start monitoring: tag "revive", check interval 120 (two minutes),
# log file /tmp/revive.log.
auto_reviver "revive" 120 "/tmp/revive.log"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I suggest taking that question to a Q&A forum like Stack Overflow.
I haven't done anything with cloud computing in over a year so I have no opinion.