-
-
Save fogoat/032f71d86c9b219d71ef7627a0fefc41 to your computer and use it in GitHub Desktop.
WIP: A Google Cloud startup-script to automatically revive preemptible compute instances.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# GCloud startup script to auto-restart any instances with the 'revive' tag.
# The calling machine must have read/write access to the Compute API.
# I use this to reboot preemptible instances.
# Output is logged to /tmp/revive.log
# Prefix every line read from stdin with one space and write it to stdout.
# Used to visually nest command output under a timestamped log entry.
indent() {
  sed 's/^/ /'
}
# Start every instance named in a listing.
#
# Arguments:
#   $1 - newline-separated listing. On each line the first
#        whitespace-separated field is used as the instance name and the
#        second as its zone; any further fields are ignored. Blank lines are
#        skipped.
# Outputs:
#   For each instance: the listing line, a "Rebooting ..." message, and
#   whatever `gcloud` prints on stdout.
# Returns:
#   0 if every `gcloud compute instances start` call succeeded, 1 otherwise.
revive_instances() {
  local -a rows=()
  local line instance_name instance_zone
  local status=0

  # Split the listing on newlines explicitly. Looping over a quoted "$1" runs
  # the body once with the whole string, which restarted only the first
  # instance and left the rest down.
  mapfile -t rows <<< "$1"

  for line in "${rows[@]}"; do
    # A here-string of an empty argument still yields one empty row.
    [[ -n "${line//[[:space:]]/}" ]] || continue

    printf '%s\n' "$line"

    # The caller may have changed IFS, so pin it for this read only.
    IFS=$' \t' read -r instance_name instance_zone _ <<< "$line"

    # Attempt to reboot the instance; remember failures but keep going so one
    # bad instance does not block the others.
    printf "Rebooting '%s' in zone '%s'...\n" "$instance_name" "$instance_zone"
    gcloud compute instances start "--zone=$instance_zone" "$instance_name" \
      || status=1
  done

  return "$status"
}
# Poll forever for TERMINATED instances matching a tag and restart them.
#
# Arguments:
#   $1 - pattern (grep basic regex) matched against each listing line, so it
#        hits instance names as well as tags.
#   $2 - pause between checks, passed straight to `sleep`.
#   $3 - log file; entries are appended.
# Outputs:
#   Appends timestamped status lines, plus the indented output of
#   revive_instances, to the log file.
# Returns:
#   Never returns; the loop runs until the process is killed.
auto_reviver() {
  local revive_tag="$1"
  local check_interval="$2"
  local log_file="$3"
  local listing offline

  # The timestamp comes from `date`; the message goes through printf so that
  # a '%' in the tag or interval is never read as a date format directive.
  printf "%s: monitoring instances with revive tag '%s', interval %s\n" \
    "$(date +'%F %T')" "$revive_tag" "$check_interval" >> "$log_file"

  while :; do
    # Capture the listing on its own so a failing gcloud call is detected
    # rather than being mistaken for "no instances are down".
    if listing=$(gcloud compute instances list \
      --format='table(name,zone,status,tags.list())'); then
      # Keep lines that mention the tag and are in TERMINATED status.
      offline=$(printf '%s\n' "$listing" \
        | grep -- "$revive_tag" \
        | grep -- "TERMINATED")

      if [[ -n "$offline" ]]; then
        # If we found some, reboot them
        printf '%s: some instances are down.\n' "$(date +'%F %T')" \
          >> "$log_file"
        revive_instances "$offline" | indent >> "$log_file"
      fi
    else
      printf '%s: failed to list instances; will retry.\n' \
        "$(date +'%F %T')" >> "$log_file"
    fi

    # Sleep for the check interval
    sleep "$check_interval"
  done
}
# Log destination, defined once so the three uses below cannot drift apart.
# NOTE(review): this is a predictable name in a world-writable directory;
# consider a root-owned location such as /var/log if this runs as root.
readonly REVIVE_LOG="/tmp/revive.log"

# Make sure revive.log exists and is readable by general users
: >> "$REVIVE_LOG"
chmod 644 "$REVIVE_LOG"

# Run auto-reviver with tag "revive", check interval 2 minutes, logging
auto_reviver "revive" 120 "$REVIVE_LOG"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment