Last active
February 25, 2016 10:08
-
-
Save jontg/9745186 to your computer and use it in GitHub Desktop.
Zero Downtime
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# Source files so we can announce stuff.
# NOTE(review): presumably setenv.sh supplies $username/$password and
# hipchat_notify.sh supplies send_deploy_message, both used further down --
# confirm against those files.
. /mnt/apps/scripts/setenv.sh
. /mnt/apps/scripts/hipchat_notify.sh

# Refuse to run while another deploy_if_new.sh is already in progress.
# NOTE(review): the threshold of 2 presumably accounts for this process plus
# the subshell forked for the command substitution, both of which pidof -x
# reports under the script's name -- confirm before changing it.
if [ "$(pidof -x deploy_if_new.sh | wc -w)" -gt 2 ]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") Cowardly exiting as deploy_if_new is already running"
  # Exit statuses are limited to 0-255; 255 is what bash reports for `exit -1`,
  # so the status seen by callers is unchanged.
  exit 255
fi
# Build server REST endpoint used to look up builds and their artifacts.
BASE_URI=http://teamcity.relateiq.com
REST_URI=$BASE_URI/app/rest/builds

# Branch to deploy; callers may override it through the environment.
BRANCH="${BRANCH:-master}"
IMAGE_NAME="webapp"
WEBAPP_PORT=80

# Build locator: only finished, successful, pinned builds are considered.
# Branches whose name starts with "master" use the relateiq_master build type;
# every other branch uses relateiq_combined filtered by branch name.
if [[ "$BRANCH" == "master"* ]]; then
  BUILD_LOCATOR="buildType:relateiq_master,running:false,status:SUCCESS,pinned:true"
else
  BUILD_LOCATOR="buildType:relateiq_combined,running:false,status:SUCCESS,pinned:true,branch:$BRANCH"
fi
# From here on, abort on any unhandled command failure.
set -e

# Absolute path of the directory containing this script.
SCRIPT_HOME="$( cd "$( dirname "$0" )" && pwd )"

# check for hipache
# Anything other than a literal "true" from jq is treated as "not running".
STATE=$(docker inspect hipache | jq ".[0].State.Running")
if [[ "$STATE" != "true" ]]; then
  # Best-effort removal of a leftover container with the same name; a failure
  # here must not abort the script, and its output is discarded.
  docker rm hipache > /dev/null 2>&1 || true
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") hipache not running, starting it"
  mkdir -p /mnt/apps/hipache/logs
  docker run -p 80:80 -p 6379:6379 --name hipache -v /mnt/apps/hipache/logs:/logs -d quay.io/relateiq/hipache
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") lpush frontend:* default"
  # Give the freshly started container a moment before talking to port 6379.
  sleep 5
  # Seed the frontend:* list with its first element, "default"; the trailing
  # sleep keeps the connection open long enough for nc to deliver the command.
  (printf 'lpush frontend:* default\r\n'; sleep 1) | nc localhost 6379
fi
# get the currently running VERSION
VERSION_FILE="$SCRIPT_HOME/VERSION"
# Per-run log directory, unique down to the nanosecond.
LOG_DIR="${SCRIPT_HOME}/logs/$(date +"%Y-%m-%d-%H-%M-%S-%N")"

# get the BUILD_ID and ARTIFACT_URI of the webapp to deploy
# Credentials are quoted so that spaces or glob characters in them cannot
# split or expand into extra curl arguments.
BUILD_ID=$(curl -f -s -u "$username:$password" -H 'Accept: application/json' "$REST_URI?locator=$BUILD_LOCATOR,count:1" | jq -r '.build | map(.id) | @sh')
ARTIFACT_URI="$REST_URI/id:$BUILD_ID/artifacts/files"
REMOTE_VERSION=$(curl -f -s -u "$username:$password" "$ARTIFACT_URI/VERSION")
REPO="${IMAGE_NAME}:${REMOTE_VERSION}"

# grab old ids, hacky way is to look for port 8080
OLD_WEBAPP_IDS=$(docker ps | grep 8080/tcp | cut -d" " -f 1)

# Start from a known-empty value so a LOCAL_VERSION inherited from the
# environment cannot be mistaken for the deployed version.
LOCAL_VERSION=""
if [[ -f "$VERSION_FILE" ]]; then
  LOCAL_VERSION=$(cat "$VERSION_FILE" 2> /dev/null)
fi

# nothing to do if there is a webapp running and we have the latest version
if [[ "$REMOTE_VERSION" == "$LOCAL_VERSION" && -n "$OLD_WEBAPP_IDS" ]]; then
  exit 0
fi
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") remote=$REMOTE_VERSION != local=$LOCAL_VERSION"

# An empty version would match every image below and produce a tag with no
# version part, so refuse to continue without one.
if [[ -z "$REMOTE_VERSION" ]]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") remote version is empty, refusing to deploy" >&2
  exit 1
fi

# pull new version, TODO: retry a few times if it doesn't work
# Reuse a local image that already carries this name and version, if any.
# -F matches the version literally (dots are not regex wildcards).
IMAGE_ID=$(docker images | grep -F -- "${IMAGE_NAME}" | grep -F -- "$REMOTE_VERSION" | head -n 1 | awk '{print $3}')
if [[ -z "$IMAGE_ID" ]]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") pulling latest from $ARTIFACT_URI/${IMAGE_NAME}.tgz"
  # pipefail is scoped to this subshell: a failed download (-f) or a failed
  # decompression now fails the whole pipeline instead of being masked by the
  # exit status of docker load.
  (
    set -o pipefail
    curl -f -s -u "$username:$password" "$ARTIFACT_URI/${IMAGE_NAME}.tgz" | gzip -d -c | docker load
  )
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") pulled latest from $ARTIFACT_URI/${IMAGE_NAME}.tgz"
  # NOTE(review): assumes the image just loaded is the first one whose listing
  # line contains "none" -- verify, other untagged images would also match.
  IMAGE_ID=$(docker images | grep none | head -n 1 | awk '{print $3}')
  docker tag "$IMAGE_ID" "$REPO"
fi

# NOTE(review): the version is recorded before the new container is launched
# and health-checked; if a later step fails, the next run may consider this
# version already deployed -- confirm whether that is intended.
echo "$REMOTE_VERSION" > "$VERSION_FILE"
#launch a new one
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") launching $REPO, logging to $LOG_DIR"
mkdir -p "$LOG_DIR"

# Try to start the container up to five times, one second apart.
# MAX_TIMEOUT counts the attempts that are left.
NEW_WEBAPP_ID=""
MAX_TIMEOUT=5
while (( MAX_TIMEOUT > 0 )); do
  # Used as an `if` condition, a failing docker run does not trip `set -e`.
  if NEW_WEBAPP_ID=$(docker run -P -h "$(hostname)" --link hipache:hipache -d -v /mnt/apps/riq:/mnt/apps/riq -v "$LOG_DIR:/logs" "$REPO"); then
    break
  fi
  # Discard whatever a failed attempt printed so it is never taken for an id.
  NEW_WEBAPP_ID=""
  echo -n "."
  sleep 1
  MAX_TIMEOUT=$(( MAX_TIMEOUT - 1 ))
done
# Terminate the line of progress dots.
echo

if [[ -z "$NEW_WEBAPP_ID" ]]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") failed to start new webapp"
  send_deploy_message "$HOSTNAME" "$BRANCH" "$IMAGE_NAME" "error"
  exit 1
fi
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") launched $NEW_WEBAPP_ID"
# Zero Downtime Zero Rollback
# Make sure the image carries the IMAGE_NAME:REMOTE_VERSION tag.
docker tag "$IMAGE_ID" "$IMAGE_NAME:$REMOTE_VERSION"

# Look up the IP address docker assigned to the new container.
NEW_WEBAPP_IP_ADDR=$(docker inspect "$NEW_WEBAPP_ID" | jq -r '.[0].NetworkSettings.IPAddress')

# jq -r prints "null" when the field is missing. [[ ]] with || is used because
# the previous `[ ... -o ... ]` form with an unquoted operand raised a test
# error on an empty address, which silently skipped this failure branch.
if [[ -z "$NEW_WEBAPP_IP_ADDR" || "$NEW_WEBAPP_IP_ADDR" == "null" ]]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") no new webapp ip, failed to start"
  send_deploy_message "$HOSTNAME" "$BRANCH" "$IMAGE_NAME" "error"
  exit 1
fi
echo -n "$(date +"%Y-%m-%d %H:%M:%S %Z") new instance $NEW_WEBAPP_ID starting, on ip $NEW_WEBAPP_IP_ADDR"

# Poll health.sh every 5 seconds until it exits 0, giving up after 5 minutes.
# MAX_TIMEOUT holds the remaining budget in seconds.
MAX_TIMEOUT=300
HEALTH_RC=1
# health.sh is allowed to fail while polling, so errexit is suspended for the
# duration of the loop.
set +e
until [[ "$HEALTH_RC" -eq 0 ]]; do
  if (( MAX_TIMEOUT <= 0 )); then
    # Terminate the line of progress dots before reporting the failure.
    echo
    echo "$(date +"%Y-%m-%d %H:%M:%S %Z") failed to be healthy within 5 minutes, exiting..."
    send_deploy_message "$HOSTNAME" "$BRANCH" "$IMAGE_NAME" "error"
    # NOTE(review): the new container is left as-is on this path -- confirm
    # whether it should be stopped here.
    exit 1
  fi
  # Quoted so a script directory containing spaces still resolves.
  "${SCRIPT_HOME}/health.sh" "$NEW_WEBAPP_IP_ADDR"
  HEALTH_RC=$?
  echo -n "."
  # NOTE(review): this pause also runs after a successful check -- confirm
  # whether the extra 5 seconds is an intentional grace period.
  sleep 5
  MAX_TIMEOUT=$(( MAX_TIMEOUT - 5 ))
done
set -e
echo
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") new instance $NEW_WEBAPP_ID started successfully."
# Point hipache at the new container by sending list commands to port 6379
# on localhost, one nc connection per command.
backend_url="http://${NEW_WEBAPP_IP_ADDR}:${WEBAPP_PORT}"
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") registering ${backend_url} with hipache"

# In order:
#   rpush - add myself as a backend (appended to the end of the list)
#   lset  - ensure i am first backend (overwrite index 1)
#   ltrim - remove all but 1 backend (keep only indexes 0 and 1)
for redis_cmd in \
  "rpush frontend:* ${backend_url}" \
  "lset frontend:* 1 ${backend_url}" \
  "ltrim frontend:* 0 1"; do
  # The trailing sleep keeps the pipe open for a second after the command.
  (printf '%s\r\n' "$redis_cmd"; sleep 1) | nc localhost 6379
done

echo "$(date +"%Y-%m-%d %H:%M:%S %Z") registered ${backend_url} with hipache"
# OLD_WEBAPP_IDS was captured before the new container was launched.
# With nothing to retire, the deploy is already complete.
if [[ -z "$OLD_WEBAPP_IDS" ]]; then
  echo "$(date +"%Y-%m-%d %H:%M:%S %Z") no old webapp to kill"
  send_deploy_message "$HOSTNAME" "$BRANCH" "$IMAGE_NAME" "success"
  exit 0
fi

echo "$(date +"%Y-%m-%d %H:%M:%S %Z") found old webapp containers $OLD_WEBAPP_IDS, gonna wait for new one to come up and then kill them"

# Stop, then remove, every old container; xargs splits the id list on
# whitespace and passes all ids to a single docker invocation.
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") stopping $OLD_WEBAPP_IDS"
xargs docker stop <<< "$OLD_WEBAPP_IDS"
echo "$(date +"%Y-%m-%d %H:%M:%S %Z") rm $OLD_WEBAPP_IDS"
xargs docker rm <<< "$OLD_WEBAPP_IDS"
# Image cleanup is best-effort: errors from here on no longer abort the script.
set +e

# Collect the ids of images to delete:
#   sort          - orders the listing lines; the original author relies on
#                   this putting the oldest date tags first
#   head -n -5    - drops the last five lines, i.e. keeps the five most recent
#   awk           - extracts the image id column
#   sort -u       - collapses duplicates when one image has several tags
stale_image_ids=$(docker images | grep "${IMAGE_NAME}" | sort | head -n -5 | awk '{print $3}' | sort -u)

# Intentionally unquoted: one id per word.
for stale_id in $stale_image_ids; do
  echo "Removing old tagged image $stale_id"
  docker rmi "$stale_id"
done

send_deploy_message "$HOSTNAME" "$BRANCH" "$IMAGE_NAME" "success"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment