Skip to content

Instantly share code, notes, and snippets.

@awilczek
Created May 22, 2017 18:29
Show Gist options
  • Save awilczek/2a638675bb93195b4d7ba950f3a86eb1 to your computer and use it in GitHub Desktop.
Save awilczek/2a638675bb93195b4d7ba950f3a86eb1 to your computer and use it in GitHub Desktop.
"use strict";
// Registering cleanup callback before requiring flightplan.
process.on("SIGINT", interruptedCleanup);
const util = require("util"),
moment = require("moment"),
_ = require("lodash"),
plan = require("flightplan"),
request = require("request-promise");
/*
Usages:
fly [deploy:]dev (deploys current branch on DEV)
fly [deploy:]dev --branch=testing (deploys testing branch, this optional param can be used for all targets)
fly [deploy:]dev --branch=23af9e8 (deploys 23af9e8 commit, can be used for all targets)
fly [deploy:]canary --msg="Summary" (deploys master branch on Canary, msg is required param for canary and production targets)
fly [deploy:]production10 --msg="Summary" (deploys master branch on 10% of production servers)
fly [deploy:]production25 --msg="Summary" (deploys master branch on 25% of production servers)
fly [deploy:]production --msg="Summary" (deploys master branch on all of production servers)
fly [deploy:]production --msg="Summary" --force="172.11.22.333"
fly [deploy:]production --msg="Summary" --force="172.11.22.333,172.22.33.444"
(the force param treats the listed instances as healthy, so the deploy does not wait for their healthchecks and redeploys them anyway)
fly [deploy:]canary --msg="Msg" --silent (silent mode turns off Slack notifications and events)
fly rollback:canary (rolls Canary back to the old build)
fly rollback:production (rolls all production servers back to the old build)
fly unify:production (unifies the build version across all production servers, helpful to "roll back" partial deploys like 10%)
*/
// --- Configuration ---
// Git ref to deploy. The "dev" target switches it to "HEAD" and --branch overrides it.
let BRANCH = "master";

// Per-target option objects passed to plan.target(); the tasks read them back through
// plan.runtime.options.
const DEV_OPTIONS = {
    versionKeyPostfix: "_dev"
};
const CANARY_OPTIONS = {
    event: "Canary",
    versionKeyPostfix: "_canary",
    oldVersionKeyPostfix: "_canary_old",
    lockTarget: "canary",
    lbTakeout: true
};
const PROD_10_OPTIONS = {
    event: "10% of servers",
    lockTarget: "production",
    lbTakeout: true,
    bringBackPrevVersion: true
};
const PROD_25_OPTIONS = {
    event: "25% of servers",
    lockTarget: "production",
    lbTakeout: true,
    bringBackPrevVersion: true
};
const PROD_OPTIONS = {
    event: "All servers",
    oldVersionKeyPostfix: "_old",
    lockTarget: "production",
    lbTakeout: true,
    waitForAllHealthy: true,
    canUnify: true
};

// Fraction of the production servers that may be deployed at the same time.
const PARALLEL_DEPLOYS_FRACTION = 0.2;
const CONSUL_URL = "http://url.to.consul:8500/v1";
// NOTE(review): the previous value was garbled ("[email protected]:...") and could not be used
// in the `git ls-remote` command; restored to the SSH form matching the github.com/ORG/product
// commit URLs used elsewhere in this file - confirm against the real repository.
const BACKEND_GIT_URL = "git@github.com:ORG/product.git";
// ---------------------

// Mutable state shared by the tasks of a single run.
let newRev; // revision being written to Consul as the app version
let prevRev; // version read from Consul before the deploy started
let localMachine; // local transport, set by the first local task
let forced = []; // addresses from --force that are treated as healthy
let maxParallelDeploys = 1;
let deployedInstancesCount = 0;
let parallelDeploysCount = 0;
// Instances currently taken out of the ELB ({ id, host } entries).
const removedFromELB = [];
// Target plan for DEV server.
// Looks up the server tagged "dev" in the Consul catalog and reports it as the only host.
// `done` is always called exactly once: with the host list, or with an Error when the
// lookup fails or no DEV server is listed (an empty catalog used to leave `done` uncalled).
plan.target("dev", done => {
    BRANCH = "HEAD"; // Using current branch as default.
    getServersList().then(
        servers => {
            const devServer = _.find(servers, isDev);
            const address = devServer && devServer.Address;
            if (!address) {
                done(new Error("DEV not found"));
                return;
            }
            console.log("DEV IP: %s", address);
            done([toHost(address)]);
        },
        // Only request failures land here; they are not followed by the success handler.
        err => done(new Error(util.format("Getting servers list failed - Message: %s, Error: %j", err, err)))
    );
}, DEV_OPTIONS);
// Target plan for Canary server.
// Looks up the server tagged "canary" in the Consul catalog and reports it as the only host.
// `done` is always called exactly once: with the host list, or with an Error when the
// lookup fails or no Canary server is listed (an empty catalog used to leave `done` uncalled).
plan.target("canary", done => {
    getServersList().then(
        servers => {
            const canaryServer = _.find(servers, isCanary);
            const address = canaryServer && canaryServer.Address;
            if (!address) {
                done(new Error("Canary not found"));
                return;
            }
            console.log("Canary IP: %s", address);
            done([toHost(address)]);
        },
        // Only request failures land here; they are not followed by the success handler.
        err => done(new Error(util.format("Getting servers list failed - Message: %s, Error: %j", err, err)))
    );
}, CANARY_OPTIONS);
// Target plan for 10% of production servers.
// Takes the first tenth (rounded down) of the sorted production addresses, so fewer than
// ten production servers yield no hosts and the target fails with the error below.
// `done` is always called exactly once; an empty catalog used to leave it uncalled.
plan.target("production10", done => {
    getServersList().then(
        servers => {
            const addresses = _(servers)
                .reject(isNonProduction)
                .map("Address")
                .sortBy()
                .value();
            const hosts = _(addresses)
                .take(Math.floor(addresses.length / 10))
                .map(toHost)
                .value();
            if (_.isEmpty(hosts)) {
                done(new Error("No productions servers found"));
                return;
            }
            // Every selected host is deployed at the same time.
            maxParallelDeploys = hosts.length;
            console.log("Backend servers: %j", _.map(hosts, "host"));
            console.log("Parallel deploys: %d", maxParallelDeploys);
            done(hosts);
        },
        // Only request failures land here; they are not followed by the success handler.
        err => done(new Error(util.format("Getting servers list failed - Message: %s, Error: %j", err, err)))
    );
}, PROD_10_OPTIONS);
// Target plan for 25% of production servers.
// Takes the first quarter (rounded down) of the sorted production addresses, so fewer than
// four production servers yield no hosts and the target fails with the error below.
// `done` is always called exactly once; an empty catalog used to leave it uncalled.
plan.target("production25", done => {
    getServersList().then(
        servers => {
            const addresses = _(servers)
                .reject(isNonProduction)
                .map("Address")
                .sortBy()
                .value();
            const hosts = _(addresses)
                .take(Math.floor(addresses.length / 4))
                .map(toHost)
                .value();
            if (_.isEmpty(hosts)) {
                done(new Error("No productions servers found"));
                return;
            }
            // Every selected host is deployed at the same time.
            maxParallelDeploys = hosts.length;
            console.log("Backend servers: %j", _.map(hosts, "host"));
            console.log("Parallel deploys: %d", maxParallelDeploys);
            done(hosts);
        },
        // Only request failures land here; they are not followed by the success handler.
        err => done(new Error(util.format("Getting servers list failed - Message: %s, Error: %j", err, err)))
    );
}, PROD_25_OPTIONS);
// Target plan for production servers.
// Reports every server that is neither DEV nor Canary, sorted by address, and limits the
// number of simultaneous deploys to PARALLEL_DEPLOYS_FRACTION of them (at least one).
// `done` is always called exactly once; an empty catalog used to leave it uncalled.
plan.target("production", done => {
    getServersList().then(
        servers => {
            const addresses = _(servers)
                .reject(isNonProduction)
                .map("Address")
                .sortBy()
                .value();
            if (_.isEmpty(addresses)) {
                done(new Error("No productions servers found"));
                return;
            }
            const hosts = _.map(addresses, toHost);
            maxParallelDeploys = Math.floor(addresses.length * PARALLEL_DEPLOYS_FRACTION) || 1;
            console.log("Backend servers: %j", addresses);
            console.log("Parallel deploys: %d", maxParallelDeploys);
            done(hosts);
        },
        // Only request failures land here; they are not followed by the success handler.
        err => done(new Error(util.format("Getting servers list failed - Message: %s, Error: %j", err, err)))
    );
}, PROD_OPTIONS);
// True when the catalog entry belongs to the DEV or the Canary server.
function isNonProduction(server) {
    // `some` stops at the first matching predicate, like the `||` chain it replaces.
    return [isDev, isCanary].some(matches => matches(server));
}
// True when the catalog entry carries the "dev" service tag.
function isDev(server) {
    const tags = server.ServiceTags;
    return _.includes(tags, "dev");
}
// True when the catalog entry carries the "canary" service tag.
function isCanary(server) {
    const canaryTag = "canary";
    return _.includes(server.ServiceTags, canaryTag);
}
// Builds the host descriptor handed to flightplan for one server address. The SSH agent
// socket is read from the SSH_AUTH_SOCK environment variable at call time.
function toHost(address) {
    const connection = { host: address };
    connection.username = "user";
    connection.agent = process.env.SSH_AUTH_SOCK;
    return connection;
}
// Local preparation for "deploy": remembers the local transport, applies the --force and
// --branch options, resolves the revision to deploy, and aborts when the deploy summary is
// missing (targets with an `event`) or the build for that revision is not available.
plan.local(["default", "deploy"], local => {
    localMachine = local;
    const options = plan.runtime.options;
    if (typeof options.force === "string") {
        // Trim each entry (not just the whole string) so --force="a, b" still matches the
        // exact addresses compared in checkServersHealth. A non-string value (e.g. a flag
        // passed without a value) is ignored instead of crashing on .trim().
        forced = options.force
            .split(",")
            .map(address => address.trim())
            .filter(Boolean);
    }
    if (options.branch) {
        BRANCH = options.branch;
    }
    if (BRANCH === "HEAD") {
        newRev = local.exec("git rev-parse HEAD").stdout.trim();
    } else {
        // Prefer the revision known to the remote repository; fall back to the local ref
        // (e.g. when BRANCH is a commit hash and ls-remote prints nothing).
        const remoteRev = local.exec("git ls-remote " + BACKEND_GIT_URL + " " + BRANCH).stdout;
        newRev = (remoteRev || local.exec("git rev-parse " + BRANCH).stdout).split("\t")[0].trim();
    }
    if (options.event && typeof options.msg !== "string") {
        plan.abort("Please provide deploy summary. E.g. 'fly deploy:production --msg=\"Deploy summary\"'");
        return;
    }
    if (!buildReady()) {
        plan.abort("Build is not ready");
    }
});
// Local pre-check for "rollback": remembers the local transport and aborts for targets
// that do not define an old-version key.
plan.local("rollback", local => {
    localMachine = local;
    const reversible = Boolean(plan.runtime.options.oldVersionKeyPostfix);
    if (reversible) {
        return;
    }
    plan.abort("Target is not reversible");
});
// Local pre-check for "unify": remembers the local transport and aborts for targets
// that are not flagged with `canUnify`.
plan.local("unify", local => {
    localMachine = local;
    const unifiable = Boolean(plan.runtime.options.canUnify);
    if (unifiable) {
        return;
    }
    plan.abort("Target can not be unified");
});
// HACK: Connecting to each server before locking, posting to Slack, etc.
// This remote task is registered ahead of the local locking tasks below and only logs
// "Connected" on every host of the target.
// NOTE(review): presumably an unreachable host then fails the run before any lock is taken - confirm.
plan.remote(["default", "deploy", "rollback", "unify"], remote => remote.log("Connected"));
// Local "deploy" step run before touching any server. In order it:
//   1. checks that all servers are healthy (only for targets with waitForAllHealthy),
//   2. takes the Consul deploy lock (only for targets with lockTarget),
//   3. reads the current version into prevRev (when it must be restored or archived later),
//   4. stores prevRev under the old-version key (targets with oldVersionKeyPostfix),
//   5. notifies Slack / event storage and writes newRev as the new version.
// Whatever the chain hands to `done` is returned by local.waitFor; a truthy value is
// treated as an error and aborts the plan.
plan.local(["default", "deploy"], local => {
const options = plan.runtime.options;
local.log("Setting Consul lock and new app version");
const err = local.waitFor(done =>
Promise.resolve()
.then(() => {
if (!options.waitForAllHealthy) {
return;
}
// checkServersHealth takes a single callback, so it can serve as the Promise executor:
// the promise resolves with the boolean it reports.
return new Promise(checkServersHealth)
.then(healthy => {
if (!healthy) {
return Promise.reject("Servers not healthy");
}
});
})
.then(() => {
if (!options.lockTarget) {
return;
}
return getLock()
.catch(err => Promise.reject(util.format("Getting lock state failed - Message: %s, Error: %j", err, err)))
.then(result => {
console.log("Lock state: %j", result);
// Any lock text other than "free" means the lock is held.
if (result !== "free") {
return Promise.reject("Deploy is locked in Consul");
}
return lock(options.lockTarget)
.catch(err => Promise.reject(util.format("Locking failed - Message: %s, Error: %j", err, err)));
})
.then(() => console.log("Locking succeeded"));
})
.then(() => {
// The current version is only needed when it is restored after the deploy
// (bringBackPrevVersion) or archived as the old version (oldVersionKeyPostfix).
if (!options.bringBackPrevVersion && !options.oldVersionKeyPostfix) {
return;
}
return getCurrentVersion()
.then(currentVersion => {
console.log("Current version: %s", currentVersion);
prevRev = currentVersion;
})
.catch(err => Promise.reject(util.format("Getting current version failed - Message: %s, Error: %j", err, err)));
})
.then(() => {
// Don't update old version if new revision is same as prev - it means last deploy failed.
if (!options.oldVersionKeyPostfix || newRev === prevRev) {
return;
}
return setNewVersion(options.oldVersionKeyPostfix, prevRev)
.then(result => console.log("Old version set - Result: %s", result))
.catch(err => Promise.reject(util.format("Setting old version failed - Message: %s, Error: %j", err, err)));
})
.then(() => {
// Posting to Slack and creating Event.
// Both calls are fire-and-forget: their results are not chained here, and each logs
// its own failure, so a failed notification does not abort the deploy.
if (options.event && !options.silent) {
postToSlack(options.event, options.msg);
createEvent(options.event, options.msg);
}
return setNewVersion(options.versionKeyPostfix)
.catch(err => Promise.reject(util.format("Setting new version failed - Message: %s, Error: %j", err, err)));
})
.then(result => console.log("New version set - Result: %s", result))
// Success calls done with undefined (console.log's return value); any rejection above
// reaches done as the error value.
.then(done)
.catch(done)
);
if (err) {
plan.abort(err);
}
});
// Local "rollback" step run before touching any server. In order it:
//   1. takes the Consul deploy lock (only for targets with lockTarget),
//   2. reads the version stored under the target's old-version key into newRev,
//   3. notifies Slack / event storage and writes newRev back as the active version.
// Whatever the chain hands to `done` is returned by local.waitFor; a truthy value is
// treated as an error and aborts the plan.
plan.local("rollback", local => {
const options = plan.runtime.options;
local.log("Setting Consul lock and app version to old one");
const err = local.waitFor(done =>
Promise.resolve()
.then(() => {
if (!options.lockTarget) {
return;
}
return getLock()
.catch(err => Promise.reject(util.format("Getting lock state failed - Message: %s, Error: %j", err, err)))
.then(result => {
console.log("Lock state: %j", result);
// Any lock text other than "free" means the lock is held.
if (result !== "free") {
return Promise.reject("Deploy is locked in Consul");
}
return lock(options.lockTarget)
.catch(err => Promise.reject(util.format("Locking failed - Message: %s, Error: %j", err, err)));
})
.then(() => console.log("Locking succeeded"));
})
.then(() =>
getCurrentVersion(options.oldVersionKeyPostfix)
.then(oldVersion => {
console.log("Old version: %s", oldVersion);
// setNewVersion below writes newRev, so the old version becomes the deployed one.
newRev = oldVersion;
})
.catch(err => Promise.reject(util.format("Getting old version failed - Message: %s, Error: %j", err, err)))
)
.then(() => {
// Posting to Slack and creating Event.
// Both calls are fire-and-forget; each logs its own failure.
if (options.event && !options.silent) {
postToSlack(options.event, "Rollback");
createEvent(options.event, "Rollback");
}
return setNewVersion(options.versionKeyPostfix)
.catch(err => Promise.reject(util.format("Setting new version failed - Message: %s, Error: %j", err, err)));
})
.then(result => console.log("New version set - Result: %s", result))
// Success calls done with undefined; any rejection above reaches done as the error value.
.then(done)
.catch(done)
);
if (err) {
plan.abort(err);
}
});
// Local "unify" step run before touching any server. In order it:
//   1. takes the Consul deploy lock (only for targets with lockTarget),
//   2. reads the current version from Consul into newRev,
//   3. notifies Slack / event storage.
// No version is written here. Whatever the chain hands to `done` is returned by
// local.waitFor; a truthy value is treated as an error and aborts the plan.
plan.local("unify", local => {
const options = plan.runtime.options;
local.log("Setting Consul lock");
const err = local.waitFor(done =>
Promise.resolve()
.then(() => {
if (!options.lockTarget) {
return;
}
return getLock()
.catch(err => Promise.reject(util.format("Getting lock state failed - Message: %s, Error: %j", err, err)))
.then(result => {
console.log("Lock state: %j", result);
// Any lock text other than "free" means the lock is held.
if (result !== "free") {
return Promise.reject("Deploy is locked in Consul");
}
return lock(options.lockTarget)
.catch(err => Promise.reject(util.format("Locking failed - Message: %s, Error: %j", err, err)));
})
.then(() => console.log("Locking succeeded"));
})
.then(() =>
getCurrentVersion()
.then(version => {
console.log("Current version: %s", version);
// newRev is what the notifications below link to.
newRev = version;
})
.catch(err => Promise.reject(util.format("Getting current version failed - Message: %s, Error: %j", err, err)))
)
.then(() => {
// Posting to Slack and creating Event.
// Both calls are fire-and-forget; each logs its own failure.
if (options.event && !options.silent) {
postToSlack(options.event, "Unifying build version");
createEvent(options.event, "Unifying build version");
}
})
// Success calls done with undefined; any rejection above reaches done as the error value.
.then(done)
.catch(done)
);
if (err) {
plan.abort(err);
}
});
// Remote step shared by deploy, rollback and unify. For each host it optionally waits for
// a free deploy slot with all servers healthy, takes the instance out of the ELB, restarts
// the product_s service and registers the instance again, updating the shared progress
// counters along the way.
plan.remote(["default", "deploy", "rollback", "unify"], remote => {
    const runtimeOptions = plan.runtime.options;
    const id = remote.exec("wget -q -O - http://instance-data/latest/meta-data/instance-id").stdout;
    const host = remote.hostname().stdout.trim();
    // Kept as one object so the very same reference can be pulled from removedFromELB later.
    const elbEntry = { id: id, host: host };

    if (runtimeOptions.waitForAllHealthy) {
        // Wait until all servers are back in Consul (healthcheck pass).
        waitUntilServersHealthy(remote);
    }

    parallelDeploysCount += 1;
    remote.log(`Deploying to instance: ${id}`);

    if (runtimeOptions.lbTakeout) {
        // Record the instance first, so an interrupted run can still report it.
        removedFromELB.push(elbEntry);
        logProgress();
        removeInstanceFromELB(id);
        // Wait until connections are finished.
        remote.exec("sleep 20");
    }

    remote.log(`Restarting product_s service on instance: ${id}`);
    remote.exec("sudo systemctl stop product_s && sudo systemctl start product_s");

    if (runtimeOptions.lbTakeout) {
        addInstanceToELB(id);
        _.pull(removedFromELB, elbEntry);
    }

    parallelDeploysCount -= 1;
    deployedInstancesCount += 1;
    remote.log(`Deployed to instance: ${id}`);
    logProgress();
});
// Final local step shared by deploy, rollback and unify, run after the remote step:
//   1. for targets with bringBackPrevVersion, writes prevRev back to Consul as the version,
//   2. for targets with lockTarget, releases the Consul deploy lock.
// Failures here are only logged; the plan is not aborted.
plan.local(["default", "deploy", "rollback", "unify"], local => {
const options = plan.runtime.options;
const err = local.waitFor(done =>
Promise.resolve()
.then(() => {
if (!options.bringBackPrevVersion) {
return;
}
console.log("Setting previous version in Consul: %s", prevRev);
// setNewVersion writes newRev when no explicit revision is passed.
newRev = prevRev;
return setNewVersion(options.versionKeyPostfix)
.then(result => console.log("Previous version set - Result: %s", result))
.catch(err => Promise.reject(util.format("Setting old version failed - Message: %s, Error: %j", err, err)));
})
.then(() => {
if (!options.lockTarget) {
return;
}
return unlock()
.then(result => console.log("Unlock succeeded - Result: %j", result))
.catch(err => Promise.reject(util.format("Unlocking failed - Message: %s, Error: %j", err, err)));
})
// Success calls done with undefined; any rejection above reaches done as the error value.
// A failure while restoring the version skips the unlock step.
.then(done)
.catch(done)
);
if (err) {
console.log(err);
}
});
// Requests the catalog entries of the "product" service from Consul.
// Returns the promise produced by `request`, with JSON parsing enabled.
function getServersList() {
    const query = { json: true };
    query.uri = `${CONSUL_URL}/catalog/service/product`;
    return request(query);
}
// Reads the deploy lock from Consul KV (deploy/product_deploy_lock) and resolves with its
// decoded UTF-8 text. Callers treat the text "free" as "not locked".
function getLock() {
    return request({
        uri: CONSUL_URL + "/kv/deploy/product_deploy_lock",
        json: true
    })
        .then(results => {
            const encoded = results[0].Value;
            // Buffer.from replaces the deprecated `new Buffer(...)` constructor. A missing
            // value (Consul may report an empty key as Value: null) decodes to "" instead of
            // throwing a TypeError; callers then see a lock state other than "free".
            return encoded ? Buffer.from(encoded, "base64").toString("utf8") : "";
        });
}
// Writes a "Deploy to <target> at <ISO time>" marker into the Consul deploy lock key.
// Resolves with the response body when it is the string "true"; otherwise rejects with an
// Error describing the unexpected body.
function lock(deployTarget) {
    const marker = util.format("Deploy to %s at %s", deployTarget, moment().toISOString());
    const put = {
        uri: CONSUL_URL + "/kv/deploy/product_deploy_lock",
        method: "PUT",
        body: marker
    };
    return request(put).then(reply => {
        if (reply === "true") {
            return reply;
        }
        throw new Error(util.format("Locking failed - Result: %j", reply));
    });
}
// Releases the deploy lock by writing the text "free" into the Consul lock key.
// Returns the promise produced by `request`.
function unlock() {
    const lockUri = `${CONSUL_URL}/kv/deploy/product_deploy_lock`;
    return request({ uri: lockUri, method: "PUT", body: "free" });
}
// Reads the app version stored in Consul KV under product/version<postfix>.
//   versionKeyPostfix - optional key suffix (e.g. "_old"); omitted means the live version.
// Resolves with the decoded UTF-8 version string and rejects with
// Error("Missing current version") when the key holds no value.
function getCurrentVersion(versionKeyPostfix) {
    return request({
        uri: CONSUL_URL + "/kv/product/version" + (versionKeyPostfix || ""),
        method: "GET",
        json: true
    })
        .then(results => {
            const encoded = results[0].Value;
            // Buffer.from replaces the deprecated `new Buffer(...)` constructor. A missing
            // value (Consul may report an empty key as Value: null) is treated as an empty
            // version, so it reaches the check below instead of failing with a TypeError.
            const version = encoded ? Buffer.from(encoded, "base64").toString("utf8") : "";
            if (!version) {
                throw new Error("Missing current version");
            }
            return version;
        });
}
// Writes a revision into Consul KV under product/version<postfix>.
//   versionKeyPostfix - optional key suffix; omitted means the live version key.
//   rev               - revision to store; falls back to the shared newRev.
// Resolves with the response body when it is the string "true"; otherwise rejects with
// the body itself.
function setNewVersion(versionKeyPostfix, rev) {
    const suffix = versionKeyPostfix || "";
    const put = {
        uri: CONSUL_URL + "/kv/product/version" + suffix,
        method: "PUT",
        body: rev || newRev
    };
    return request(put).then(reply => (reply === "true" ? reply : Promise.reject(reply)));
}
// Posts "<event>: <msg>" plus a link to the newRev commit to the Slack webhook.
// Fire-and-forget: nothing is returned, and the outcome is only logged.
function postToSlack(event, msg) {
    const commitUrl = `https://github.com/ORG/product/commit/${newRev}`;
    const text = `${event}: ${msg}\n<${commitUrl}>`;
    request({
        uri: "https://hooks.slack.com/services/KEY_A_1/KEY_A_2/KEY_A_3",
        method: "POST",
        json: true,
        body: { text: text }
    })
        .then(result => console.log("Posted to Slack - Result: %j", result))
        .catch(err => console.log("Error posting to Slack - Message: %s, Error: %j", err, err));
}
// Stores a "DEPLOYMENT - <event>: <msg>" record in the event storage, with a link to the
// newRev commit and the current UTC time formatted as DD.MM.YYYY HH:mm.
// Fire-and-forget: nothing is returned, and the outcome is only logged.
function createEvent(event, msg) {
    const commitUrl = `https://github.com/ORG/product/commit/${newRev}`;
    const timestamp = moment().utc().format("DD.MM.YYYY HH:mm");
    const record = {
        summary: `DEPLOYMENT - ${event}: ${msg}`,
        description: commitUrl,
        time: timestamp
    };
    request({ uri: "http://url.to.event.storage", method: "PUT", json: true, body: record })
        .then(result => console.log("Event Created - Result: %j", result))
        .catch(err => console.log("Error creating Event - Message: %s, Error: %j", err, err));
}
// Number of unsuccessful waiting rounds since the last successful wait (shared by all hosts).
let w = 0;

// Blocks the given remote transport until a parallel-deploy slot is free and
// checkServersHealth reports every server healthy. Between attempts it sleeps 10 seconds
// on the remote host and logs the progress on every 20th unsuccessful round.
function waitUntilServersHealthy(remote) {
    const slotAvailable = () => parallelDeploysCount < maxParallelDeploys;
    for (;;) {
        // The slot is re-checked after the health check because other hosts may have
        // started deploying while it was running.
        if (slotAvailable() && remote.waitFor(checkServersHealth) && slotAvailable()) {
            w = 0;
            return;
        }
        w += 1;
        if (w % 20 === 0) {
            logProgress();
        }
        remote.exec("sleep 10");
    }
}
// Asks Consul for the health of every "product" server and reports a single boolean to
// `done`: true when each server either passes all of its checks or is listed in `forced`.
// Unhealthy servers are logged by address; a failed request is logged and reported as false.
function checkServersHealth(done) {
    const isHealthy = server => {
        const checksPass = _.every(server.Checks, check => check.Status === "passing");
        // Forced addresses count as healthy even when their checks fail.
        if (checksPass || _.includes(forced, server.Node.Address)) {
            return true;
        }
        console.log("Not healthy: " + server.Node.Address);
        return false;
    };
    getServersHealthStatuses()
        .then(servers => {
            done(_.every(servers, isHealthy));
        })
        .catch(err => {
            console.log("Waiting for all Backend servers in Consul failed - Message: %s, Error: %j", err, err);
            done(false);
        });
}
// Requests the health entries of the "product" service from Consul.
// Returns the promise produced by `request`, with JSON parsing enabled.
function getServersHealthStatuses() {
    const healthUri = `${CONSUL_URL}/health/service/product`;
    return request({ uri: healthUri, json: true });
}
// Logs how many hosts are done out of the target's hosts, which instances are currently
// out of the ELB, and how many deploys are running against the allowed maximum.
function logProgress() {
    const totalHosts = plan.runtime.hosts.length;
    const template = "Progress: %d/%d, Removed from ELB: %j, Parallel deploys: %d/%d";
    console.log(template, deployedInstancesCount, totalHosts, removedFromELB, parallelDeploysCount, maxParallelDeploys);
}
// Checks on the local machine whether the registry bucket lists a manifest tag for newRev.
// Runs `aws s3 ls` in failsafe mode and returns true only when it exits with code 0.
function buildReady() {
    const tagPath = "s3://url.to.docker.registry/docker/registry/v2/repositories/product/_manifests/tags/" + newRev;
    const listing = localMachine.exec("aws s3 ls " + tagPath, { failsafe: true });
    return listing.code === 0;
}
// Deregisters the given instance from the ProductELB load balancer by running the AWS CLI
// silently on the local machine.
function removeInstanceFromELB(instanceId) {
    const command = `aws elb deregister-instances-from-load-balancer --load-balancer-name ProductELB --instances ${instanceId}`;
    localMachine.exec(command, { silent: true });
}
// Registers the given instance with the ProductELB load balancer by running the AWS CLI
// silently on the local machine.
function addInstanceToELB(instanceId) {
    const command = `aws elb register-instances-with-load-balancer --load-balancer-name ProductELB --instances ${instanceId}`;
    localMachine.exec(command, { silent: true });
}
// SIGINT handler: when instances are still out of the ELB, prints the AWS CLI command that
// registers them again. It only prints the command; nothing is executed.
function interruptedCleanup() {
    if (removedFromELB.length === 0) {
        return;
    }
    const instanceIds = removedFromELB.map(entry => entry.id).join(" ");
    console.log("Register instances back:");
    console.log("aws elb register-instances-with-load-balancer --load-balancer-name ProductELB --instances " + instanceIds);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment