Skip to content

Instantly share code, notes, and snippets.

@subuk
Created December 24, 2018 15:14
Show Gist options
  • Save subuk/0d184f1d82ce704ed51e092c70082092 to your computer and use it in GitHub Desktop.
Save subuk/0d184f1d82ce704ed51e092c70082092 to your computer and use it in GitHub Desktop.
ASG Automatic Deployment
// Deploy rolls the autoscaling group over to the AMI referenced by artifact.Url.
//
// The rollout proceeds in stages:
//
//  1. Verify the artifact is an AMI and that the group is currently stable.
//  2. Create a new launch configuration cloned from the current one, but
//     pointing at the artifact's AMI, and attach it to the group.
//  3. Raise the desired capacity by the number of outdated instances and wait
//     (uninterruptibly) until the group reports that capacity is reached.
//  4. Wait until the group, its load balancers and target groups are stable.
//  5. Terminate the outdated instances, delete the old launch configuration
//     and wait for the group to settle again.
//
// Progress messages are sent on output; sends are blocking, so the caller must
// keep draining the channel for the whole duration of the call.
//
// Every step that changes AWS state pushes an undo action onto a rollback
// stack, which is invoked when a later step fails or ctx is cancelled before
// the new deployment has been confirmed stable. The whole operation is bounded
// by e.Timeout in addition to any deadline already carried by ctx.
//
// It returns an error when the artifact type is unsupported, the group is not
// stable to begin with, an AWS call fails, the required capacity exceeds the
// group's max size, or the deployment is cancelled before it is confirmed.
func (e *ASGTarget) Deploy(ctx context.Context, artifact Artifact, output chan DeployLogEntry) error {
	// Keep the cancel func so the timer behind the timeout is released as soon
	// as Deploy returns instead of lingering until the deadline fires.
	ctx, cancel := context.WithTimeout(ctx, e.Timeout)
	defer cancel()

	if artifact.Type != ARTIFACT_AMI {
		return fmt.Errorf("unsupported artifact type '%s'", artifact.Type)
	}

	asg, err := e.fetchAsg()
	if err != nil {
		return err
	}
	oldLc, err := e.fetchLc(asg)
	if err != nil {
		return err
	}
	tgHealth, err := e.fetchTargetGroupsHealth(asg)
	if err != nil {
		return err
	}
	lbHealth, err := e.fetchELBHealth(asg)
	if err != nil {
		return err
	}
	if reason := e.isStable(asg, lbHealth, tgHealth); reason != nil {
		return tool.NewError(reason, "autoscaling group is unstable")
	}

	// Nothing has been changed yet, so a cancellation here needs no rollback.
	select {
	case <-ctx.Done():
		return fmt.Errorf("deployment has been cancelled")
	default:
	}

	// NOTE(review): rollback.Call() is assumed to run the pushed actions in
	// reverse (LIFO) order — confirm against tool.NewFuncStack.
	rollback := tool.NewFuncStack()

	if artifact.Url == *oldLc.ImageId {
		output <- NewDeployLogEntry("current launch configuration contains required AMI, doing redeploy")
	}

	// Fixed-width timestamp: unpadded fields are ambiguous (e.g. Jan 12 and
	// Nov 2 both render as "112") and could produce colliding names.
	now := time.Now().UTC()
	newLcName := fmt.Sprintf("%s-%s", *asg.AutoScalingGroupName, now.Format("20060102-150405"))

	output <- NewDeployLogEntry("creating new launch configuration '%s'", newLcName)
	createLcInput := &aws_autoscaling.CreateLaunchConfigurationInput{
		AssociatePublicIpAddress: oldLc.AssociatePublicIpAddress,
		BlockDeviceMappings:      oldLc.BlockDeviceMappings,
		EbsOptimized:             oldLc.EbsOptimized,
		IamInstanceProfile:       oldLc.IamInstanceProfile,
		InstanceMonitoring:       oldLc.InstanceMonitoring,
		InstanceType:             oldLc.InstanceType,
		// The new configuration must boot the artifact being deployed, not
		// the image of the configuration it is cloned from.
		ImageId:                 aws.String(artifact.Url),
		KeyName:                 oldLc.KeyName,
		LaunchConfigurationName: aws.String(newLcName),
		PlacementTenancy:        oldLc.PlacementTenancy,
		SecurityGroups:          oldLc.SecurityGroups,
		UserData:                oldLc.UserData,
	}
	// These optional fields are only copied when they hold a non-empty value.
	if oldLc.KernelId != nil && *oldLc.KernelId != "" {
		createLcInput.KernelId = oldLc.KernelId
	}
	if oldLc.RamdiskId != nil && *oldLc.RamdiskId != "" {
		createLcInput.RamdiskId = oldLc.RamdiskId
	}
	if oldLc.SpotPrice != nil && *oldLc.SpotPrice != "" {
		createLcInput.SpotPrice = oldLc.SpotPrice
	}
	if _, err = e.autoscaling.CreateLaunchConfiguration(createLcInput); err != nil {
		return tool.NewError(err, "failed to create new launch configuration")
	}
	rollback.Push(func() {
		output <- NewDeployLogEntry("deleting new launch configuration %s", newLcName)
		_, err := e.autoscaling.DeleteLaunchConfiguration(&aws_autoscaling.DeleteLaunchConfigurationInput{
			LaunchConfigurationName: aws.String(newLcName),
		})
		if err != nil {
			output <- NewDeployLogEntry("failed to delete new launch configuration '%s': %s", newLcName, err)
		}
	})

	output <- NewDeployLogEntry("changing asg launch configuration to '%s'", newLcName)
	_, err = e.autoscaling.UpdateAutoScalingGroup(&aws_autoscaling.UpdateAutoScalingGroupInput{
		AutoScalingGroupName:    asg.AutoScalingGroupName,
		LaunchConfigurationName: aws.String(newLcName),
	})
	if err != nil {
		rollback.Call()
		return tool.NewError(err, "failed to change asg launch configuration")
	}
	rollback.Push(func() {
		output <- NewDeployLogEntry("changing asg launch configuration back to %s", *oldLc.LaunchConfigurationName)
		// Local err: the rollback action must not overwrite Deploy's own err.
		_, err := e.autoscaling.UpdateAutoScalingGroup(&aws_autoscaling.UpdateAutoScalingGroupInput{
			AutoScalingGroupName:    asg.AutoScalingGroupName,
			LaunchConfigurationName: oldLc.LaunchConfigurationName,
		})
		if err != nil {
			output <- NewDeployLogEntry("failed to change launch configuration back to '%s': %s", *oldLc.LaunchConfigurationName, err)
		}
	})

	output <- NewDeployLogEntry("refreshing asg info after changing launch configuration")
	asg, err = e.fetchAsg()
	if err != nil {
		rollback.Call()
		return err
	}

	select {
	case <-ctx.Done():
		rollback.Call()
		return fmt.Errorf("deployment has been cancelled")
	default:
	}

	instancesToReplace := e.getOutdatedLcInstances(asg)
	if len(instancesToReplace) == 0 {
		output <- NewDeployLogEntry("all instances have required launch configuration")
		return nil
	}

	asg, err = e.fetchAsg()
	if err != nil {
		rollback.Call()
		return err
	}

	// Grow the group by one new instance per outdated instance.
	desiredCapacity := *asg.DesiredCapacity + int64(len(instancesToReplace))
	maxSize := *asg.MaxSize
	if desiredCapacity > maxSize {
		rollback.Call()
		return fmt.Errorf("required desired capacity more than max group size")
	}

	output <- NewDeployLogEntry("launching new instances by setting desired capacity from %d to %d", *asg.DesiredCapacity, desiredCapacity)
	_, err = e.autoscaling.UpdateAutoScalingGroup(&aws_autoscaling.UpdateAutoScalingGroupInput{
		AutoScalingGroupName: asg.AutoScalingGroupName,
		DesiredCapacity:      aws.Int64(desiredCapacity),
	})
	if err != nil {
		rollback.Call()
		return tool.NewError(err, "failed to set desired capacity")
	}
	rollback.Push(func() {
		output <- NewDeployLogEntry("terminating instances with new launch configuration, please wait!")
		freshAsg, err := e.fetchAsg()
		if err != nil {
			output <- NewDeployLogEntry("cannot refresh asg info: %s", err)
			return
		}
		for _, instance := range e.getCurrentLcInstances(freshAsg) {
			if err := e.sendTerminationSignal(instance); err != nil {
				output <- NewDeployLogEntry("failed to send termination signal to instance '%s': %s", *instance.InstanceId, err)
			} else {
				output <- NewDeployLogEntry("termination sent to instance %s", *instance.InstanceId)
			}
		}
	})

	output <- NewDeployLogEntry("starting uninterruptable loop, waiting for new instances to be spawned, please wait")
	checkTicker0 := time.NewTicker(time.Second * 10)
	defer checkTicker0.Stop()
	ctxDoneHandled := false
Loop0:
	for {
		// Uninterruptable loop: cancellation is acknowledged once but only
		// acted upon after the desired capacity has been reached.
		select {
		case <-ctx.Done():
			if !ctxDoneHandled {
				output <- NewDeployLogEntry("cancel requested, but cannot be done at the moment, please wait until asg desired capacity reached for smooth rollback")
				ctxDoneHandled = true
			}
		case <-checkTicker0.C:
			asg, err := e.fetchAsg()
			if err != nil {
				// AWS state has already been modified; undo it like every
				// other failure path past this point does.
				rollback.Call()
				return err
			}
			if reason := e.checkDesiredCapacityReachedOnASG(asg); reason != nil {
				output <- NewDeployLogEntry(tool.NewError(reason, "still waiting").Error())
			} else {
				checkTicker0.Stop()
				output <- NewDeployLogEntry("all new instances have been spawned")
				break Loop0
			}
		}
	}

	// Honour a cancellation that arrived during the uninterruptable wait.
	select {
	case <-ctx.Done():
		rollback.Call()
		return fmt.Errorf("deployment has been cancelled")
	default:
	}

	output <- NewDeployLogEntry("waiting for autoscaling group reaches stable state")
	checkTicker1 := time.NewTicker(time.Second * 5)
	defer checkTicker1.Stop()
Loop1:
	for {
		select {
		case <-ctx.Done():
			output <- NewDeployLogEntry("cancel request received")
			rollback.Call()
			return fmt.Errorf("deployment cancelled")
		case <-checkTicker1.C:
			// Refresh failures here are logged and retried on the next tick.
			asg, err := e.fetchAsg()
			if err != nil {
				output <- NewDeployLogEntry("WARNING failed to refresh asg, retrying: %s", err)
				continue
			}
			tgHealth, err := e.fetchTargetGroupsHealth(asg)
			if err != nil {
				output <- NewDeployLogEntry("WARNING failed to refresh target groups health, retrying: %s", err)
				continue
			}
			lbHealth, err := e.fetchELBHealth(asg)
			if err != nil {
				output <- NewDeployLogEntry("WARNING failed to refresh elb health, retrying: %s", err)
				continue
			}
			if reason := e.isStable(asg, lbHealth, tgHealth); reason != nil {
				output <- NewDeployLogEntry("still not stable: %s", reason)
			} else {
				checkTicker1.Stop()
				output <- NewDeployLogEntry("stable state reached")
				break Loop1
			}
		}
	}

	// From here on the new deployment is considered good; no more rollbacks.
	output <- NewDeployLogEntry("looks like new deployment works, tidying up on a best-efforts basis")
	refreshedAsg, err := e.fetchAsg()
	if err != nil {
		output <- NewDeployLogEntry(tool.NewError(err, "WARNING refreshing asg failed, continue with outdated asg info").Error())
	} else {
		asg = refreshedAsg
	}

	outdatedInstances := e.getOutdatedLcInstances(asg)
	for _, instance := range outdatedInstances {
		if err := e.sendTerminationSignal(instance); err != nil {
			output <- NewDeployLogEntry("termination failed for instance %s: %s", *instance.InstanceId, err)
		} else {
			output <- NewDeployLogEntry("termination sent to instance %s", *instance.InstanceId)
		}
	}

	_, err = e.autoscaling.DeleteLaunchConfiguration(&aws_autoscaling.DeleteLaunchConfigurationInput{
		LaunchConfigurationName: oldLc.LaunchConfigurationName,
	})
	if err != nil {
		return tool.NewError(err, "failed to delete old launch configuration '%s'", *oldLc.LaunchConfigurationName)
	}
	output <- NewDeployLogEntry("old launch configuration '%s' deleted", *oldLc.LaunchConfigurationName)

	output <- NewDeployLogEntry("waiting for outdated instances (%d) to be terminated", len(outdatedInstances))
	checkTicker2 := time.NewTicker(time.Second * 5)
	defer checkTicker2.Stop()
	for {
		select {
		case <-ctx.Done():
			output <- NewDeployLogEntry("cancel request received, so we won't wait for old instances to be terminated, just exit with success")
			return nil
		case <-checkTicker2.C:
			asg, err := e.fetchAsg()
			if err != nil {
				output <- NewDeployLogEntry("WARNING failed to refresh asg, retrying: %s", err)
				continue
			}
			if reason := e.checkDesiredCapacityReachedOnASG(asg); reason != nil {
				output <- NewDeployLogEntry(tool.NewError(reason, "still waiting").Error())
			} else {
				output <- NewDeployLogEntry("stable state reached")
				return nil
			}
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment