Last active
January 24, 2018 07:29
-
-
Save gnilchee/cc852bd8519b4c4e656201ffa171f00a to your computer and use it in GitHub Desktop.
Rotate out instances by bringing up new instances, making sure they are healthy, and removing the old ones
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import boto3 | |
from time import sleep | |
from concurrent.futures import ProcessPoolExecutor, wait, as_completed | |
def get_asg_instances(asg_name):
    '''
    Describes a single Auto Scaling group and returns its description
    dictionary (the first entry of AutoScalingGroups in the API response).
    Callers in this file read the following keys from it
    :MinSize - current minimum size of the group
    :DesiredCapacity - current desired capacity of the group
    :MaxSize - current maximum size of the group
    :Instances - list of instance dictionaries; InstanceId and
                 LifecycleState are read from each entry

    Raises SystemExit if the API call fails or no group matches asg_name.
    '''
    try:
        asg_client = boto3.client('autoscaling')
        response = asg_client.describe_auto_scaling_groups(
            AutoScalingGroupNames=[
                asg_name,
            ],
        )
        groups = response['AutoScalingGroups']
    except Exception as err:
        raise SystemExit("ASG {asg} - get_asg_instances failed due to: {err}"
                         .format(asg=asg_name, err=err))
    if not groups:
        # Without this check an empty result would only surface as an
        # opaque "list index out of range" from the [0] lookup.
        raise SystemExit("ASG {asg} - get_asg_instances failed due to: "
                         "no Auto Scaling group found with that name"
                         .format(asg=asg_name))
    return groups[0]
def get_instance_health(elb_name):
    '''
    Asks the ELB for the health of its instances and returns a dictionary
    mapping each instance id to its reported state
    {InstanceId: State,...}
    Any failure is converted into SystemExit.
    '''
    try:
        states = boto3.client('elb').describe_instance_health(
            LoadBalancerName=elb_name
        )['InstanceStates']
        return {entry['InstanceId']: entry['State'] for entry in states}
    except Exception as err:
        message = "ELB {elb} - get_instance_health failed due to: {err}".format(elb=elb_name, err=err)
        raise SystemExit(message)
def scale_asg(asg_name, asg_min=2, asg_desired=2, asg_max=8):
    '''
    Applies new minimum, desired and maximum sizes to the ASG in one
    update call (used for both scale out and scale in).
    Returns the HTTP status code of the API response so the caller can
    confirm success. Any failure is converted into SystemExit.
    '''
    sizing = {
        'AutoScalingGroupName': asg_name,
        'MinSize': asg_min,
        'DesiredCapacity': asg_desired,
        'MaxSize': asg_max,
    }
    try:
        result = boto3.client('autoscaling').update_auto_scaling_group(**sizing)
        return result['ResponseMetadata']['HTTPStatusCode']
    except Exception as err:
        message = "ASG {asg} - scale_asg failed due to: {err}".format(asg=asg_name, err=err)
        raise SystemExit(message)
def wait_for_elb_health(elb_name, timeout=10):
    '''
    Polls get_instance_health every 15 seconds until every instance behind
    the ELB reports InService, or until the timeout is reached.
    :elb_name - name of the load balancer to poll
    :timeout - maximum time to wait, in minutes

    Raises SystemExit if polling fails, or if instances are still not
    InService when the timeout expires, so the caller never continues
    (and removes old instances) while the ELB is unhealthy.
    '''
    try:
        # 4 polls per minute at 15 seconds per poll
        retries = timeout * 4
        print("ELB {elb}: Waiting up to {tmout}min for Instances to become healthy"
              .format(elb=elb_name, tmout=timeout))
        while retries > 0:
            health = get_instance_health(elb_name)
            # Anything other than InService (OutOfService, Unknown) is
            # treated as not yet healthy.
            if all(state == 'InService' for state in health.values()):
                print("ELB {} - Instances are healthy".format(elb_name))
                return
            print("ELB {}: Sleeping 15 seconds".format(elb_name))
            sleep(15)
            retries -= 1
    except Exception as err:
        raise SystemExit("ELB {elb} - wait_for_elb_health failed due to: {err}"
                         .format(elb=elb_name, err=err))
    # Retries exhausted without ever seeing a fully healthy ELB
    raise SystemExit("ELB {elb} - wait_for_elb_health failed due to: "
                     "Instances not healthy after {tmout}min"
                     .format(elb=elb_name, tmout=timeout))
def wait_for_asg_health(asg_name, expected_instances, timeout=5):
    '''
    Waits for the ASG to settle after scale_asg is run: polls every 15
    seconds until the group holds exactly expected_instances instances and
    all of them are InService, printing which condition it is waiting on.
    :asg_name - name of the Auto Scaling group to poll
    :expected_instances - number of instances the group should contain
    :timeout - maximum time to wait, in minutes

    Raises SystemExit if polling fails, or if the group has not settled
    when the timeout expires.
    '''
    try:
        # 4 polls per minute at 15 seconds per poll
        retries = timeout * 4
        print("ASG {asg}: Waiting up to {tmout}min for Instances to scale."
              .format(asg=asg_name, tmout=timeout))
        while retries > 0:
            instances = get_asg_instances(asg_name)['Instances']
            states = [server['LifecycleState'] for server in instances]
            if len(instances) != expected_instances:
                print("ASG {} NotExpectedLength: Sleeping 15 seconds".format(asg_name))
            elif any(state != 'InService' for state in states):
                # Covers every transitional state (Pending, Pending:Wait,
                # Terminating, ...), not just the exact string 'Pending'.
                print("ASG {} NotInService: Sleeping 15 seconds".format(asg_name))
            else:
                print("ASG {} scaled successfully".format(asg_name))
                return
            sleep(15)
            retries -= 1
    except Exception as err:
        raise SystemExit("ASG {asg}: wait_for_asg_health failed due to: {err}".format(asg=asg_name, err=err))
    # Retries exhausted without the group reaching the expected state
    raise SystemExit("ASG {asg}: wait_for_asg_health failed due to: "
                     "group did not reach {num} InService instances within {tmout}min"
                     .format(asg=asg_name, num=expected_instances, tmout=timeout))
def rotate_asg_now(asg_name, elb_name):
    '''
    Basically the :main function but named for the action it is completing.
    Rotates every instance of the ASG by
    1. doubling the desired capacity (raising max size if needed)
    2. waiting for the ASG and then the ELB to report healthy
    3. protecting the newly launched instances from scale in
    4. restoring the original min/desired/max so the old instances are
       the ones removed
    5. removing scale in protection from the remaining instances
    :asg_name - name of the Auto Scaling group to rotate
    :elb_name - name of the ELB used for the health check

    Raises SystemExit if any step fails.
    '''
    # Scale out
    response = get_asg_instances(asg_name)
    orig_min = response['MinSize']
    orig_desired = response['DesiredCapacity']
    orig_max = response['MaxSize']
    target_val = orig_desired * 2
    # Max size must be able to hold the doubled capacity
    target_max = max(target_val, orig_max)
    orig_instances = set(instance['InstanceId'] for instance in response['Instances'])
    grow_asg_resp = scale_asg(asg_name, asg_min=target_val, asg_desired=target_val, asg_max=target_max)
    if grow_asg_resp != 200:
        raise SystemExit("Unexpected Response Code growing ASG")
    wait_for_asg_health(asg_name, expected_instances=target_val, timeout=5)
    wait_for_elb_health(elb_name, timeout=10)

    # Scale in
    response = get_asg_instances(asg_name)
    instances_to_protect = [instance['InstanceId'] for instance in response['Instances']
                            if instance['InstanceId'] not in orig_instances]
    # Skip the API call when there is nothing to protect (e.g. the group
    # had a desired capacity of 0) instead of sending an empty id list.
    if instances_to_protect:
        try:
            asg_client = boto3.client('autoscaling')
            asg_client.set_instance_protection(
                InstanceIds=instances_to_protect,
                AutoScalingGroupName=asg_name,
                ProtectedFromScaleIn=True
            )
        except Exception as err:
            raise SystemExit("Issue setting instance protection due to: {}".format(err))
    shrink_asg_resp = scale_asg(asg_name, asg_min=orig_min, asg_desired=orig_desired, asg_max=orig_max)
    if shrink_asg_resp != 200:
        raise SystemExit("Unexpected Response Code shrinking ASG")
    wait_for_asg_health(asg_name, expected_instances=orig_desired, timeout=5)

    # Remove protection so the remaining instances can be scaled in later
    response = get_asg_instances(asg_name)
    instances_to_unprotect = [instance['InstanceId'] for instance in response['Instances']]
    if instances_to_unprotect:
        try:
            asg_unprotect_client = boto3.client('autoscaling')
            asg_unprotect_client.set_instance_protection(
                InstanceIds=instances_to_unprotect,
                AutoScalingGroupName=asg_name,
                ProtectedFromScaleIn=False
            )
        except Exception as err:
            raise SystemExit("ASG {asg}: Issue unsetting instance protection due to: {err}"
                             .format(asg=asg_name, err=err))
if __name__ == '__main__':
    try:
        print("Autoscale Rotation Beginning")
        # Maps each ASG name to the name of the ELB used for its health check
        asg_dict = {'autoscale-group-1': 'autoscale-group-1-lb', 'autoscale-group-2': 'autoscale-group-2-lb'}
        # The with block shuts the pool down once all rotations finish
        with ProcessPoolExecutor(max_workers=len(asg_dict)) as pool:
            # items() works on both Python 2 and 3 (iteritems() is 2 only)
            futures = [pool.submit(rotate_asg_now, asg_name=key, elb_name=value)
                       for key, value in asg_dict.items()]
            for future in as_completed(futures):
                # result() re-raises anything that failed inside the worker
                future.result()
        print("Autoscale Rotation Successful")
    except Exception as err:
        raise SystemExit("There was an issue due to: {}".format(err))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment