Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vvalorous/28fb1e2357625775a6bf293c6ff7a9ef to your computer and use it in GitHub Desktop.
Save vvalorous/28fb1e2357625775a6bf293c6ff7a9ef to your computer and use it in GitHub Desktop.
import boto3
import json
import logging
import time
# Module-level logger shared by every function below: the root logger
# (getLogger() is called without a name), with its level set to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def notify_on_error(message,
                    topic_arn='arn:aws:sns:us-east-1:xxxxxxxxxxxx:Lambda-Errors',
                    subject='ASG/ECS Lifecycle Termination Error'):
    """Log an error and publish it to an SNS topic on a best-effort basis.

    The message is always written to the module logger at ERROR level
    first.  Publishing to SNS is then attempted; any failure while
    publishing is logged (with traceback) and swallowed so that error
    reporting never raises into the caller.

    Args:
        message: Text to log and to send as the SNS message body.
        topic_arn: ARN of the SNS topic to publish to.  Defaults to the
            topic that was previously hard-coded in this function.
        subject: Subject line of the SNS notification.
    """
    logger.error(message)
    try:
        sns_client = boto3.client('sns')
        sns_response = sns_client.publish(
            TopicArn=topic_arn,
            Message=message,
            Subject=subject,
        )
        logger.info("SNS Publish HTTP Response: %s",
                    sns_response['ResponseMetadata']['HTTPStatusCode'])
    except Exception:
        # Best effort only: record the full traceback instead of just the
        # exception text, but never let notification failures propagate.
        logger.exception("Failed to publish error notification to SNS")
def deregister_from_ecs_cluster(instanceid, cluster_name):
    """Force-deregister an EC2 instance's container instance from an ECS cluster.

    Walks every page of the cluster's container instances, describes each
    page in a single batched call, and looks for the container instance
    whose ``ec2InstanceId`` equals ``instanceid``.  If one is found it is
    deregistered with ``force=True``; otherwise a log line records that the
    instance is not registered.

    Any exception is reported through ``notify_on_error`` and not re-raised.

    Args:
        instanceid: EC2 instance id to look for.
        cluster_name: Name (or ARN) of the ECS cluster to search.
    """
    try:
        ecs_client = boto3.client('ecs')
        containerarn = None

        # list_container_instances is paginated; without the paginator only
        # the first page of the cluster would ever be examined.
        paginator = ecs_client.get_paginator('list_container_instances')
        for page in paginator.paginate(cluster=cluster_name):
            arns = page['containerInstanceArns']
            if not arns:
                # describe_container_instances rejects an empty ARN list.
                continue
            # One describe call per page instead of one call per ARN.
            described = ecs_client.describe_container_instances(
                cluster=cluster_name,
                containerInstances=arns,
            )
            for container_instance in described['containerInstances']:
                if container_instance.get('ec2InstanceId') == instanceid:
                    containerarn = container_instance['containerInstanceArn']
                    break
            if containerarn:
                break

        if containerarn:
            logger.info("%s found registered with container ARN (%s) in ECS cluster. "
                        "Attempting to deregister from cluster.",
                        instanceid, containerarn)
            ecs_response = ecs_client.deregister_container_instance(
                cluster=cluster_name,
                containerInstance=containerarn,
                force=True,
            )
            logger.info("ECS Deregistration HTTP Response: %s",
                        ecs_response['ResponseMetadata']['HTTPStatusCode'])
        else:
            logger.info('%s is NOT registered with the Cluster.', instanceid)
    except Exception as e:
        notify_on_error(str(e))
def wait_for_alb(alb):
    """Wait until a target group of the ALB reports at least two healthy targets.

    For each target group attached to the load balancer, target health is
    polled up to 24 times with a 5 second pause between polls (about two
    minutes).  The function returns as soon as one target group shows two
    or more healthy targets, and also returns (after logging) when a target
    group exhausts its polls without reaching that count.

    Any exception is reported through ``notify_on_error`` and not re-raised.

    Args:
        alb: ARN of the application load balancer.
    """
    try:
        alb_client = boto3.client('elbv2')
        target_groups = alb_client.describe_target_groups(
            LoadBalancerArn=alb
        )
        for target_group in target_groups['TargetGroups']:
            for _ in range(24):
                healthy_count = 0
                alb_response = alb_client.describe_target_health(
                    TargetGroupArn=target_group['TargetGroupArn']
                )
                # Was a Python 2 ``print`` statement; log it instead so the
                # module also parses on Python 3.
                logger.info(json.dumps(alb_response))
                for target in alb_response['TargetHealthDescriptions']:
                    instanceid = target['Target']['Id']
                    state = target['TargetHealth']['State']
                    if state == "healthy":
                        healthy_count += 1
                        logger.info("%s is healthy, bringing total healthy to %s",
                                    instanceid, healthy_count)
                        logger.info("Description: %s", target)
                    else:
                        logger.info("%s is not in a healthy state. It is in State: %s.",
                                    instanceid, state)
                if healthy_count >= 2:
                    logger.info("Total healthy instances now at %s.. moving on",
                                healthy_count)
                    return
                # Sleep once per poll (not once per healthy target) so the
                # 24 polls really add up to the advertised two minutes, and
                # so an all-unhealthy group does not spin without pausing.
                time.sleep(5)
            else:
                # Report the ALB itself: the per-target id used previously
                # was just the last target seen, and was undefined when the
                # target group had no targets at all.
                logger.info("Whoops! %s took more than 2 minutes to get 2 healthy instances attached to the ALB!"
                            " Giving up on being being graceful!", alb)
                return
    except Exception as e:
        notify_on_error(str(e))
def wait_for_elb(elb):
    """Poll a classic ELB until more than two instances are registered.

    The load balancer is described up to 60 times, sleeping 5 seconds after
    every poll that finds two or fewer registered instances.  The function
    returns once more than two instances are registered, or after logging a
    give-up message when all polls are used.

    Any exception is reported through ``notify_on_error`` and not re-raised.

    Args:
        elb: Name of the classic load balancer.
    """
    try:
        client = boto3.client('elb')
        polls_remaining = 60
        while polls_remaining > 0:
            described = client.describe_load_balancers(LoadBalancerNames=[elb])
            registered = described[u'LoadBalancerDescriptions'][0][u'Instances']
            if len(registered) > 2:
                logger.info("Sweet! We have more than two instances in the ELB, "
                            "moving forward with termination request.")
                logger.info(registered)
                return
            logger.info("There are two or fewer instances registered with the ELB, "
                        "waiting for another instance to register before moving on. (5 min timeout)")
            logger.info(registered)
            time.sleep(5)
            polls_remaining -= 1
        # Every poll was used without seeing a third instance.
        logger.info("Whoops! It took more than 5 minutes waiting for more instances to register in the ELB! "
                    "Giving up on being graceful!")
    except Exception as err:
        notify_on_error(str(err))
def wait_for_tasks(ecscluster, wait=60):
    """Wait until the cluster has tasks and none of them is PENDING.

    The cluster's tasks are listed and described up to ``wait`` times,
    sleeping 5 seconds after each attempt that finds either no tasks or at
    least one task whose ``lastStatus`` is ``PENDING``.  The function
    returns as soon as an attempt finds tasks with no ``PENDING`` status,
    or after logging a give-up message once the attempts are exhausted.

    Any exception is reported through ``notify_on_error`` and not
    re-raised, matching the other wait_* helpers in this module.

    Args:
        ecscluster: Name (or ARN) of the ECS cluster to inspect.
        wait: Maximum number of polling attempts.  Zero or a negative
            value gives up immediately.
    """
    logger.info("Looking up status of tasks on %s", ecscluster)
    try:
        ecs_client = boto3.client('ecs')
        # Iterating over range(wait) replaces the former recursion and its
        # ``wait is 0`` identity test; an empty range also covers negative
        # values, which previously recursed without ever terminating.
        for _ in range(wait):
            task_arns = ecs_client.list_tasks(cluster=ecscluster)['taskArns']
            if not task_arns:
                logger.info("No Tasks found.. Waiting for service to spawn tasks..")
                time.sleep(5)
                continue

            task_descs = ecs_client.describe_tasks(cluster=ecscluster, tasks=task_arns)
            status = [task['lastStatus'] for task in task_descs['tasks']]
            if 'PENDING' in status:
                logger.info("Status of Tasks %s .. Waiting for RUNNING status..", status)
                time.sleep(5)
                continue

            logger.info("Great! Status of Tasks is %s .. moving on", status)
            return

        logger.info("Whoops! It took more than 5 minutes waiting for all the tasks to be up in the cluster! "
                    "Giving up on being graceful!")
    except Exception as e:
        notify_on_error(str(e))
def complete_asg_lifecycle(hookname, asg, actiontoken, instanceid):
    """Tell the Auto Scaling group to CONTINUE the pending lifecycle action.

    Any exception is reported through ``notify_on_error`` and not re-raised.

    Args:
        hookname: Name of the lifecycle hook.
        asg: Name of the Auto Scaling group.
        actiontoken: Lifecycle action token taken from the notification.
        instanceid: EC2 instance id the lifecycle action applies to.
    """
    try:
        request = {
            'LifecycleHookName': hookname,
            'AutoScalingGroupName': asg,
            'LifecycleActionToken': actiontoken,
            'LifecycleActionResult': 'CONTINUE',
            'InstanceId': instanceid,
        }
        result = boto3.client('autoscaling').complete_lifecycle_action(**request)
        http_status = result[u'ResponseMetadata'][u'HTTPStatusCode']
        logger.info("ASG Complete Lifecycle Action Response: %s", http_status)
    except Exception as err:
        notify_on_error(str(err))
def lambda_handler(event, context):
    """Handle an ASG lifecycle notification delivered through SNS.

    Steps, in order:
      1. Parse the lifecycle message from the first SNS record.
      2. Look up the CloudFormation stack named in ``NotificationMetadata``
         and read its ``alb``, ``elb`` and ``ecscluster`` outputs.
      3. Deregister the instance from the ECS cluster (if one is listed).
      4. Wait on the ALB and/or ELB (if listed), then on the cluster's tasks.
      5. Complete the lifecycle action so the ASG does not wait for the
         hook timeout.

    Args:
        event: Lambda event; expected to carry the lifecycle message as a
            JSON string under ``Records[0].Sns.Message``.
        context: Lambda context object (unused).
    """
    logger.info(json.dumps(event))
    message = json.loads(event['Records'][0]['Sns']['Message'])
    logger.info(message)

    # Parse SNS message for required data.
    ec2instanceid = message['EC2InstanceId']
    stackname = message['NotificationMetadata']
    asgname = message['AutoScalingGroupName']
    lifecycleactiontoken = message['LifecycleActionToken']
    lifecyclehookname = message['LifecycleHookName']
    logger.info("EC2 Instance ID: %s", ec2instanceid)
    logger.info("CF Stack Name: %s", stackname)

    # Get stack details via describe_stacks.
    cf_client = boto3.client('cloudformation')
    stackdetails = cf_client.describe_stacks(StackName=stackname)

    # All three outputs are optional; start from None so that a stack
    # missing any of them (including the ECS cluster) cannot cause an
    # unbound-variable error further down.
    albarn = None
    elbname = None
    ecsclustername = None

    # Parse the ALB ARN, ELB name and ECS cluster name.  ``Outputs`` is read
    # with a default so a stack without outputs does not raise KeyError.
    for output in stackdetails['Stacks'][0].get('Outputs', []):
        key = output['OutputKey']
        value = output['OutputValue']
        if key == 'alb':
            logger.info("ALB ARN: %s", value)
            albarn = value
        if key == 'elb':
            logger.info("ELB NAME: %s", value)
            elbname = value
        if key == 'ecscluster':
            logger.info("ECS CLUSTER NAME: %s", value)
            ecsclustername = value

    # Deregister the EC2 instance from the ECS cluster to
    # prevent new tasks from being launched on it.
    if ecsclustername:
        deregister_from_ecs_cluster(ec2instanceid, ecsclustername)
    else:
        logger.info("No ECS cluster found in stackdetails; skipping ECS de-registration.")

    # Wait for the ALB's target group to report enough healthy targets.
    if albarn:
        wait_for_alb(albarn)
    else:
        logger.info("No ALB found in stackdetails; skipping de-registration.")

    # Wait for the classic ELB to have enough registered instances.
    if elbname:
        wait_for_elb(elbname)
    else:
        logger.info("No ELB found in stackdetails; skipping de-registration.")

    # Wait for the cluster's tasks to leave the PENDING state.
    if ecsclustername:
        wait_for_tasks(ecsclustername)

    # Notify ASG to complete lifecycle; don't wait for timeout.
    complete_asg_lifecycle(lifecyclehookname, asgname, lifecycleactiontoken, ec2instanceid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment