Last active
July 3, 2019 14:18
-
-
Save omerxx/6ace714d550051bb5a0a8e5b97b9ceaa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os

import boto3
import dateutil
import dateutil.tz
# Bounds for the cluster-wide average EC2 CPUUtilization: an average above
# HIGH_CLUSTER_CPU_TH requests a scale-out, one below LOW_CLUSTER_CPU_TH makes
# the cluster a candidate for scale-in.
HIGH_CLUSTER_CPU_TH = 65
LOW_CLUSTER_CPU_TH = 20

# Memory reserved for the largest container, keyed by cluster name. Values are
# compared against the ECS 'MEMORY' resource integerValue of each instance.
CONTAINERS_MAX_MEM = {'cluster1': 1200, 'cluster2': 3000, 'cluster3': 800}

# Largest-container memory used for clusters missing from CONTAINERS_MAX_MEM.
DEFAULT_MAX_MEM = 3000
def ec2_average_cpu(instances, client):
    """Return the mean of the per-instance average EC2 CPU utilization.

    For every container instance the ``AWS/EC2`` ``CPUUtilization`` metric is
    requested with the ``Average`` statistic over the last 300 seconds, and
    the first returned datapoint of each instance is averaged.

    Args:
        instances: Mapping with a ``'containerInstances'`` list whose items
            each carry an ``'ec2InstanceId'`` key.
        client: Object exposing ``get_metric_statistics`` (the caller in this
            file passes a boto3 CloudWatch client).

    Returns:
        The mean of the ``'Average'`` values over the instances that reported
        at least one datapoint, or ``0.0`` when none of them did (including
        an empty instance list).
    """
    # Compute the window once so StartTime and EndTime are exactly 300s apart.
    window_end = datetime.datetime.utcnow()
    window_start = window_end - datetime.timedelta(seconds=300)

    averages = []
    for instance in instances['containerInstances']:
        response = client.get_metric_statistics(
            Namespace='AWS/EC2',
            MetricName='CPUUtilization',
            Dimensions=[
                {
                    'Name': 'InstanceId',
                    'Value': instance['ec2InstanceId'],
                },
            ],
            StartTime=window_start,
            EndTime=window_end,
            Period=300,
            Statistics=['Average'],
        )
        datapoints = response['Datapoints']
        if not datapoints:
            # An instance may have no datapoint in the window yet; skip it
            # instead of failing the whole run with an IndexError.
            continue
        averages.append(datapoints[0]['Average'])

    if not averages:
        # Nothing to average: avoid dividing by zero.
        return 0.0
    return sum(averages) / float(len(averages))
def lambda_handler(event, context):
    """Publish a ``ShouldScaleEC2`` metric for every ECS cluster.

    For each cluster (except those whose name starts with ``awseb``) the
    handler looks at the ACTIVE container instances, derives how many more
    containers of the cluster's maximum size fit into the remaining memory,
    combines that with the average EC2 CPU utilization, and writes the
    decision to CloudWatch under the ``AWS/ECS`` namespace with a
    ``ClusterName`` dimension.

    Metric values: 0 - scale in (down), 1 - no change, 2 - scale out (up).

    Args:
        event: Lambda invocation event; not used.
        context: Lambda context object; not used.

    Returns:
        An empty dict.

    NOTE(review): ``list_clusters`` and ``list_container_instances`` results
    are used as returned; pagination is not handled here - confirm cluster
    and instance counts fit in a single response.
    """
    ecs = boto3.client('ecs')
    cw = boto3.client('cloudwatch')

    # Iterate over all clusters
    for cluster_arn in ecs.list_clusters()['clusterArns']:
        cluster = cluster_arn.split('/')[1]
        if cluster.startswith('awseb'):
            # Clusters with the 'awseb' name prefix are left alone.
            continue

        print('Calculating schedulable containers for %s' % cluster)
        instance_list = ecs.list_container_instances(cluster=cluster,
                                                     status='ACTIVE')
        if not instance_list['containerInstanceArns']:
            print('No instances in cluster {}, moving on'.format(cluster))
            continue

        instances = ecs.describe_container_instances(
            cluster=cluster,
            containerInstances=instance_list['containerInstanceArns'])
        container_instances = instances['containerInstances']

        ec2_average = ec2_average_cpu(instances, cw)
        print('EC2 AVG CPU: {}'.format(ec2_average))

        # Size of the largest container for this cluster; it does not depend
        # on the instance, so resolve it once before the loop.
        max_mem = CONTAINERS_MAX_MEM.get(cluster) or DEFAULT_MAX_MEM

        schedulable_containers = 0
        cluster_available_memory = 0
        ec2_total_memory = 0
        for instance in container_instances:
            if not ec2_total_memory:
                # The registered memory of the first instance is used as the
                # per-instance memory for the whole cluster.
                registered = {resource['name']: resource
                              for resource in instance['registeredResources']}
                ec2_total_memory = registered['MEMORY']['integerValue']
            remaining = {resource['name']: resource
                         for resource in instance['remainingResources']}
            free_memory = remaining['MEMORY']['integerValue']
            schedulable_containers += free_memory // max_mem
            cluster_available_memory += free_memory
        print('Schedulable containers: %s' % schedulable_containers)

        should_scale = 1  # 0 - In (Down), 1 - No, 2 - Out (Up)
        if schedulable_containers > 0:
            # There is memory room for at least one more container, so the
            # decision is driven by CPU.
            if ec2_average > HIGH_CLUSTER_CPU_TH:
                print('SCALE OUT based on CPU: {} > {}'.format(
                    ec2_average, HIGH_CLUSTER_CPU_TH))
                should_scale = 2
            elif ec2_average < LOW_CLUSTER_CPU_TH:
                used_memory = (len(container_instances) * ec2_total_memory
                               - cluster_available_memory)
                # Scale in only if, after removing one instance's worth of
                # memory, the available memory still exceeds the memory in
                # use plus room for one more container.
                if (cluster_available_memory - ec2_total_memory
                        > used_memory + max_mem):
                    should_scale = 0
        else:
            # No instance can fit another container: scale out regardless of
            # CPU.
            print('SCALE OUT - no room for containers')
            should_scale = 2

        cw.put_metric_data(
            Namespace='AWS/ECS',
            MetricData=[{
                'MetricName': 'ShouldScaleEC2',
                'Dimensions': [{
                    'Name': 'ClusterName',
                    'Value': cluster,
                }],
                'Timestamp': datetime.datetime.now(dateutil.tz.tzlocal()),
                'Value': should_scale,
            }])
        print('Metric was sent to CloudWatch')
    return {}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I would like to test your code with my ECS cluster. I would appreciate some information about how it has worked for you so far, and also a few words about the logic.
Best,
Roee.