Revisions

  1. swo0k revised this gist Aug 23, 2019. 5 changed files with 280 additions and 3 deletions.
    3 changes: 0 additions & 3 deletions compute-workers-tf-2-gotemplate.tf
    @@ -148,8 +148,5 @@ resource "aws_autoscaling_lifecycle_hook" "{{ $workers_type }}_workers" {
    default_result = "CONTINUE"
    heartbeat_timeout = 300
lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"
-
- notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
- role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
    {{- end }}
    155 changes: 155 additions & 0 deletions compute-workers-tf-3-gotemplate.tf
    @@ -0,0 +1,155 @@
    {{$asg_config := yaml "../workers.yaml" -}}

    # DO NOT EDIT THIS FILE DIRECTLY
    ## EDIT ./workers.yaml
    ## RUN make workers.tf

    {{ range $workers_type, $workers_config := $asg_config.workers}}
    data "template_file" "{{ $workers_type }}_workers_user_data" {
    template = "${file("./templates/workers_user_data.tpl.sh")}"

    vars {
    aws_region = "${var.aws_region}"
    cluster_name = "${module.cluster_label.id}"

    node_labels = "lifecycle=OnDemand,{{ join "," $workers_config.labels.onDemand }}"
    node_taints = "{{ join "," $workers_config.taints}}"
    spot_node_labels = "lifecycle=Ec2Spot,{{ join "," $workers_config.labels.spot }}"
    {{- set $ "spot_node_taints" (prepend $workers_config.taints "spotInstance=true:PreferNoSchedule") }}
    spot_node_taints = "{{ join "," $.spot_node_taints }}"

    {{- with $workers_config.evictionHard }}
    eviction_hard = "{{ . }}"
    {{- end }}

    {{- with $workers_config.kubeReserved }}
    kube_reserved = "{{ . }}"
    {{- end }}

    {{- with $workers_config.systemReserved }}
    system_reserved = "{{ . }}"
    {{- end }}
    }
    }

    resource "aws_launch_template" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers"
    image_id = "${data.aws_ami.worker.id}"
    instance_type = "{{ index $workers_config.instanceTypes 0 }}"

    credit_specification {
    # T3 instances are launched as unlimited by default. T2 instances are launched as standard by default.
    cpu_credits = "standard" # Can be "standard" or "unlimited"
    }

    network_interfaces {
    security_groups = {{ toHcl $workers_config.securityGroups }}
    device_index = 0
    associate_public_ip_address = false
    }

    iam_instance_profile {
    name = "${aws_iam_instance_profile.{{$workers_config.iamInstanceProfile}}.name}"
    }

    tag_specifications {
    resource_type = "instance"
    tags = "${merge(module.cluster_label.tags,map(
    "Name","${module.cluster_label.id}-{{ $workers_type }}-worker-ASG-Node",
    "KubernetesCluster","${module.cluster_label.id}",
    "kubernetes.io/cluster/${module.cluster_label.id}","owned"
    ))}"
    }

    user_data = "${base64encode(data.template_file.{{ $workers_type }}_workers_user_data.rendered)}"
    key_name = "${ {{ $asg_config.common.key_name }} }"

    tags = "${module.cluster_label.tags}"
    }

    resource "aws_autoscaling_group" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers"
    vpc_zone_identifier = ["${values({{ $asg_config.common.subnets }})}"]
    min_size = {{ $workers_config.asg.minSize }}
    desired_capacity = {{ $workers_config.asg.desiredCapacity }}
    max_size = {{ $workers_config.asg.maxSize }}

    mixed_instances_policy {
    instances_distribution {
    on_demand_allocation_strategy = "prioritized" # Valid values: prioritized. Default: prioritized
    spot_allocation_strategy = "lowest-price" # Valid values: lowest-price. Default: lowest-price.
    on_demand_base_capacity = {{ $workers_config.asg.onDemandBaseCapacity }}
    on_demand_percentage_above_base_capacity = {{ $workers_config.asg.onDemandPercentageAboveBaseCapacity }}
    # EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
    spot_instance_pools = {{ $workers_config.asg.spotInstancePools }} # Default: 2
    }

    launch_template {
    launch_template_specification {
    launch_template_id = "${aws_launch_template.{{ $workers_type }}_workers.id}"
    version = "$$Latest"
    }
    {{- range $instanceType := $workers_config.instanceTypes }}
    override {
    instance_type = "{{ $instanceType }}"
    }
    {{- end}}
    }
    }

    tags = [
    {
    key = "Namespace"
    value = "${var.namespace}"
    propagate_at_launch = true
    },
    {
    key = "Stage"
    value = "${var.stage}"
    propagate_at_launch = true
    },
    {
    key = "Name"
    value = "${module.cluster_label.id}-{{ $workers_type }}-workers-ASG-Node"
    propagate_at_launch = true
    },
    {
    key = "kubernetes.io/cluster/${module.cluster_label.id}"
    value = "owned"
    propagate_at_launch = true
    },
    {{- if $workers_config.autoscaling }}
    {
    key = "k8s.io/cluster-autoscaler/enabled"
    value = "true"
    propagate_at_launch = true
    },
    {{- end }}
    ]

    # Allowed values are Launch, Terminate, HealthCheck, ReplaceUnhealthy,
    # AZRebalance, AlarmNotification, ScheduledActions, AddToLoadBalancer.
    suspended_processes = [
    "AZRebalance",
    ]

    depends_on = {{ toHcl $workers_config.tfDependencies }}

    {{- if $workers_config.autoscaling }}
    lifecycle {
    ignore_changes = [ "desired_capacity" ]
    }
    {{- end}}
    }

    resource "aws_autoscaling_lifecycle_hook" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers-nodedrainerLCH"
    autoscaling_group_name = "${aws_autoscaling_group.{{ $workers_type }}_workers.name}"
    default_result = "CONTINUE"
    heartbeat_timeout = 300
    lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"

    notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
    role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
    {{- end }}
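Note: the `make workers.tf` target referenced in the header above is not included in this gist. A minimal sketch of the render step, assuming a gomplate-style renderer that provides the `yaml`, `join`, `set`, `prepend` and `toHcl` helpers used in the template (the binary name and file names are assumptions):

    # hypothetical render step behind `make workers.tf`
    gomplate -f compute-workers-tf-3-gotemplate.tf -o workers.tf
    terraform fmt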
    13 changes: 13 additions & 0 deletions compute-workers-tf-gotemplate-1.diff
    @@ -0,0 +1,13 @@
    diff --git a/compute-workers-tf-2-gotemplate.tf b/compute-workers-tf-3-gotemplate.tf
    index d6fd8f3..85773b0 100644
    --- a/compute-workers-tf-2-gotemplate.tf
    +++ b/compute-workers-tf-3-gotemplate.tf
    @@ -148,5 +148,8 @@ resource "aws_autoscaling_lifecycle_hook" "{{ $workers_type }}_workers" {
    default_result = "CONTINUE"
    heartbeat_timeout = 300
    lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"
    +
    + notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
    + role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
    {{- end }}
    54 changes: 54 additions & 0 deletions lch-sqs-1.tf
    @@ -0,0 +1,54 @@
    resource "aws_sqs_queue" "instance_termination" {
    name = "${module.cluster_label.id}-instance-termination"
max_message_size = 2048 # 2 KiB (default is 256 KiB)
message_retention_seconds = 86400 # 1 day (default is 4 days)
receive_wait_time_seconds = 10 # long polling (default is 0 - return immediately)
    # # docs: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-dead-letter-queues.html
    # redrive_policy = "{\"deadLetterTargetArn\":\"${aws_sqs_queue.terraform_queue_deadletter.arn}\",\"maxReceiveCount\":4}"

    tags = "${module.cluster_label.tags}"
    }

    resource "aws_iam_role" "autoscaling_instance_termination_notifications" {
    name = "${module.cluster_label.id}-instance-termination-notifications"

    assume_role_policy = "${data.aws_iam_policy_document.assume_autoscaling_role.json}"

    tags = "${module.cluster_label.tags}"
    }

    data "aws_iam_policy_document" "assume_autoscaling_role" {
    statement {
    effect = "Allow"

    actions = ["sts:AssumeRole"]

    principals {
    type = "Service"
    identifiers = ["autoscaling.amazonaws.com"]
    }
    }
    }

    resource "aws_iam_role_policy" "autoscaling_instance_termination_notifications" {
    name = "${module.cluster_label.id}-instance-termination-notifications"
    role = "${aws_iam_role.autoscaling_instance_termination_notifications.id}"

    policy = "${data.aws_iam_policy_document.instance_termination_notifications.json}"
    }

    data "aws_iam_policy_document" "instance_termination_notifications" {
    statement {
    sid = "AllowWriteToNotificationsQueue"

    effect = "Allow"

    actions = [
    "sqs:SendMessage",
    "sqs:GetQueueUrl",
    "sns:Publish",
    ]

    resources = ["${aws_sqs_queue.instance_termination.arn}"]
    }
    }
    58 changes: 58 additions & 0 deletions lch-sqs-2.tf
    @@ -0,0 +1,58 @@
    data "aws_iam_policy_document" "system_workers" {
    statement {
    sid = "ClusterAutoScalerAll"
    effect = "Allow"
    actions = [
    "autoscaling:DescribeAutoScalingGroups",
    "autoscaling:DescribeTags", # for dynamic discovery, this verifies the cluster-name in the ASG Tags...
    ]
    # ref https://docs.aws.amazon.com/autoscaling/ec2/userguide/control-access-using-iam.html#policy-auto-scaling-resources
    resources = [
    "*",
    ]
    }
    statement {
    sid = "ClusterAutoScalerSpecific"
    effect = "Allow"
    actions = [
    "autoscaling:DescribeAutoScalingInstances",
    "autoscaling:SetDesiredCapacity",
    "autoscaling:TerminateInstanceInAutoScalingGroup",
    ]
    # ref https://docs.aws.amazon.com/autoscaling/ec2/userguide/control-access-using-iam.html#policy-auto-scaling-resources
    resources = [
    "${aws_autoscaling_group.edge_workers.arn}",
    "${aws_autoscaling_group.compute_workers.arn}",
    ]
    }
    # handle instance_termination_notifications
    statement {
    sid = "AllowReadInstanceTerminationNotificationsQueue"
    effect = "Allow"
    actions = [
    "sqs:ReceiveMessage",
    "sqs:DeleteMessage",
    "sqs:GetQueueAttributes",
    ]

    resources = ["${aws_sqs_queue.instance_termination.arn}"]
    }
    # required by rebuy/node-drainer
    statement {
    sid = "AllowEc2Describe"
    effect = "Allow"
    actions = [
    "ec2:Describe*",
    ]
    resources = ["*"]
    }
    statement {
    sid = "AllowAsgLifeCycle"
    effect = "Allow"
    actions = [
    "autoscaling:CompleteLifecycleAction",
    "autoscaling:RecordLifecycleActionHeartbeat",
    ]
    resources = ["*"]
    }
    }
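Note: the intended flow here is that the lifecycle hook publishes a termination notice to the SQS queue, the drainer (rebuy/node-drainer, per the comment above) cordons and drains the node, and then completes the hook so the instance can terminate. The final step is roughly equivalent to this CLI call (names and instance id are placeholders):

    aws autoscaling complete-lifecycle-action \
        --lifecycle-hook-name <cluster>-compute-workers-nodedrainerLCH \
        --auto-scaling-group-name <cluster>-compute-workers \
        --lifecycle-action-result CONTINUE \
        --instance-id i-0123456789abcdef0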
  2. swo0k revised this gist Aug 23, 2019. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions eks-iam-role-2.tf
    @@ -29,6 +29,7 @@ terraform {
    # A project specific key
    key = "tf-shared/tfstate"
    ## ...
+## Ref: https://www.terraform.io/docs/backends/config.html
    }
    }

  3. swo0k revised this gist Aug 23, 2019. 2 changed files with 74 additions and 1 deletion.
    15 changes: 14 additions & 1 deletion eks-iam-role-2.tf
    @@ -21,4 +21,17 @@ resource "aws_iam_role" "eks_full_access" {
    resource "aws_iam_role_policy_attachment" "eks_full_access" {
    policy_arn = "${aws_iam_policy.eks_full_access.arn}"
    role = "${aws_iam_role.eks_full_access.name}"
-}
+}
+
+## expose role ARN to remote state
+terraform {
+  backend "s3" {
+    # A project specific key
+    key = "tf-shared/tfstate"
+    ## ...
+  }
+}
+
+output "swat_eks_role_arn" {
+  value = "${aws_iam_role.eks_full_access.arn}"
+}
    60 changes: 60 additions & 0 deletions eks-iam-role-4.tf
    @@ -0,0 +1,60 @@

    provider "aws" {
    region = "${var.aws_region}"
    version = "~> 2"
    }

    data "terraform_remote_state" "shared" {
    backend = "s3"
    config {
    key = "tf-shared/tfstate"
    # bucket = "..."
    }
    }

    ## EKS cluster created with eks role
    module "cluster_label" {
    source = "git::ssh://git@bitbucket.org/swatrider/tf-modules.git?ref=master//naming"
    namespace = "${var.namespace}"
    stage = "${var.stage}"
    name = "${var.cluster_name}"
    }

    provider "aws" {
    alias = "eks"
    region = "${var.aws_region}"
    assume_role {
    role_arn = "${data.terraform_remote_state.shared.swat_eks_role_arn}"
    }
    }

    resource "aws_eks_cluster" "main" {
    provider = "aws.eks" # assumes the EKS full access role
    name = "${module.cluster_label.id}"
    role_arn = "${aws_iam_role.masters.arn}"

    vpc_config {
    security_group_ids = ["${aws_security_group.masters.id}"]
    subnet_ids = [
    "${values(data.terraform_remote_state.production.private_subnets_newton)}",
    ]
    # WARNING: Private EKS APIs across peered VPCs require DNS Resolvers!
    endpoint_private_access = true
    endpoint_public_access = false
    }

    depends_on = [
    "aws_iam_role_policy_attachment.masters_EKSCluster_policy",
    "aws_iam_role_policy_attachment.masters_EKSService_policy",
    ]
    }

    data "aws_ami" "worker" {
    filter {
    name = "name"
    values = ["amazon-eks-node-${var.eks_version}-v*"]
    }

    most_recent = true
    owners = ["amazon"]
    }
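Note: a quick way to spot-check which image the `data "aws_ami" "worker"` lookup will resolve to (`1.13` stands in for `var.eks_version` and is only an example value):

    aws ec2 describe-images --owners amazon \
        --filters 'Name=name,Values=amazon-eks-node-1.13-v*' \
        --query 'sort_by(Images, &CreationDate)[-1].{Id: ImageId, Name: Name}'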
  4. swo0k revised this gist Aug 23, 2019. 1 changed file with 0 additions and 3 deletions.
    3 changes: 0 additions & 3 deletions eks-iam-role-2.tf
    @@ -2,9 +2,6 @@ data "aws_iam_policy_document" "current_account_trust" {
    statement = {
    principals = {
    type = "AWS"

    # 1st half of permissions: make role trust current account
    # 2nd half of permissions: give iam groups/users ability to assume the role (see below)
    identifiers = [
    "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root",
    ]
  5. swo0k revised this gist Aug 23, 2019. 6 changed files with 84 additions and 3 deletions.
    1 change: 0 additions & 1 deletion compute-workers-cfn-1.tf
    @@ -33,7 +33,6 @@ resource "aws_cloudformation_stack" "workers" {
    lifecycle {
    ignore_changes = [
    "parameters.NodeAutoScalingGroupDesiredSize",
    "parameters.OnDemandBaseCapacity",
    ]
    }
    }
    1 change: 0 additions & 1 deletion compute-workers-cfn-2.tf
    @@ -34,7 +34,6 @@ resource "aws_cloudformation_stack" "workers" {
    lifecycle {
    ignore_changes = [
    "parameters.NodeAutoScalingGroupDesiredSize",
    "parameters.OnDemandBaseCapacity",
    ]
    }
    }
    2 changes: 1 addition & 1 deletion compute-workers-tf-1-asg.tf
    @@ -11,7 +11,7 @@ resource "aws_autoscaling_group" "compute_workers" {
    on_demand_allocation_strategy = "prioritized" # Valid values: prioritized. Default: prioritized
    spot_allocation_strategy = "lowest-price" # Valid values: lowest-price. Default: lowest-price.
    on_demand_base_capacity = 1
- on_demand_percentage_above_base_capacity = 0
+ on_demand_percentage_above_base_capacity = 0 # beyond the fixed base of 1 On-Demand instance, all additional capacity is 100% Spot, 0% On-Demand

    # EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
    spot_instance_pools = 2 # Default: 2
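Note: with on_demand_base_capacity = 1 and on_demand_percentage_above_base_capacity = 0, a desired_capacity of 3 runs 1 On-Demand and 2 Spot instances; scaled out to the max_size of 18, it would be 1 On-Demand and 17 Spot.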
    28 changes: 28 additions & 0 deletions eks-iam-role-1.tf
    @@ -0,0 +1,28 @@
    resource "aws_iam_policy" "eks_full_access" {
    name = "${module.eks_full_access_label.id}-policy"
    description = "Provides full access to Amazon EKS"

    policy = "${data.aws_iam_policy_document.eks_full_access.json}"
    }

    #Ref https://docs.aws.amazon.com/eks/latest/userguide/security_iam_id-based-policy-examples.html#security_iam_id-based-policy-examples-console
    data "aws_iam_policy_document" "eks_full_access" {
    statement {
    sid = "EKSFullAccess"
    effect = "Allow"
    actions = [
    "eks:*",
    ]
    resources = ["*"]
    }

    statement {
    sid = "EKSPassrole"
    effect = "Allow"
    actions = [
    "iam:GetRole",
    "iam:PassRole",
    ]
    resources = ["*"]
    }
    }
    27 changes: 27 additions & 0 deletions eks-iam-role-2.tf
    @@ -0,0 +1,27 @@
    data "aws_iam_policy_document" "current_account_trust" {
    statement = {
    principals = {
    type = "AWS"

    # 1st half of permissions: make role trust current account
    # 2nd half of permissions: give iam groups/users ability to assume the role (see below)
    identifiers = [
    "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root",
    ]
    }

    actions = [
    "sts:AssumeRole",
    ]
    }
    }

    resource "aws_iam_role" "eks_full_access" {
    name = "${module.eks_full_access_label.id}"
    assume_role_policy = "${data.aws_iam_policy_document.current_account_trust.json}"
    }

    resource "aws_iam_role_policy_attachment" "eks_full_access" {
    policy_arn = "${aws_iam_policy.eks_full_access.arn}"
    role = "${aws_iam_role.eks_full_access.name}"
    }
    28 changes: 28 additions & 0 deletions eks-iam-role-3.tf
    @@ -0,0 +1,28 @@
    resource "aws_iam_group" "eks_full_access" {
    name = "${module.eks_full_access_label.id}"
    path = "/"
    }

    resource "aws_iam_group_policy_attachment" "assume_eks_full_access" {
    group = "${aws_iam_group.eks_full_access.name}"
    policy_arn = "${aws_iam_policy.assume_eks_full_access.arn}"
    }

    resource "aws_iam_policy" "assume_eks_full_access" {
    name = "${aws_iam_group.eks_full_access.name}-assume-policy"
    description = "User policy to assume eks full access role"
    policy = "${data.aws_iam_policy_document.assume_eks_full_access_role.json}"
    }

    # allow eks_full_access to assume the cluster role
    data "aws_iam_policy_document" "assume_eks_full_access_role" {
    statement = {
    actions = [
    "sts:AssumeRole",
    ]

    resources = [
    "${aws_iam_role.eks_full_access.arn}",
    ]
    }
    }
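Note: combined with the account-root trust policy in eks-iam-role-2.tf, this lets any member of the group switch into the role. From the CLI that looks roughly like (the ARN is a placeholder):

    aws sts assume-role \
        --role-arn arn:aws:iam::<account-id>:role/<eks-full-access-role> \
        --role-session-name eks-admin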
  6. swo0k revised this gist Aug 23, 2019. 2 changed files with 219 additions and 0 deletions.
    64 changes: 64 additions & 0 deletions compute-workers-tf-2-data-structure.yaml
    @@ -0,0 +1,64 @@
common:
  key_name: data.terraform_remote_state.staging.ec2_key_name
  subnets: data.terraform_remote_state.staging.private_subnets_bohr

workers:
  compute:
    instanceTypes:
      - c5.2xlarge
      - m5a.2xlarge
      - c5d.2xlarge
    autoscaling: true
    asg:
      minSize: 1
      desiredCapacity: 3
      maxSize: 18
      onDemandBaseCapacity: 1
      onDemandPercentageAboveBaseCapacity: 0
      spotInstancePools: 2
    taints: []
    labels:
      onDemand:
        - node-role.kubernetes.io/worker=true
      spot:
        - node-role.kubernetes.io/spot-worker=true
    evictionHard: "memory.available<750Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5%"
    kubeReserved: "cpu=250m,memory=1Gi,ephemeral-storage=1Gi"
    systemReserved: "cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi"
    securityGroups:
      - ${aws_security_group.workers.id}
    iamInstanceProfile: workers
    tfDependencies:
      - aws_eks_cluster.main
      - aws_iam_role_policy_attachment.workers_EKSWorkerNodePolicy
      - aws_iam_role_policy_attachment.workers_EKS_CNI_Policy
  edge:
    instanceTypes:
      - t3.small
      - t2.small
      - t3.medium
    autoscaling: true
    asg:
      minSize: 2
      desiredCapacity: 2
      maxSize: 3
      onDemandBaseCapacity: 1
      onDemandPercentageAboveBaseCapacity: 0
      spotInstancePools: 2
    taints:
      - edge=true:NoSchedule
    labels:
      onDemand:
        - node-role.kubernetes.io/edge=true
      spot:
        - node-role.kubernetes.io/spot-edge=true
    evictionHard: "memory.available<100Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5%"
    kubeReserved: "cpu=250m,memory=150Mi,ephemeral-storage=1Gi"
    systemReserved: "cpu=250m,memory=150Mi,ephemeral-storage=1Gi"
    securityGroups:
      - ${aws_security_group.edge.id} # DMZ security groups
    iamInstanceProfile: workers
    tfDependencies:
      - aws_eks_cluster.main
      - aws_iam_role_policy_attachment.workers_EKSWorkerNodePolicy
      - aws_iam_role_policy_attachment.workers_EKS_CNI_Policy
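Note: each key under `workers:` (here `compute` and `edge`) becomes `$workers_type` in the Go template's `range`, so every entry stamps out its own user-data template, launch template, autoscaling group and lifecycle hook.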
    155 changes: 155 additions & 0 deletions compute-workers-tf-2-gotemplate.tf
    @@ -0,0 +1,155 @@
    {{$asg_config := yaml "../workers.yaml" -}}

    # DO NOT EDIT THIS FILE DIRECTLY
    ## EDIT ./workers.yaml
    ## RUN make workers.tf

    {{ range $workers_type, $workers_config := $asg_config.workers}}
    data "template_file" "{{ $workers_type }}_workers_user_data" {
    template = "${file("./templates/workers_user_data.tpl.sh")}"

    vars {
    aws_region = "${var.aws_region}"
    cluster_name = "${module.cluster_label.id}"

    node_labels = "lifecycle=OnDemand,{{ join "," $workers_config.labels.onDemand }}"
    node_taints = "{{ join "," $workers_config.taints}}"
    spot_node_labels = "lifecycle=Ec2Spot,{{ join "," $workers_config.labels.spot }}"
    {{- set $ "spot_node_taints" (prepend $workers_config.taints "spotInstance=true:PreferNoSchedule") }}
    spot_node_taints = "{{ join "," $.spot_node_taints }}"

    {{- with $workers_config.evictionHard }}
    eviction_hard = "{{ . }}"
    {{- end }}

    {{- with $workers_config.kubeReserved }}
    kube_reserved = "{{ . }}"
    {{- end }}

    {{- with $workers_config.systemReserved }}
    system_reserved = "{{ . }}"
    {{- end }}
    }
    }

    resource "aws_launch_template" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers"
    image_id = "${data.aws_ami.worker.id}"
    instance_type = "{{ index $workers_config.instanceTypes 0 }}"

    credit_specification {
    # T3 instances are launched as unlimited by default. T2 instances are launched as standard by default.
    cpu_credits = "standard" # Can be "standard" or "unlimited"
    }

    network_interfaces {
    security_groups = {{ toHcl $workers_config.securityGroups }}
    device_index = 0
    associate_public_ip_address = false
    }

    iam_instance_profile {
    name = "${aws_iam_instance_profile.{{$workers_config.iamInstanceProfile}}.name}"
    }

    tag_specifications {
    resource_type = "instance"
    tags = "${merge(module.cluster_label.tags,map(
    "Name","${module.cluster_label.id}-{{ $workers_type }}-worker-ASG-Node",
    "KubernetesCluster","${module.cluster_label.id}",
    "kubernetes.io/cluster/${module.cluster_label.id}","owned"
    ))}"
    }

    user_data = "${base64encode(data.template_file.{{ $workers_type }}_workers_user_data.rendered)}"
    key_name = "${ {{ $asg_config.common.key_name }} }"

    tags = "${module.cluster_label.tags}"
    }

    resource "aws_autoscaling_group" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers"
    vpc_zone_identifier = ["${values({{ $asg_config.common.subnets }})}"]
    min_size = {{ $workers_config.asg.minSize }}
    desired_capacity = {{ $workers_config.asg.desiredCapacity }}
    max_size = {{ $workers_config.asg.maxSize }}

    mixed_instances_policy {
    instances_distribution {
    on_demand_allocation_strategy = "prioritized" # Valid values: prioritized. Default: prioritized
    spot_allocation_strategy = "lowest-price" # Valid values: lowest-price. Default: lowest-price.
    on_demand_base_capacity = {{ $workers_config.asg.onDemandBaseCapacity }}
    on_demand_percentage_above_base_capacity = {{ $workers_config.asg.onDemandPercentageAboveBaseCapacity }}
    # EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
    spot_instance_pools = {{ $workers_config.asg.spotInstancePools }} # Default: 2
    }

    launch_template {
    launch_template_specification {
    launch_template_id = "${aws_launch_template.{{ $workers_type }}_workers.id}"
    version = "$$Latest"
    }
    {{- range $instanceType := $workers_config.instanceTypes }}
    override {
    instance_type = "{{ $instanceType }}"
    }
    {{- end}}
    }
    }

    tags = [
    {
    key = "Namespace"
    value = "${var.namespace}"
    propagate_at_launch = true
    },
    {
    key = "Stage"
    value = "${var.stage}"
    propagate_at_launch = true
    },
    {
    key = "Name"
    value = "${module.cluster_label.id}-{{ $workers_type }}-workers-ASG-Node"
    propagate_at_launch = true
    },
    {
    key = "kubernetes.io/cluster/${module.cluster_label.id}"
    value = "owned"
    propagate_at_launch = true
    },
    {{- if $workers_config.autoscaling }}
    {
    key = "k8s.io/cluster-autoscaler/enabled"
    value = "true"
    propagate_at_launch = true
    },
    {{- end }}
    ]

    # Allowed values are Launch, Terminate, HealthCheck, ReplaceUnhealthy,
    # AZRebalance, AlarmNotification, ScheduledActions, AddToLoadBalancer.
    suspended_processes = [
    "AZRebalance",
    ]

    depends_on = {{ toHcl $workers_config.tfDependencies }}

    {{- if $workers_config.autoscaling }}
    lifecycle {
    ignore_changes = [ "desired_capacity" ]
    }
    {{- end}}
    }

    resource "aws_autoscaling_lifecycle_hook" "{{ $workers_type }}_workers" {
    name = "${module.cluster_label.id}-{{ $workers_type }}-workers-nodedrainerLCH"
    autoscaling_group_name = "${aws_autoscaling_group.{{ $workers_type }}_workers.name}"
    default_result = "CONTINUE"
    heartbeat_timeout = 300
    lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"

    notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
    role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
    {{- end }}
  7. swo0k revised this gist Aug 23, 2019. 1 changed file with 0 additions and 3 deletions.
    3 changes: 0 additions & 3 deletions compute-workers-tf-1-asg.tf
    @@ -84,7 +84,4 @@ resource "aws_autoscaling_lifecycle_hook" "compute_workers" {
    default_result = "CONTINUE"
    heartbeat_timeout = 300
lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"
-
- notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
- role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
  8. swo0k revised this gist Aug 23, 2019. 4 changed files with 151 additions and 0 deletions.
    90 changes: 90 additions & 0 deletions compute-workers-tf-1-asg.tf
    @@ -0,0 +1,90 @@

    resource "aws_autoscaling_group" "compute_workers" {
    name = "${module.cluster_label.id}-compute-workers"
    vpc_zone_identifier = ["${values(data.terraform_remote_state.staging.private_subnets_bohr)}"]
    min_size = 1
    desired_capacity = 3
    max_size = 18

    mixed_instances_policy {
    instances_distribution {
    on_demand_allocation_strategy = "prioritized" # Valid values: prioritized. Default: prioritized
    spot_allocation_strategy = "lowest-price" # Valid values: lowest-price. Default: lowest-price.
    on_demand_base_capacity = 1
    on_demand_percentage_above_base_capacity = 0

    # EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
    spot_instance_pools = 2 # Default: 2
    }

    launch_template {
    launch_template_specification {
    launch_template_id = "${aws_launch_template.compute_workers.id}"
    version = "$$Latest"
    }

    override {
    instance_type = "c5.2xlarge"
    }

    override {
    instance_type = "m5a.2xlarge"
    }

    override {
    instance_type = "c5d.2xlarge"
    }
    }
    }

    tags = [
    {
    key = "Namespace"
    value = "${var.namespace}"
    propagate_at_launch = true
    },
    {
    key = "Stage"
    value = "${var.stage}"
    propagate_at_launch = true
    },
    {
    key = "Name"
    value = "${module.cluster_label.id}-compute-workers-ASG-Node"
    propagate_at_launch = true
    },
    {
    key = "kubernetes.io/cluster/${module.cluster_label.id}"
    value = "owned"
    propagate_at_launch = true
    },
    {
    key = "k8s.io/cluster-autoscaler/enabled"
    value = "true"
    propagate_at_launch = true
    },
    ]

    # Allowed values are Launch, Terminate, HealthCheck, ReplaceUnhealthy,
    # AZRebalance, AlarmNotification, ScheduledActions, AddToLoadBalancer.
    suspended_processes = [
    "AZRebalance",
    ]

    depends_on = ["aws_eks_cluster.main", "aws_iam_role_policy_attachment.workers_EKSWorkerNodePolicy", "aws_iam_role_policy_attachment.workers_EKS_CNI_Policy"]

    lifecycle {
    ignore_changes = ["desired_capacity"]
    }
    }

    resource "aws_autoscaling_lifecycle_hook" "compute_workers" {
    name = "${module.cluster_label.id}-compute-workers-nodedrainerLCH"
    autoscaling_group_name = "${aws_autoscaling_group.compute_workers.name}"
    default_result = "CONTINUE"
    heartbeat_timeout = 300
    lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"

    notification_target_arn = "${aws_sqs_queue.instance_termination.arn}"
    role_arn = "${aws_iam_role.autoscaling_instance_termination_notifications.arn}"
    }
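Note: in Terraform 0.11 interpolation, `$$` escapes a literal `$`, so `version = "$$Latest"` renders to the launch template's `$Latest` version alias rather than being treated as a Terraform interpolation.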
    35 changes: 35 additions & 0 deletions compute-workers-tf-1-launch-template.tf
    @@ -0,0 +1,35 @@
    resource "aws_launch_template" "compute_workers" {
    name = "${module.cluster_label.id}-compute-workers"
    image_id = "${data.aws_ami.worker.id}"
    instance_type = "c5.2xlarge"

    credit_specification {
    # T3 instances are launched as unlimited by default. T2 instances are launched as standard by default.
    cpu_credits = "standard" # Can be "standard" or "unlimited"
    }

    network_interfaces {
    security_groups = ["${aws_security_group.workers.id}"]
    device_index = 0
    associate_public_ip_address = false
    }

    iam_instance_profile {
    name = "${aws_iam_instance_profile.workers.name}"
    }

    tag_specifications {
    resource_type = "instance"

    tags = "${merge(module.cluster_label.tags,map(
    "Name","${module.cluster_label.id}-compute-worker-ASG-Node",
    "KubernetesCluster","${module.cluster_label.id}",
    "kubernetes.io/cluster/${module.cluster_label.id}","owned"
    ))}"
    }

    user_data = "${base64encode(data.template_file.compute_workers_user_data.rendered)}"
    key_name = "${ data.terraform_remote_state.staging.ec2_key_name }"

    tags = "${module.cluster_label.tags}"
    }
    16 changes: 16 additions & 0 deletions compute-workers-tf-1-user-data.tf
    @@ -0,0 +1,16 @@
    data "template_file" "compute_workers_user_data" {
    template = "${file("./templates/workers_user_data.tpl.sh")}"

    vars {
    aws_region = "${var.aws_region}"
    cluster_name = "${module.cluster_label.id}"

    node_labels = "lifecycle=OnDemand,node-role.kubernetes.io/worker=true"
    node_taints = ""
    spot_node_labels = "lifecycle=Ec2Spot,node-role.kubernetes.io/spot-worker=true"
    spot_node_taints = "spotInstance=true:PreferNoSchedule"
    eviction_hard = "memory.available<750Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5%"
    kube_reserved = "cpu=250m,memory=1Gi,ephemeral-storage=1Gi"
    system_reserved = "cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi"
    }
    }
    10 changes: 10 additions & 0 deletions user-data.tpl.sh
    @@ -0,0 +1,10 @@
#!/bin/bash
set -o xtrace
# look up this instance's id via the EC2 metadata service
iid=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
export AWS_DEFAULT_REGION=${aws_region}
# InstanceLifecycle is "spot" for Spot instances; On-Demand instances print "None"
ilc=`aws ec2 describe-instances --instance-ids $${iid} --query 'Reservations[0].Instances[0].InstanceLifecycle' --output text`
if [ "$${ilc}" == "spot" ]; then
  /etc/eks/bootstrap.sh ${cluster_name} --kubelet-extra-args '--node-labels=${spot_node_labels} --register-with-taints=${spot_node_taints}%{ if eviction_hard != "" } --eviction-hard ${eviction_hard}%{ endif }%{ if kube_reserved != "" } --kube-reserved ${kube_reserved}%{ endif }%{ if system_reserved != "" } --system-reserved ${system_reserved}%{ endif }'
else
  /etc/eks/bootstrap.sh ${cluster_name} --kubelet-extra-args '--node-labels=${node_labels}%{ if node_taints != "" } --register-with-taints=${node_taints}%{ endif }%{ if eviction_hard != "" } --eviction-hard ${eviction_hard}%{ endif }%{ if kube_reserved != "" } --kube-reserved ${kube_reserved}%{ endif }%{ if system_reserved != "" } --system-reserved ${system_reserved}%{ endif }'
fi
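Note: this file is rendered by Terraform's template provider, so `$${iid}` and `$${ilc}` escape to the shell variables `${iid}`/`${ilc}`, while `${cluster_name}`, `${spot_node_labels}` and friends are template vars. With the compute-worker values above, the spot branch renders to roughly (cluster id is a placeholder):

    /etc/eks/bootstrap.sh <cluster-id> --kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,node-role.kubernetes.io/spot-worker=true --register-with-taints=spotInstance=true:PreferNoSchedule --eviction-hard memory.available<750Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5% --kube-reserved cpu=250m,memory=1Gi,ephemeral-storage=1Gi --system-reserved cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi'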
  9. swo0k revised this gist Aug 23, 2019. 2 changed files with 888 additions and 0 deletions.
    459 changes: 459 additions & 0 deletions amazon-eks-nodegroup-with-mixed-instances-custom.yml
    @@ -0,0 +1,459 @@
---
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Amazon EKS Node Group'
Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      -
        Label:
          default: "EKS Configuration"
        Parameters:
          - ClusterName
          - ClusterControlPlaneSecurityGroup
          - NodeInstanceProfile
          - UseExistingNodeSecurityGroups
          - ExistingNodeSecurityGroups
          - NodeImageId
          - VpcId
          - KeyName
          - NodeGroupName
          - Subnets
          - BootstrapArgumentsForOnDemand
          - BootstrapArgumentsForSpotFleet
      -
        Label:
          default: "Auto Scaling Configuration"
        Parameters:
          - NodeAutoScalingGroupMinSize
          - NodeAutoScalingGroupDesiredSize
          - NodeAutoScalingGroupMaxSize
          - ClusterAutoscalerStatus
          - NodeInstanceType
          - ASGAutoAssignPublicIp
          - OnDemandBaseCapacity
          - OnDemandPercentageAboveBaseCapacity
          - SpotInstancePools
          - InstanceTypesOverride

Parameters:
  VpcId:
    Description: The VPC of the worker instances
    Type: AWS::EC2::VPC::Id

  Subnets:
    Description: Select 3 subnets where workers can be created.
    Type: List<AWS::EC2::Subnet::Id>

  NodeInstanceProfile:
    Type: String
    Description: Use the existing Instance Profile ARN for your nodegroup
    Default: ""

  KeyName:
    Description: The EC2 Key Pair to allow SSH access to the instances
    Type: AWS::EC2::KeyPair::KeyName
    Default: "eksworkshop"

  NodeImageId:
    Type: AWS::EC2::Image::Id
    Description: Find the latest AMI id here - https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html

  NodeInstanceType:
    Description: Default EC2 instance type for the node instances.
    Type: String
    Default: m4.large
    AllowedValues:
      - t2.small
      - t2.medium
      - t2.large
      - t2.xlarge
      - t2.2xlarge
      - t3.nano
      - t3.micro
      - t3.small
      - t3.medium
      - t3.large
      - t3.xlarge
      - t3.2xlarge
      - m3.medium
      - m3.large
      - m3.xlarge
      - m3.2xlarge
      - m4.large
      - m4.xlarge
      - m4.2xlarge
      - m4.4xlarge
      - m4.10xlarge
      - m5.large
      - m5.xlarge
      - m5.2xlarge
      - m5.4xlarge
      - m5.12xlarge
      - m5.24xlarge
      - c4.large
      - c4.xlarge
      - c4.2xlarge
      - c4.4xlarge
      - c4.8xlarge
      - c5.large
      - c5.xlarge
      - c5.2xlarge
      - c5.4xlarge
      - c5.9xlarge
      - c5.18xlarge
      - i3.large
      - i3.xlarge
      - i3.2xlarge
      - i3.4xlarge
      - i3.8xlarge
      - i3.16xlarge
      - r3.xlarge
      - r3.2xlarge
      - r3.4xlarge
      - r3.8xlarge
      - r4.large
      - r4.xlarge
      - r4.2xlarge
      - r4.4xlarge
      - r4.8xlarge
      - r4.16xlarge
      - x1.16xlarge
      - x1.32xlarge
      - p2.xlarge
      - p2.8xlarge
      - p2.16xlarge
      - p3.2xlarge
      - p3.8xlarge
      - p3.16xlarge
      - p3dn.24xlarge
      - r5.large
      - r5.xlarge
      - r5.2xlarge
      - r5.4xlarge
      - r5.12xlarge
      - r5.24xlarge
      - r5d.large
      - r5d.xlarge
      - r5d.2xlarge
      - r5d.4xlarge
      - r5d.12xlarge
      - r5d.24xlarge
      - z1d.large
      - z1d.xlarge
      - z1d.2xlarge
      - z1d.3xlarge
      - z1d.6xlarge
      - z1d.12xlarge
    ConstraintDescription: Must be a valid EC2 instance type

  NodeAutoScalingGroupMinSize:
    Type: Number
    Description: Minimum size of Node Group ASG.
    Default: 1

  NodeAutoScalingGroupDesiredSize:
    Type: Number
    Description: Desired size of Node Group ASG.
    Default: 3

  NodeAutoScalingGroupMaxSize:
    Type: Number
    Description: Maximum size of Node Group ASG.
    Default: 5

  ASGAutoAssignPublicIp:
    Type: String
    Description: "auto assign public IP address for ASG instances"
    AllowedValues:
      - "yes"
      - "no"
    Default: "yes"

  ClusterAutoscalerStatus:
    Type: String
    Description: "cluster-autoscaler status"
    AllowedValues:
      - "enabled"
      - "disabled"
    Default: "enabled"

  OnDemandBaseCapacity:
    Type: Number
    Description: "on-demand base capacity"
    Default: 1

  OnDemandPercentageAboveBaseCapacity:
    Type: Number
    Description: "on-demand percentage above base capacity(0-100)"
    Default: 0

  SpotInstancePools:
    Type: Number
    Description: "spot instance pools(1-20)"
    Default: 2

  InstanceTypesOverride:
    Type: String
    Description: "multiple spot instances to override (separated by commas)"
    Default: "m4.large,c4.large,c5.large"

  UseExistingNodeSecurityGroups:
    Type: String
    Description: Please select 'yes' to attach existing SGs to nodegroup
    Default: "yes"
    AllowedValues:
      - "yes"
      - "no"
  ExistingNodeSecurityGroups:
    Type: String
    Description: Use the existing Security Group for your nodegroup
    Default: ""

  ClusterName:
    Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster.
    Type: String
    Default: "eksworkshop-eksctl"

  BootstrapArgumentsForOnDemand:
    Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    Default: "--kubelet-extra-args --node-labels=lifecycle=OnDemand"
    Type: String

  BootstrapArgumentsForSpotFleet:
    Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    Default: "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot --register-with-taints=spotInstance=true:PreferNoSchedule'
      "
    Type: String

  ClusterControlPlaneSecurityGroup:
    Description: The security group of the cluster control plane.
    Type: AWS::EC2::SecurityGroup::Id

  NodeGroupName:
    Description: Unique identifier for the Node Group.
    Type: String
    Default: "spotworkers"

Conditions:
  IsASGAutoAssignPublicIp: !Equals [ !Ref ASGAutoAssignPublicIp , "yes" ]
  AttachExistingNodeSG: !Equals [ !Ref UseExistingNodeSecurityGroups, "yes" ]
  CreateNewNodeSG: !Equals [ !Ref UseExistingNodeSecurityGroups, "no" ]


Resources:
  NodeSecurityGroup:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for all nodes in the cluster
      VpcId:
        !Ref VpcId
      Tags:
        - Key: !Sub "kubernetes.io/cluster/${ClusterName}"
          Value: 'owned'
        - Key: Name
          Value: !Sub "${ClusterName}-cluster/NodeSecurityGroup"

  NodeSecurityGroupIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow nodes to communicate with each other
      GroupId: !Ref NodeSecurityGroup
      SourceSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: '-1'
      FromPort: 0
      ToPort: 65535

  NodeSecurityGroupFromControlPlaneIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow worker Kubelets and pods to receive communication from the cluster control plane
      GroupId: !Ref NodeSecurityGroup
      SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup
      IpProtocol: tcp
      FromPort: 1025
      ToPort: 65535

  ControlPlaneEgressToNodeSecurityGroup:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow the cluster control plane to communicate with worker Kubelet and pods
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      DestinationSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      FromPort: 1025
      ToPort: 65535

  NodeSecurityGroupFromControlPlaneOn443Ingress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane
      GroupId: !Ref NodeSecurityGroup
      SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443

  ControlPlaneEgressToNodeSecurityGroupOn443:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      DestinationSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443

  ClusterControlPlaneSecurityGroupIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow pods to communicate with the cluster API Server
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      SourceSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      ToPort: 443
      FromPort: 443

  NodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      DesiredCapacity: !Ref NodeAutoScalingGroupDesiredSize
      #LaunchConfigurationName: !Ref NodeLaunchConfig
      # LaunchTemplate:
      #   LaunchTemplateId: !Ref MyLaunchTemplate
      #   Version: !GetAtt MyLaunchTemplate.LatestVersionNumber
      MixedInstancesPolicy:
        InstancesDistribution:
          OnDemandAllocationStrategy: prioritized
          OnDemandBaseCapacity: !Ref OnDemandBaseCapacity
          OnDemandPercentageAboveBaseCapacity: !Ref OnDemandPercentageAboveBaseCapacity
          SpotAllocationStrategy: lowest-price
          SpotInstancePools: !Ref SpotInstancePools
          # SpotMaxPrice: String
        LaunchTemplate:
          LaunchTemplateSpecification:
            LaunchTemplateId: !Ref MyLaunchTemplate
            # LaunchTemplateName: String
            Version: !GetAtt MyLaunchTemplate.LatestVersionNumber
          Overrides:
            - InstanceType: !Select [0, !Split [ ",", !Ref InstanceTypesOverride ] ]
            - InstanceType: !Select [1, !Split [ ",", !Ref InstanceTypesOverride ] ]
            - InstanceType: !Select [2, !Split [ ",", !Ref InstanceTypesOverride ] ]

      MinSize: !Ref NodeAutoScalingGroupMinSize
      MaxSize: !Ref NodeAutoScalingGroupMaxSize
      VPCZoneIdentifier:
        !Ref Subnets
      Tags:
        - Key: Name
          Value: !Sub "${ClusterName}-${NodeGroupName}-ASG-Node"
          PropagateAtLaunch: 'true'
        - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
          Value: 'owned'
          PropagateAtLaunch: 'true'
        - Key: Namespace
          Value: swat
          PropagateAtLaunch: 'true'
        - Key: Stage
          Value: stage
          PropagateAtLaunch: 'true'
        - Key: !Sub 'k8s.io/cluster-autoscaler/${ClusterAutoscalerStatus}'
          Value: 'true'
          PropagateAtLaunch: 'true'
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: !Ref NodeAutoScalingGroupDesiredSize
        MaxBatchSize: '1'
        PauseTime: 'PT5M'

  LCH:
    Type: AWS::AutoScaling::LifecycleHook
    Properties:
      AutoScalingGroupName: !Ref NodeGroup
      HeartbeatTimeout: 60
      DefaultResult: CONTINUE
      LifecycleHookName: !Sub "${NodeGroupName}-LCH"
      LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING

  #
  # Launch Template
  #
  MyLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub "eksLaunchTemplate-${AWS::StackName}"
      LaunchTemplateData:
        # SecurityGroupIds:
        #   - !Ref NodeSecurityGroup
        TagSpecifications:
          -
            ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub "${ClusterName}-${NodeGroupName}-ASG-Node"
              - Key: KubernetesCluster
                Value: !Ref ClusterName
              - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
                Value: 'owned'
              - Key: Namespace
                Value: swat
              - Key: Stage
                Value: stage
              - Key: !Sub 'k8s.io/cluster-autoscaler/${ClusterAutoscalerStatus}'
                Value: 'true'
        UserData:
          Fn::Base64:
            !Sub |
              #!/bin/bash
              set -o xtrace
              iid=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
              export AWS_DEFAULT_REGION=${AWS::Region}
              ilc=`aws ec2 describe-instances --instance-ids $iid --query 'Reservations[0].Instances[0].InstanceLifecycle' --output text`
              if [ "$ilc" == "spot" ]; then
                /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArgumentsForSpotFleet}
              else
                /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArgumentsForOnDemand}
              fi
              # /etc/eks/bootstrap.sh ${ClusterName} $BootstrapArgumentsForOnDemand
              /opt/aws/bin/cfn-signal --exit-code $? \
                  --stack ${AWS::StackName} \
                  --resource NodeGroup \
                  --region ${AWS::Region}
        IamInstanceProfile:
          Arn: !Ref NodeInstanceProfile
        KeyName: !Ref KeyName
        NetworkInterfaces:
          -
            DeviceIndex: 0
            AssociatePublicIpAddress:
              !If
                - IsASGAutoAssignPublicIp
                - 'true'
                - 'false'
            SubnetId: !Select [0, !Ref Subnets]
            Groups:
              !If
                - CreateNewNodeSG
                -
                  - !Ref NodeSecurityGroup
                - !Split [ ",", !Ref ExistingNodeSecurityGroups ]
        ImageId: !Ref NodeImageId
        InstanceType: !Ref NodeInstanceType

Outputs:

  NodeGroup:
    Description: The node instance group
    Value: !Ref NodeGroup
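Note: this gist launches the template through Terraform's aws_cloudformation_stack resource (see cfn-tf-changes.diff below); a hypothetical direct launch from the CLI would look like this, with placeholder values (VpcId, Subnets, ClusterControlPlaneSecurityGroup and the other required parameters would also need to be supplied):

    aws cloudformation create-stack \
        --stack-name eks-spot-workers \
        --template-body file://amazon-eks-nodegroup-with-mixed-instances-custom.yml \
        --parameters ParameterKey=ClusterName,ParameterValue=eksworkshop-eksctl \
                     ParameterKey=NodeImageId,ParameterValue=ami-0123456789abcdef0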
    429 changes: 429 additions & 0 deletions amazon-eks-nodegroup-with-mixed-instances.yml
    @@ -0,0 +1,429 @@
    ---
    AWSTemplateFormatVersion: '2010-09-09'
    Description: 'Amazon EKS Node Group'
    Metadata:
    AWS::CloudFormation::Interface:
    ParameterGroups:
    -
    Label:
    default: "EKS Configuration"
    Parameters:
    - ClusterName
    - ClusterControlPlaneSecurityGroup
    - NodeInstanceProfile
    - UseExistingNodeSecurityGroups
    - ExistingNodeSecurityGroups
    - NodeImageId
    - VpcId
    - KeyName
    - NodeGroupName
    - Subnets
    - BootstrapArgumentsForOnDemand
    - BootstrapArgumentsForSpotFleet
    -
    Label:
    default: "Auto Scaling Configuration"
    Parameters:
    - NodeAutoScalingGroupMinSize
    - NodeAutoScalingGroupDesiredSize
    - NodeAutoScalingGroupMaxSize
    - NodeInstanceType
    - ASGAutoAssignPublicIp
    - OnDemandBaseCapacity
    - OnDemandPercentageAboveBaseCapacity
    - SpotInstancePools
    - InstanceTypesOverride

    Parameters:
    VpcId:
    Description: The VPC of the worker instances
    Type: AWS::EC2::VPC::Id

    Subnets:
    Description: Select 3 subnets where workers can be created.
    Type: List<AWS::EC2::Subnet::Id>

    NodeInstanceProfile:
    Type: String
    Description: Use the existing Instance Profile ARN for your nodegroup
    Default: ""

    KeyName:
    Description: The EC2 Key Pair to allow SSH access to the instances
    Type: AWS::EC2::KeyPair::KeyName
    Default: "eksworkshop"

    NodeImageId:
    Type: AWS::EC2::Image::Id
    Description: Find the latest AMI id here - https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html

    NodeInstanceType:
    Description: Default EC2 instance type for the node instances.
    Type: String
    Default: m4.large
    AllowedValues:
    - t2.small
    - t2.medium
    - t2.large
    - t2.xlarge
    - t2.2xlarge
    - t3.nano
    - t3.micro
    - t3.small
    - t3.medium
    - t3.large
    - t3.xlarge
    - t3.2xlarge
    - m3.medium
    - m3.large
    - m3.xlarge
    - m3.2xlarge
    - m4.large
    - m4.xlarge
    - m4.2xlarge
    - m4.4xlarge
    - m4.10xlarge
    - m5.large
    - m5.xlarge
    - m5.2xlarge
    - m5.4xlarge
    - m5.12xlarge
    - m5.24xlarge
    - c4.large
    - c4.xlarge
    - c4.2xlarge
    - c4.4xlarge
    - c4.8xlarge
    - c5.large
    - c5.xlarge
    - c5.2xlarge
    - c5.4xlarge
    - c5.9xlarge
    - c5.18xlarge
    - i3.large
    - i3.xlarge
    - i3.2xlarge
    - i3.4xlarge
    - i3.8xlarge
    - i3.16xlarge
    - r3.xlarge
    - r3.2xlarge
    - r3.4xlarge
    - r3.8xlarge
    - r4.large
    - r4.xlarge
    - r4.2xlarge
    - r4.4xlarge
    - r4.8xlarge
    - r4.16xlarge
    - x1.16xlarge
    - x1.32xlarge
    - p2.xlarge
    - p2.8xlarge
    - p2.16xlarge
    - p3.2xlarge
    - p3.8xlarge
    - p3.16xlarge
    - p3dn.24xlarge
    - r5.large
    - r5.xlarge
    - r5.2xlarge
    - r5.4xlarge
    - r5.12xlarge
    - r5.24xlarge
    - r5d.large
    - r5d.xlarge
    - r5d.2xlarge
    - r5d.4xlarge
    - r5d.12xlarge
    - r5d.24xlarge
    - z1d.large
    - z1d.xlarge
    - z1d.2xlarge
    - z1d.3xlarge
    - z1d.6xlarge
    - z1d.12xlarge
    ConstraintDescription: Must be a valid EC2 instance type

    NodeAutoScalingGroupMinSize:
    Type: Number
    Description: Minimum size of Node Group ASG.
    Default: 1

    NodeAutoScalingGroupDesiredSize:
    Type: Number
    Description: Desired size of Node Group ASG.
    Default: 3

    NodeAutoScalingGroupMaxSize:
    Type: Number
    Description: Maximum size of Node Group ASG.
    Default: 5

    ASGAutoAssignPublicIp:
    Type: String
    Description: "auto assign public IP address for ASG instances"
    AllowedValues:
    - "yes"
    - "no"
    Default: "yes"

    OnDemandBaseCapacity:
    Type: Number
    Description: "on-demand base capacity"
    Default: 1

    OnDemandPercentageAboveBaseCapacity:
    Type: Number
    Description: "on-demand percentage above base capacity(0-100)"
    Default: 0

    SpotInstancePools:
    Type: Number
    Description: "spot instance pools(1-20)"
    Default: 2

    InstanceTypesOverride:
    Type: String
    Description: "multiple spot instances to override(seperated by comma)"
    Default: "m4.large,c4.large,c5.large"

    UseExistingNodeSecurityGroups:
    Type: String
    Description: Please select 'yes' to attach existing SGs to nodegroup
    Default: "yes"
    AllowedValues:
    - "yes"
    - "no"
    ExistingNodeSecurityGroups:
    Type: String
    Description: Use the existing Security Group for your nodegroup
    Default: ""

    ClusterName:
    Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster.
    Type: String
    Default: "eksworkshop-eksctl"

    BootstrapArgumentsForOnDemand:
    Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    Default: "--kubelet-extra-args --node-labels=lifecycle=OnDemand"
    Type: String

    BootstrapArgumentsForSpotFleet:
    Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    Default: "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot --register-with-taints=spotInstance=true:PreferNoSchedule'
    "
    Type: String

    ClusterControlPlaneSecurityGroup:
    Description: The security group of the cluster control plane.
    Type: AWS::EC2::SecurityGroup::Id

    NodeGroupName:
    Description: Unique identifier for the Node Group.
    Type: String
    Default: "spotworkers"

    Conditions:
    IsASGAutoAssignPublicIp: !Equals [ !Ref ASGAutoAssignPublicIp , "yes" ]
    AttachExistingNodeSG: !Equals [ !Ref UseExistingNodeSecurityGroups, "yes" ]
    CreateNewNodeSG: !Equals [ !Ref UseExistingNodeSecurityGroups, "no" ]


    Resources:
    NodeSecurityGroup:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroup
    Properties:
    GroupDescription: Security group for all nodes in the cluster
    VpcId:
    !Ref VpcId
    Tags:
    - Key: !Sub "kubernetes.io/cluster/${ClusterName}"
    Value: 'owned'
    - Key: Name
    Value: !Sub "${ClusterName}-cluster/NodeSecurityGroup"

    NodeSecurityGroupIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow node to communicate with each other
    GroupId: !Ref NodeSecurityGroup
    SourceSecurityGroupId: !Ref NodeSecurityGroup
    IpProtocol: '-1'
    FromPort: 0
    ToPort: 65535

    NodeSecurityGroupFromControlPlaneIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow worker Kubelets and pods to receive communication from the cluster control plane
    GroupId: !Ref NodeSecurityGroup
    SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup
    IpProtocol: tcp
    FromPort: 1025
    ToPort: 65535

    ControlPlaneEgressToNodeSecurityGroup:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow the cluster control plane to communicate with worker Kubelet and pods
    GroupId: !Ref ClusterControlPlaneSecurityGroup
    DestinationSecurityGroupId: !Ref NodeSecurityGroup
    IpProtocol: tcp
    FromPort: 1025
    ToPort: 65535

    NodeSecurityGroupFromControlPlaneOn443Ingress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane
    GroupId: !Ref NodeSecurityGroup
    SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup
    IpProtocol: tcp
    FromPort: 443
    ToPort: 443

    ControlPlaneEgressToNodeSecurityGroupOn443:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443
    GroupId: !Ref ClusterControlPlaneSecurityGroup
    DestinationSecurityGroupId: !Ref NodeSecurityGroup
    IpProtocol: tcp
    FromPort: 443
    ToPort: 443

    ClusterControlPlaneSecurityGroupIngress:
    Condition: CreateNewNodeSG
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
    Description: Allow pods to communicate with the cluster API Server
    GroupId: !Ref ClusterControlPlaneSecurityGroup
    SourceSecurityGroupId: !Ref NodeSecurityGroup
    IpProtocol: tcp
    ToPort: 443
    FromPort: 443

    NodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
    DesiredCapacity: !Ref NodeAutoScalingGroupDesiredSize
    #LaunchConfigurationName: !Ref NodeLaunchConfig
    # LaunchTemplate:
    # LaunchTemplateId: !Ref MyLaunchTemplate
    # Version: !GetAtt MyLaunchTemplate.LatestVersionNumber
    MixedInstancesPolicy:
    InstancesDistribution:
    OnDemandAllocationStrategy: prioritized
    OnDemandBaseCapacity: !Ref OnDemandBaseCapacity
    OnDemandPercentageAboveBaseCapacity: !Ref OnDemandPercentageAboveBaseCapacity
    SpotAllocationStrategy: lowest-price
    SpotInstancePools: !Ref SpotInstancePools
    # SpotMaxPrice: String
    LaunchTemplate:
    LaunchTemplateSpecification:
    LaunchTemplateId: !Ref MyLaunchTemplate
    # LaunchTemplateName: String
    Version: !GetAtt MyLaunchTemplate.LatestVersionNumber
    Overrides:
    - InstanceType: !Select [0, !Split [ ",", !Ref InstanceTypesOverride ] ]
    - InstanceType: !Select [1, !Split [ ",", !Ref InstanceTypesOverride ] ]
    - InstanceType: !Select [2, !Split [ ",", !Ref InstanceTypesOverride ] ]

      MinSize: !Ref NodeAutoScalingGroupMinSize
      MaxSize: !Ref NodeAutoScalingGroupMaxSize
      VPCZoneIdentifier: !Ref Subnets
      Tags:
        - Key: Name
          Value: !Sub "${ClusterName}-${NodeGroupName}-ASG-Node"
          PropagateAtLaunch: 'true'
        - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
          Value: 'owned'
          PropagateAtLaunch: 'true'
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: !Ref NodeAutoScalingGroupDesiredSize
        MaxBatchSize: '1'
        PauseTime: 'PT5M'

  LCH:
    Type: AWS::AutoScaling::LifecycleHook
    Properties:
      AutoScalingGroupName: !Ref NodeGroup
      HeartbeatTimeout: 60
      DefaultResult: CONTINUE
      LifecycleHookName: !Sub "${NodeGroupName}-LCH"
      LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING
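      # No NotificationTargetArn here: the hook simply holds terminating
      # instances in Terminating:Wait for HeartbeatTimeout seconds so a
      # node drainer can react, after which DefaultResult (CONTINUE) applies.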

  #
  # Launch Template
  #
  MyLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub "eksLaunchTemplate-${AWS::StackName}"
      LaunchTemplateData:
        # SecurityGroupIds:
        #   - !Ref NodeSecurityGroup
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub "${ClusterName}-${NodeGroupName}-ASG-Node"
              - Key: KubernetesCluster
                Value: !Ref ClusterName
              - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
                Value: 'owned'
        UserData:
          Fn::Base64:
            !Sub |
              #!/bin/bash
              set -o xtrace
              iid=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
              export AWS_DEFAULT_REGION=${AWS::Region}
              ilc=$(aws ec2 describe-instances --instance-ids $iid --query 'Reservations[0].Instances[0].InstanceLifecycle' --output text)
              if [ "$ilc" == "spot" ]; then
                /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArgumentsForSpotFleet}
              else
                /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArgumentsForOnDemand}
              fi
              # /etc/eks/bootstrap.sh ${ClusterName} $BootstrapArgumentsForOnDemand
              /opt/aws/bin/cfn-signal --exit-code $? \
                --stack ${AWS::StackName} \
                --resource NodeGroup \
                --region ${AWS::Region}
        IamInstanceProfile:
          Arn: !Ref NodeInstanceProfile
        KeyName: !Ref KeyName
        NetworkInterfaces:
          - DeviceIndex: 0
            AssociatePublicIpAddress:
              !If
                - IsASGAutoAssignPublicIp
                - 'true'
                - 'false'
            SubnetId: !Select [0, !Ref Subnets]
            Groups:
              !If
                - CreateNewNodeSG
                -
                  - !Ref NodeSecurityGroup
                - !Split [ ",", !Ref ExistingNodeSecurityGroups ]
        ImageId: !Ref NodeImageId
        InstanceType: !Ref NodeInstanceType
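The UserData above keeps a single launch template and branches on the instance lifecycle at boot. The same detection can be run by hand from any box with EC2 read access; a minimal sketch, assuming IMDSv1 is reachable (as the template itself assumes). For on-demand instances the InstanceLifecycle field is absent, so the text output prints None:

#!/bin/bash
# Reproduce the template's spot/on-demand detection outside of UserData.
iid=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
ilc=$(aws ec2 describe-instances --instance-ids "$iid" \
  --query 'Reservations[0].Instances[0].InstanceLifecycle' --output text)
if [ "$ilc" == "spot" ]; then
  echo "spot node: would take the spot labels/taints branch"
else
  echo "on-demand node (ilc=$ilc): would take the on-demand branch"
fi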
  10. swo0k revised this gist Aug 23, 2019. 2 changed files with 5 additions and 18 deletions.
    20 changes: 3 additions & 17 deletions cfn-tf-changes.diff
    @@ -1,30 +1,16 @@
    diff --git a/compute-workers-cfn-1.tf b/compute-workers-cfn-2.tf
-index 4cc4ad2..296ced3 100644
+index 4cc4ad2..ed45e07 100644
    --- a/compute-workers-cfn-1.tf
    +++ b/compute-workers-cfn-2.tf
    @@ -1,4 +1,3 @@
    -# cluster name bohr
    resource "aws_cloudformation_stack" "workers" {
    name = "${module.cluster_label.id}-compute-workers"

    @@ -8,7 +7,7 @@ resource "aws_cloudformation_stack" "workers" {
    ClusterName = "${module.cluster_label.id}"
    ClusterControlPlaneSecurityGroup = "${aws_security_group.masters.id}"
    NodeImageId = "${data.aws_ami.worker.id}"
    - NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}"
    + NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}" # shared with edge
    UseExistingNodeSecurityGroups = "yes"
    ExistingNodeSecurityGroups = "${aws_security_group.workers.id}"
    VpcId = "${data.terraform_remote_state.shared.swat_staging_vpc_id}"
-@@ -17,6 +16,7 @@ resource "aws_cloudformation_stack" "workers" {
+@@ -17,6 +17,7 @@ resource "aws_cloudformation_stack" "workers" {
    NodeAutoScalingGroupMinSize = "1"
    NodeAutoScalingGroupDesiredSize = "3"
    NodeAutoScalingGroupMaxSize = "9"
    + ClusterAutoscalerStatus = "enabled"
    NodeInstanceType = "c5.2xlarge"
    ASGAutoAssignPublicIp = "no"
    OnDemandBaseCapacity = "1"
-@@ -28,7 +28,7 @@ resource "aws_cloudformation_stack" "workers" {
+@@ -28,7 +29,7 @@ resource "aws_cloudformation_stack" "workers" {
    BootstrapArgumentsForOnDemand = "--kubelet-extra-args '--node-labels=lifecycle=OnDemand,node-role.kubernetes.io/worker=true'"
    }

    3 changes: 2 additions & 1 deletion compute-workers-cfn-2.tf
    @@ -1,3 +1,4 @@
+# cluster name bohr
    resource "aws_cloudformation_stack" "workers" {
    name = "${module.cluster_label.id}-compute-workers"

    @@ -7,7 +8,7 @@ resource "aws_cloudformation_stack" "workers" {
    ClusterName = "${module.cluster_label.id}"
    ClusterControlPlaneSecurityGroup = "${aws_security_group.masters.id}"
    NodeImageId = "${data.aws_ami.worker.id}"
-NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}" # shared with edge
+NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}"
    UseExistingNodeSecurityGroups = "yes"
    ExistingNodeSecurityGroups = "${aws_security_group.workers.id}"
    VpcId = "${data.terraform_remote_state.shared.swat_staging_vpc_id}"
  11. swo0k revised this gist Aug 23, 2019. 5 changed files with 161 additions and 4 deletions.
    66 changes: 66 additions & 0 deletions cfn-template-changes.diff
    @@ -0,0 +1,66 @@
    diff --git a/amazon-eks-nodegroup-with-mixed-instances.yml b/amazon-eks-nodegroup-with-mixed-instances-custom.yml
    index a0a3c99..512c50f 100644
    --- a/amazon-eks-nodegroup-with-mixed-instances.yml
    +++ b/amazon-eks-nodegroup-with-mixed-instances-custom.yml
    @@ -27,6 +27,7 @@ Metadata:
    - NodeAutoScalingGroupMinSize
    - NodeAutoScalingGroupDesiredSize
    - NodeAutoScalingGroupMaxSize
    + - ClusterAutoscalerStatus
    - NodeInstanceType
    - ASGAutoAssignPublicIp
    - OnDemandBaseCapacity
    @@ -168,6 +169,14 @@ Parameters:
    - "no"
    Default: "yes"

    + ClusterAutoscalerStatus:
    + Type: String
    + Description: "cluster-autoscaler status"
    + AllowedValues:
    + - "enabled"
    + - "disabled"
    + Default: "enabled"
    +
    OnDemandBaseCapacity:
    Type: Number
    Description: "on-demand base capacity"
    @@ -354,6 +363,15 @@ Resources:
    - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
    Value: 'owned'
    PropagateAtLaunch: 'true'
    + - Key: Namespace
    + Value: swat
    + PropagateAtLaunch: 'true'
    + - Key: Stage
    + Value: stage
    + PropagateAtLaunch: 'true'
    + - Key: !Sub 'k8s.io/cluster-autoscaler/${ClusterAutoscalerStatus}'
    + Value: 'true'
    + PropagateAtLaunch: 'true'
    UpdatePolicy:
    AutoScalingRollingUpdate:
    MinInstancesInService: !Ref NodeAutoScalingGroupDesiredSize
    @@ -389,6 +407,12 @@ Resources:
    Value: !Ref ClusterName
    - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
    Value: 'owned'
    + - Key: Namespace
    + Value: swat
    + - Key: Stage
    + Value: stage
    + - Key: !Sub 'k8s.io/cluster-autoscaler/${ClusterAutoscalerStatus}'
    + Value: 'true'
    UserData:
    Fn::Base64:
    !Sub |
    @@ -427,3 +451,9 @@ Resources:
    - !Split [ ",", !Ref ExistingNodeSecurityGroups ]
    ImageId: !Ref NodeImageId
    InstanceType: !Ref NodeInstanceType
    +
    +Outputs:
    +
    + NodeGroup:
    + Description: The node instance group
    + Value: !Ref NodeGroup
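The k8s.io/cluster-autoscaler/${ClusterAutoscalerStatus} tag this diff adds is the key that cluster-autoscaler's ASG auto-discovery matches on (the tag value is ignored; only the key has to be present), so flipping the parameter to "disabled" changes the key and the group simply stops being discovered. A sketch of the matching autoscaler invocation; these are the autoscaler's standard flags, not something pinned anywhere in this gist:

# Discover node groups by tag instead of listing ASGs explicitly.
cluster-autoscaler \
  --cloud-provider=aws \
  --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled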
    21 changes: 21 additions & 0 deletions cfn-template.tf
    @@ -0,0 +1,21 @@
    # workers cfn stack templates
    locals {
    templates_bucket = "${module.cluster_label.id}-templates"

    mixed_instances_template = "amazon-eks-nodegroup-with-mixed-instances.yml"
    }

    resource "aws_s3_bucket" "templates" {
    acl = "private"
    region = "${var.aws_region}"
    bucket = "${local.templates_bucket}"
    tags = "${module.cluster_label.tags}"
    force_destroy = "true"
    }

    resource "aws_s3_bucket_object" "mixed_instances_template" {
    bucket = "${aws_s3_bucket.templates.id}"
    key = "${local.mixed_instances_template}"
    source = "templates/${local.mixed_instances_template}"
    etag = "${filemd5("templates/${local.mixed_instances_template}")}"
    }
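The etag argument makes Terraform re-upload the object whenever the local template changes; filemd5 only lines up with the S3 ETag for single-part uploads, where the ETag is the plain MD5. A quick drift check under that same assumption, with the bucket name filled in from the locals above (placeholder, not a real value):

# Compare the local template's MD5 with the uploaded object's ETag.
BUCKET="<cluster-label>-templates"   # hypothetical value of local.templates_bucket
KEY="amazon-eks-nodegroup-with-mixed-instances.yml"
local_md5=$(md5sum "templates/${KEY}" | awk '{print $1}')
s3_etag=$(aws s3api head-object --bucket "$BUCKET" --key "$KEY" \
  --query ETag --output text | tr -d '"')
[ "$local_md5" = "$s3_etag" ] && echo "in sync" || echo "drift detected"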
    35 changes: 35 additions & 0 deletions cfn-tf-changes.diff
    @@ -0,0 +1,35 @@
    diff --git a/compute-workers-cfn-1.tf b/compute-workers-cfn-2.tf
    index 4cc4ad2..296ced3 100644
    --- a/compute-workers-cfn-1.tf
    +++ b/compute-workers-cfn-2.tf
    @@ -1,4 +1,3 @@
    -# cluster name bohr
    resource "aws_cloudformation_stack" "workers" {
    name = "${module.cluster_label.id}-compute-workers"

    @@ -8,7 +7,7 @@ resource "aws_cloudformation_stack" "workers" {
    ClusterName = "${module.cluster_label.id}"
    ClusterControlPlaneSecurityGroup = "${aws_security_group.masters.id}"
    NodeImageId = "${data.aws_ami.worker.id}"
    - NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}"
    + NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}" # shared with edge
    UseExistingNodeSecurityGroups = "yes"
    ExistingNodeSecurityGroups = "${aws_security_group.workers.id}"
    VpcId = "${data.terraform_remote_state.shared.swat_staging_vpc_id}"
    @@ -17,6 +16,7 @@ resource "aws_cloudformation_stack" "workers" {
    NodeAutoScalingGroupMinSize = "1"
    NodeAutoScalingGroupDesiredSize = "3"
    NodeAutoScalingGroupMaxSize = "9"
    + ClusterAutoscalerStatus = "enabled"
    NodeInstanceType = "c5.2xlarge"
    ASGAutoAssignPublicIp = "no"
    OnDemandBaseCapacity = "1"
    @@ -28,7 +28,7 @@ resource "aws_cloudformation_stack" "workers" {
    BootstrapArgumentsForOnDemand = "--kubelet-extra-args '--node-labels=lifecycle=OnDemand,node-role.kubernetes.io/worker=true'"
    }

    - template_url = "https://s3.amazonaws.com/eksworkshop.com/templates/master/amazon-eks-nodegroup-with-mixed-instances.yml"
    + template_url = "https://${aws_s3_bucket.templates.bucket_regional_domain_name}/${local.mixed_instances_template}"

    lifecycle {
    ignore_changes = [
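With the template self-hosted, it is easy to sanity-check exactly what the stack will pull before applying; the URL below assumes the bucket/key layout from cfn-template.tf and uses a placeholder bucket name:

aws cloudformation validate-template \
  --template-url "https://<cluster-label>-templates.s3.amazonaws.com/amazon-eks-nodegroup-with-mixed-instances.yml"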
    39 changes: 39 additions & 0 deletions compute-workers-cfn-1.tf
    @@ -0,0 +1,39 @@
    # cluster name bohr
    resource "aws_cloudformation_stack" "workers" {
    name = "${module.cluster_label.id}-compute-workers"

    parameters = {
    NodeGroupName = "${module.cluster_label.id}-compute-workers" # Unique identifier for the Node Group.

    ClusterName = "${module.cluster_label.id}"
    ClusterControlPlaneSecurityGroup = "${aws_security_group.masters.id}"
    NodeImageId = "${data.aws_ami.worker.id}"
    NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}"
    UseExistingNodeSecurityGroups = "yes"
    ExistingNodeSecurityGroups = "${aws_security_group.workers.id}"
    VpcId = "${data.terraform_remote_state.shared.swat_staging_vpc_id}"
    KeyName = "${data.terraform_remote_state.staging.ec2_key_name}"
    Subnets = "${join(",",values(data.terraform_remote_state.staging.private_subnets_bohr))}"
    NodeAutoScalingGroupMinSize = "1"
    NodeAutoScalingGroupDesiredSize = "3"
    NodeAutoScalingGroupMaxSize = "9"
    NodeInstanceType = "c5.2xlarge"
    ASGAutoAssignPublicIp = "no"
    OnDemandBaseCapacity = "1"
    OnDemandPercentageAboveBaseCapacity = "0" # (0-100)
    SpotInstancePools = "3" # (1-20)
InstanceTypesOverride = "c5.2xlarge,m5.2xlarge,c4.2xlarge" # multiple spot instances to override (separated by comma)

    BootstrapArgumentsForSpotFleet = "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,node-role.kubernetes.io/spot-worker=true --register-with-taints=spotInstance=true:PreferNoSchedule'"
    BootstrapArgumentsForOnDemand = "--kubelet-extra-args '--node-labels=lifecycle=OnDemand,node-role.kubernetes.io/worker=true'"
    }

    template_url = "https://s3.amazonaws.com/eksworkshop.com/templates/master/amazon-eks-nodegroup-with-mixed-instances.yml"

    lifecycle {
    ignore_changes = [
    "parameters.NodeAutoScalingGroupDesiredSize",
    "parameters.OnDemandBaseCapacity",
    ]
    }
    }
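Because NodeAutoScalingGroupDesiredSize and OnDemandBaseCapacity sit in ignore_changes, the live values can legitimately drift from what this file says once cluster-autoscaler starts resizing the group. A sketch of checking both sides (stack name per the resource above; the ASG name is auto-generated by CloudFormation, so it is matched by prefix):

STACK="<cluster-label>-compute-workers"   # hypothetical stack name
# Parameter as last applied through CloudFormation
aws cloudformation describe-stacks --stack-name "$STACK" \
  --query "Stacks[0].Parameters[?ParameterKey=='NodeAutoScalingGroupDesiredSize'].ParameterValue" \
  --output text
# Live desired capacity on the ASG itself
aws autoscaling describe-auto-scaling-groups \
  --query "AutoScalingGroups[?contains(AutoScalingGroupName, '$STACK')].DesiredCapacity" \
  --output text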
    4 changes: 0 additions & 4 deletions compute-workers-cfn.tf → compute-workers-cfn-2.tf
    @@ -24,14 +24,10 @@ resource "aws_cloudformation_stack" "workers" {
    SpotInstancePools = "3" # (1-20)
InstanceTypesOverride = "c5.2xlarge,m5.2xlarge,c4.2xlarge" # multiple spot instances to override (separated by comma)

-# ref: https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh
-# evictionHard: memory.available<750Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5%
    BootstrapArgumentsForSpotFleet = "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,node-role.kubernetes.io/spot-worker=true --register-with-taints=spotInstance=true:PreferNoSchedule'"
    BootstrapArgumentsForOnDemand = "--kubelet-extra-args '--node-labels=lifecycle=OnDemand,node-role.kubernetes.io/worker=true'"
    }

    # template_url = "https://amazon-eks.s3-us-west-2.amazonaws.com/cloudformation/2019-02-11/amazon-eks-nodegroup.yaml"
    # template_url = "https://s3.amazonaws.com/eksworkshop.com/templates/master/amazon-eks-nodegroup-with-mixed-instances.yml"
    template_url = "https://${aws_s3_bucket.templates.bucket_regional_domain_name}/${local.mixed_instances_template}"

    lifecycle {
  12. swo0k created this gist Aug 23, 2019.
    43 changes: 43 additions & 0 deletions compute-workers-cfn.tf
    @@ -0,0 +1,43 @@
    resource "aws_cloudformation_stack" "workers" {
    name = "${module.cluster_label.id}-compute-workers"

    parameters = {
    NodeGroupName = "${module.cluster_label.id}-compute-workers" # Unique identifier for the Node Group.

    ClusterName = "${module.cluster_label.id}"
    ClusterControlPlaneSecurityGroup = "${aws_security_group.masters.id}"
    NodeImageId = "${data.aws_ami.worker.id}"
    NodeInstanceProfile = "${aws_iam_instance_profile.workers.arn}" # shared with edge
    UseExistingNodeSecurityGroups = "yes"
    ExistingNodeSecurityGroups = "${aws_security_group.workers.id}"
    VpcId = "${data.terraform_remote_state.shared.swat_staging_vpc_id}"
    KeyName = "${data.terraform_remote_state.staging.ec2_key_name}"
    Subnets = "${join(",",values(data.terraform_remote_state.staging.private_subnets_bohr))}"
    NodeAutoScalingGroupMinSize = "1"
    NodeAutoScalingGroupDesiredSize = "3"
    NodeAutoScalingGroupMaxSize = "9"
    ClusterAutoscalerStatus = "enabled"
    NodeInstanceType = "c5.2xlarge"
    ASGAutoAssignPublicIp = "no"
    OnDemandBaseCapacity = "1"
    OnDemandPercentageAboveBaseCapacity = "0" # (0-100)
    SpotInstancePools = "3" # (1-20)
InstanceTypesOverride = "c5.2xlarge,m5.2xlarge,c4.2xlarge" # multiple spot instances to override (separated by comma)

# ref: https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh
    # evictionHard: memory.available<750Mi,nodefs.available<10%,nodefs.inodesFree<5%,imagefs.available<10%,imagefs.inodesFree<5%
    BootstrapArgumentsForSpotFleet = "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,node-role.kubernetes.io/spot-worker=true --register-with-taints=spotInstance=true:PreferNoSchedule'"
    BootstrapArgumentsForOnDemand = "--kubelet-extra-args '--node-labels=lifecycle=OnDemand,node-role.kubernetes.io/worker=true'"
    }

    # template_url = "https://amazon-eks.s3-us-west-2.amazonaws.com/cloudformation/2019-02-11/amazon-eks-nodegroup.yaml"
    # template_url = "https://s3.amazonaws.com/eksworkshop.com/templates/master/amazon-eks-nodegroup-with-mixed-instances.yml"
    template_url = "https://${aws_s3_bucket.templates.bucket_regional_domain_name}/${local.mixed_instances_template}"

    lifecycle {
    ignore_changes = [
    "parameters.NodeAutoScalingGroupDesiredSize",
    "parameters.OnDemandBaseCapacity",
    ]
    }
    }
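Once nodes join, the labels and taints from the bootstrap arguments above are easy to verify from the Kubernetes side:

# lifecycle=Ec2Spot / lifecycle=OnDemand labels set via --node-labels
kubectl get nodes -L lifecycle,node-role.kubernetes.io/spot-worker
# spotInstance=true:PreferNoSchedule taints set via --register-with-taints
kubectl get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints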