@memorysaver
Created September 21, 2020 10:28
AWS Batch with EFS

An AWS CDK (v1, Python) stack that runs a scheduled AWS Batch job on Spot instances, with an Amazon EFS file system mounted into the job container at /mnt/efs.
import base64
from aws_cdk import (
core,
aws_ec2 as ec2,
aws_ecs as ecs,
aws_iam as iam,
aws_batch as batch,
)
from aws_cdk.aws_events import Rule, Schedule
from aws_cdk.aws_events_targets import BatchJob
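
# Note: this targets AWS CDK v1 (the monolithic aws_cdk package); CDK v2
# moved these modules into aws-cdk-lib, so the imports differ there.
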
class BatchStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, shared: core.Stack, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Shared resources (VPC and EFS file system) come from another stack.
        vpc = shared.vpc

        # User data that installs amazon-efs-utils and mounts the EFS file
        # system at /mnt/efs on every Batch compute instance.
        # https://aws.amazon.com/premiumsupport/knowledge-center/batch-mount-efs/
        user_data = """
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

--==MYBOUNDARY==
Content-Type: text/cloud-config; charset="us-ascii"

packages:
- amazon-efs-utils

runcmd:
- file_system_id_01={}
- efs_directory=/mnt/efs
- mkdir -p ${{efs_directory}}
- echo "${{file_system_id_01}}:/ ${{efs_directory}} efs tls,_netdev" >> /etc/fstab
- mount -a -t efs defaults

--==MYBOUNDARY==--
""".format(shared.file_system.file_system_id)
        print(f'current file_id: {shared.file_system.file_system_id}')

        # Launch template: 40 GiB gp2 root volume plus the EFS-mounting user
        # data (EC2 expects user data base64-encoded).
        launch_template = ec2.CfnLaunchTemplate(
            self, "CAGRBatchJobLaunchTemplate",
            launch_template_name="extra-storage-template",
            launch_template_data={
                "blockDeviceMappings": [{
                    "deviceName": "/dev/xvda",
                    "ebs": {
                        "volumeSize": 40,
                        "volumeType": "gp2",
                    },
                }],
                "userData": base64.b64encode(user_data.encode('ascii')).decode('ascii'),
            },
        )

        # Spot compute environment that launches instances from the template above.
        environment = batch.ComputeEnvironment(
            self, "CAGR-batch-Env",
            compute_resources={
                "launch_template": {
                    "launch_template_name": launch_template.launch_template_name,
                },
                "type": batch.ComputeResourceType.SPOT,
                # "type": batch.ComputeResourceType.ON_DEMAND,
                "bid_percentage": 75,  # bid up to 75% of the On-Demand price
                "vpc": vpc,
            },
        )

        job_queue = batch.JobQueue(
            self, "CAGRJobQueue",
            compute_environments=[{
                # The compute resources that handle jobs from this queue.
                "compute_environment": environment,
                # Order determines allocation preference: lower means higher priority.
                "order": 1,
            }],
        )

        # Container image built from the local Dockerfile under ./aws_stack/batch_job/.
        image = ecs.ContainerImage.from_asset(
            "./aws_stack/batch_job/", repository_name='cagr/batch_job')

        job_definition = batch.JobDefinition(
            self, "batch-job-finlab-update",
            container={
                "image": image,
                "memory_limit_mib": 8192,
                # Map the host's /mnt/efs (the EFS mount) into the container.
                "volumes": [
                    {"host": {"source_path": "/mnt/efs"}, "name": "efs"},
                ],
                "mount_points": [
                    {"container_path": "/mnt/efs", "source_volume": "efs", "read_only": False},
                ],
            },
        )

        # Run the job every day at 12:00 UTC (CloudWatch Events cron is UTC).
        # https://aws.amazon.com/blogs/database/scheduling-and-running-amazon-rds-jobs-with-aws-batch-and-amazon-cloudwatch-rules/
        batch_target = BatchJob(job_queue, job_definition, job_name='daily-finlab-crawling-job')
        Rule(
            self, "daily-crawling-event",
            schedule=Schedule.cron(minute="0", hour="12"),
            targets=[batch_target],
        )

        core.CfnOutput(
            self, "CAGR.RUN Job Queue Name",
            value=job_queue.job_queue_name,
        )
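
For context, a minimal sketch of how this stack might be wired into a CDK v1 app. SharedStack here is an assumed companion stack exposing the vpc and file_system attributes that BatchStack reads; the names are illustrative, not from the original gist. In a real deployment the file system's security group must also allow NFS traffic from the Batch instances.

# app.py: hypothetical wiring (SharedStack is assumed, not part of the gist)
from aws_cdk import core, aws_ec2 as ec2, aws_efs as efs

class SharedStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # The two attributes BatchStack expects on the shared stack.
        self.vpc = ec2.Vpc(self, "SharedVpc", max_azs=2)
        self.file_system = efs.FileSystem(self, "SharedEfs", vpc=self.vpc)

app = core.App()
shared = SharedStack(app, "shared")
BatchStack(app, "batch", shared=shared)
app.synth()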