Last active
October 17, 2016 15:32
-
-
Save sahilsk/f8dfd23f23c60c13ece8a21095f3cb0a to your computer and use it in GitHub Desktop.
TaskRunner upstart script. aws, data pipeline, taskrunner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Upstart job definition for the AWS Data Pipeline Task Runner.
author "Sonu Kr. Meena"
description "upstart script for AWS Data Pipeline Task Runner"

# Start in the normal multi-user runlevels; stop in every other runlevel.
start on runlevel [2345]
stop on runlevel [!2345]

# Respawn the job up to 5 times within a 10 second period.
# If the job exceeds these values, it will be stopped and
# marked as failed.
respawn
respawn limit 5 10

# Move to this service's working directory. The job's script refers to
# TaskRunner-1.0.jar and ./credentials.json relative to this directory.
chdir /home/ubuntu/taskrunner
script
  # Main job body: assemble the Task Runner command line and exec it.
  # Written for POSIX sh (no arrays / [[ ]]); the argument list is built in
  # the positional parameters so every value stays a single, intact argument.

  # TaskRunner configuration
  S3_BUCKET_URL="s3://practo-iaac/feedback-test-bucket-sonu"
  LOGS_HOME="/var/log/taskrunner"
  LOG_DIR="$LOGS_HOME/output"
  WORKER_GROUP="stage-feedback-cron-wgrp"
  RUNNER_ID="sk-runner-01"
  AWS_REGION="us-east-1"

  # JVM options; heap dumps on OutOfMemoryError are written to $LOGS_HOME.
  JAVA_OPTS="-Xmx128m -Xms128m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=$LOGS_HOME"

  # Optional overrides: each non-empty line of server.cfg is passed to the
  # JVM as one -Ddw.<line> system property. Reading line by line (instead of
  # word-splitting `cat` output) prevents glob expansion and keeps values
  # that contain spaces in one piece. `read` without IFS= trims surrounding
  # whitespace; the `|| [ -n ... ]` handles a missing trailing newline.
  set --
  if [ -f /etc/taskrunner/server.cfg ]; then
    while read -r property || [ -n "$property" ]; do
      [ -n "$property" ] || continue
      set -- "$@" "-Ddw.$property"
    done < /etc/taskrunner/server.cfg
  fi

  # Create the log directory if it is not there already
  # (mkdir -p is a no-op when the directory exists).
  mkdir -p "$LOG_DIR"

  # Construct the full java argument list.
  # JAVA_OPTS is left unquoted on purpose: it holds several
  # whitespace-separated JVM flags that must become separate arguments.
  # shellcheck disable=SC2086
  set -- $JAVA_OPTS "$@" -jar TaskRunner-1.0.jar \
    --config ./credentials.json \
    "--workerGroup=$WORKER_GROUP" \
    "--region=$AWS_REGION" \
    "--output=$LOG_DIR" \
    "--logUri=$S3_BUCKET_URL" \
    "--taskrunnerId=$RUNNER_ID"

  # Record the exact command in syslog (and on stderr) before running it.
  logger -is -t "$UPSTART_JOB" "[$(date -u +%Y-%m-%dT%T.%3NZ)] executing: java $*"

  # exec replaces the shell so the java process is the one Upstart supervises.
  # NOTE(review): the log lives at a fixed, predictable path in /tmp; kept
  # as-is for compatibility, but consider moving it under $LOGS_HOME.
  exec java "$@" >> /tmp/taskrunner-upstart.log 2>&1
end script
pre-stop script
  # Before the job is stopped, write a UTC-timestamped notice to syslog,
  # tagged with the job name (-i adds the PID, -s also echoes to stderr).
  logger -is -t "$UPSTART_JOB" "[$(date -u +%Y-%m-%dT%T.%3NZ)] (sys) Stopping"
end script
IAM User
- managed policy:
AWSDataPipelineRole
{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Action": [
"cloudwatch:*",
"datapipeline:DescribeObjects",
"datapipeline:EvaluateExpression",
"dynamodb:BatchGetItem",
"dynamodb:DescribeTable",
"dynamodb:GetItem",
"dynamodb:Query",
"dynamodb:Scan",
"dynamodb:UpdateTable",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:CancelSpotInstanceRequests",
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DeleteTags",
"ec2:Describe*",
"ec2:ModifyImageAttribute",
"ec2:ModifyInstanceAttribute",
"ec2:RequestSpotInstances",
"ec2:RunInstances",
"ec2:StartInstances",
"ec2:StopInstances",
"ec2:TerminateInstances",
"ec2:AuthorizeSecurityGroupEgress",
"ec2:DeleteSecurityGroup",
"ec2:RevokeSecurityGroupEgress",
"ec2:DescribeNetworkInterfaces",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DetachNetworkInterface",
"elasticmapreduce:*",
"iam:GetInstanceProfile",
"iam:GetRole",
"iam:GetRolePolicy",
"iam:ListAttachedRolePolicies",
"iam:ListRolePolicies",
"iam:ListInstanceProfiles",
"iam:PassRole",
"rds:DescribeDBInstances",
"rds:DescribeDBSecurityGroups",
"redshift:DescribeClusters",
"redshift:DescribeClusterSecurityGroups",
"s3:CreateBucket",
"s3:DeleteObject",
"s3:Get*",
"s3:List*",
"s3:Put*",
"sdb:BatchPutAttributes",
"sdb:Select*",
"sns:GetTopicAttributes",
"sns:ListTopics",
"sns:Publish",
"sns:Subscribe",
"sns:Unsubscribe",
"sqs:CreateQueue",
"sqs:Delete*",
"sqs:GetQueue*",
"sqs:PurgeQueue",
"sqs:ReceiveMessage"
],
"Resource": ["*"]
}]
}
- Custom policy
s3 permission
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1464158960000",
"Effect": "Allow",
"Action": [
"s3:GetBucketLocation",
"s3:GetObject",
"s3:ListBucket",
"s3:ListBucketMultipartUploads",
"s3:ListBucketVersions",
"s3:ListMultipartUploadParts"
],
"Resource": [
"*"
]
},
{
"Sid": "Stmt1464159014000",
"Effect": "Allow",
"Action": [
"s3:*"
],
"Resource": [
"arn:aws:s3:::/feedback-test-bucket-sonu/",
"arn:aws:s3:::/feedback-test-bucket-sonu*"
]
}
]
}
DataPipeline Permissions
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1464589113000",
"Effect": "Allow",
"Action": [
"datapipeline:GetPipelineDefinition",
"datapipeline:PollForTask",
"datapipeline:QueryObjects",
"datapipeline:ReportTaskProgress",
"datapipeline:ReportTaskRunnerHeartbeat",
"datapipeline:SetStatus",
"datapipeline:SetTaskStatus"
],
"Resource": [
"*"
]
}
]
}
credentials.json
{
"access-id":"<aws-key>",
"private-key":"< aws secret>",
"endpoint":"https://datapipeline.us-east-1.amazonaws.com",
"region":"us-east-1",
"log-uri":"s3://datapipeline-log"
}
AWS Data Pipeline is not available in the Singapore region yet, but that is fine: the us-east-1 endpoint will work.
Installation instruction
Install task runner
wget https://s3.amazonaws.com/datapipeline-us-east-1/us-east-1/software/latest/TaskRunner/TaskRunner-1.0.jar
Check for the latest jar at this URL: http://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-how-task-runner-user-managed.html
Install jre.
sudo apt-get install default-jre
Confirm:
java -version
The reported Java version should be newer than 1.6.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Install instructions