Skip to content

Instantly share code, notes, and snippets.

@sahilsk
Last active October 17, 2016 15:32
Show Gist options
  • Save sahilsk/f8dfd23f23c60c13ece8a21095f3cb0a to your computer and use it in GitHub Desktop.
Save sahilsk/f8dfd23f23c60c13ece8a21095f3cb0a to your computer and use it in GitHub Desktop.
TaskRunner upstart script. aws, data pipeline, taskrunner
# Upstart job that runs the AWS Data Pipeline Task Runner
# (TaskRunner-1.0.jar) from /home/ubuntu/taskrunner.
#
# Optional overrides: every whitespace-separated token found in
# /etc/taskrunner/server.cfg is passed to the JVM as -Ddw.<token>.
author "Sonu Kr. Meena"
description "upstart script for AWS Data Pipeline Task Runner"

start on runlevel [2345]
stop on runlevel [!2345]

# Respawn the job up to 5 times within a 10 second period.
# If the job exceeds these values, it will be stopped and
# marked as failed.
respawn
respawn limit 5 10

# Move to this service's working directory. The relative paths used below
# (TaskRunner-1.0.jar, ./credentials.json) are resolved against it.
chdir /home/ubuntu/taskrunner

script
    # TaskRunner configuration
    S3_BUCKET_URL="s3://practo-iaac/feedback-test-bucket-sonu"
    LOGS_HOME="/var/log/taskrunner"
    LOG_DIR="$LOGS_HOME/output"
    WORKER_GROUP="stage-feedback-cron-wgrp"
    RUNNER_ID="sk-runner-01"
    AWS_REGION="us-east-1"

    # JVM options; heap dumps on OutOfMemoryError are written to $LOGS_HOME.
    JAVA_OPTS="-Xmx128m -Xms128m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=$LOGS_HOME"
    DW_ARGS=""

    # Disable pathname expansion for the rest of the script. The config
    # tokens and the final command line are expanded unquoted on purpose
    # (to split them into words); without this, a token containing *, ? or [
    # could be replaced by matching filenames from the working directory.
    set -f

    # If the override config file exists, turn each whitespace-separated
    # token in it into a -Ddw.<token> JVM system property.
    if [ -f /etc/taskrunner/server.cfg ]; then
        for setting in $(cat /etc/taskrunner/server.cfg); do
            DW_ARGS="$DW_ARGS -Ddw.$setting"
        done
    fi

    # Create the log directory; mkdir -p succeeds if it already exists.
    mkdir -p "$LOG_DIR"

    # Construct the java command piece by piece. Each piece is joined with a
    # single space so the resulting string is easy to read in the log line.
    JAVA_CMD="java $JAVA_OPTS $DW_ARGS -jar TaskRunner-1.0.jar"
    JAVA_CMD="$JAVA_CMD --config ./credentials.json"
    JAVA_CMD="$JAVA_CMD --workerGroup=$WORKER_GROUP"
    JAVA_CMD="$JAVA_CMD --region=$AWS_REGION"
    JAVA_CMD="$JAVA_CMD --output=$LOG_DIR"
    JAVA_CMD="$JAVA_CMD --logUri=$S3_BUCKET_URL"
    JAVA_CMD="$JAVA_CMD --taskrunnerId=$RUNNER_ID"

    logger -is -t "$UPSTART_JOB" "[$(date -u +%Y-%m-%dT%T.%3NZ)] executing: $JAVA_CMD"

    # exec replaces the shell with the JVM so no wrapper shell is left behind.
    # $JAVA_CMD is intentionally unquoted so it is split into arguments.
    # NOTE(review): the log lives at a fixed, predictable path under /tmp;
    # consider moving it under $LOGS_HOME once nothing depends on this path.
    exec $JAVA_CMD >> /tmp/taskrunner-upstart.log 2>&1
end script

# Record the stop request through logger before the job is stopped.
pre-stop script
    logger -is -t "$UPSTART_JOB" "[$(date -u +%Y-%m-%dT%T.%3NZ)] (sys) Stopping"
end script
@sahilsk
Copy link
Author

sahilsk commented Jun 13, 2016

@sahilsk
Copy link
Author

sahilsk commented Jun 13, 2016

IAM User

  • managed policy: AWSDataPipelineRole
{
  "Version": "2012-10-17",
  "Statement": [{
      "Effect": "Allow",
      "Action": [
        "cloudwatch:*",
        "datapipeline:DescribeObjects",
        "datapipeline:EvaluateExpression",
        "dynamodb:BatchGetItem",
        "dynamodb:DescribeTable",
        "dynamodb:GetItem",
        "dynamodb:Query",
        "dynamodb:Scan",
        "dynamodb:UpdateTable",
        "ec2:AuthorizeSecurityGroupIngress",
        "ec2:CancelSpotInstanceRequests",
        "ec2:CreateSecurityGroup",
        "ec2:CreateTags",
        "ec2:DeleteTags",
        "ec2:Describe*",
        "ec2:ModifyImageAttribute",
        "ec2:ModifyInstanceAttribute",
        "ec2:RequestSpotInstances",
        "ec2:RunInstances",
        "ec2:StartInstances",
        "ec2:StopInstances",
        "ec2:TerminateInstances",
        "ec2:AuthorizeSecurityGroupEgress", 
        "ec2:DeleteSecurityGroup", 
        "ec2:RevokeSecurityGroupEgress", 
        "ec2:DescribeNetworkInterfaces", 
        "ec2:CreateNetworkInterface", 
        "ec2:DeleteNetworkInterface", 
        "ec2:DetachNetworkInterface",
        "elasticmapreduce:*",
        "iam:GetInstanceProfile",
        "iam:GetRole",
        "iam:GetRolePolicy",
        "iam:ListAttachedRolePolicies",
        "iam:ListRolePolicies",
        "iam:ListInstanceProfiles",
        "iam:PassRole",
        "rds:DescribeDBInstances",
        "rds:DescribeDBSecurityGroups",
        "redshift:DescribeClusters",
        "redshift:DescribeClusterSecurityGroups",
        "s3:CreateBucket",
        "s3:DeleteObject",
        "s3:Get*",
        "s3:List*",
        "s3:Put*",
        "sdb:BatchPutAttributes",
        "sdb:Select*",
        "sns:GetTopicAttributes",
        "sns:ListTopics",
        "sns:Publish",
        "sns:Subscribe",
        "sns:Unsubscribe",
        "sqs:CreateQueue", 
        "sqs:Delete*", 
        "sqs:GetQueue*", 
        "sqs:PurgeQueue", 
        "sqs:ReceiveMessage" 
      ],
      "Resource": ["*"]
    }]
}

  • Custom policy

s3 permission

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt1464158960000",
            "Effect": "Allow",
            "Action": [
                "s3:GetBucketLocation",
                "s3:GetObject",
                "s3:ListBucket",
                "s3:ListBucketMultipartUploads",
                "s3:ListBucketVersions",
                "s3:ListMultipartUploadParts"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Sid": "Stmt1464159014000",
            "Effect": "Allow",
            "Action": [
                "s3:*"
            ],
            "Resource": [
                "arn:aws:s3:::feedback-test-bucket-sonu",
                "arn:aws:s3:::feedback-test-bucket-sonu/*"
            ]
        }
    ]
}

DataPipeline Permissions

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt1464589113000",
            "Effect": "Allow",
            "Action": [
                "datapipeline:GetPipelineDefinition",
                "datapipeline:PollForTask",
                "datapipeline:QueryObjects",
                "datapipeline:ReportTaskProgress",
                "datapipeline:ReportTaskRunnerHeartbeat",
                "datapipeline:SetStatus",
                "datapipeline:SetTaskStatus"
            ],
            "Resource": [
                "*"
            ]
        }
    ]
}

@sahilsk
Copy link
Author

sahilsk commented Jun 13, 2016

credentials.json

{
  "access-id":"<aws-key>",
  "private-key":"< aws secret>",
  "endpoint":"https://datapipeline.us-east-1.amazonaws.com",
  "region":"us-east-1",
  "log-uri":"s3://datapipeline-log"
}

Data Pipeline is not available in the Singapore region yet, but that's fine: the us-east-1 endpoint shown above works.

@sahilsk
Copy link
Author

sahilsk commented Jun 21, 2016

Installation instructions

Install task runner

   wget https://s3.amazonaws.com/datapipeline-us-east-1/us-east-1/software/latest/TaskRunner/TaskRunner-1.0.jar

Check this page for the latest jar before downloading: http://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-how-task-runner-user-managed.html

Install jre.

    sudo apt-get install default-jre

Confirm:
java -version

The reported Java version should be 1.6 or later.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment