Last active
December 11, 2020 08:48
-
-
Save cagdas1/ddbef58c2d3a3fa92be4ddd15721fcd0 to your computer and use it in GitHub Desktop.
Mongo ETL w/Glue
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as cdk from "@aws-cdk/core"; | |
import * as glue from "@aws-cdk/aws-glue"; | |
import * as s3 from "@aws-cdk/aws-s3"; | |
import * as s3Deployment from "@aws-cdk/aws-s3-deployment"; | |
import * as iam from "@aws-cdk/aws-iam"; | |
import { replaceValues } from "./lib"; | |
import { config } from "dotenv"; | |
// Load variables from a local .env file into process.env before they are read below.
config();

// Python version passed to the Glue job command.
const PYTHON_VERSION = "3";
// AWS Glue version the job is created with.
const GLUE_VERSION = "1.0";
// This value must be "glueetl" for Apache Spark.
const COMMAND_NAME = "glueetl";
// Object key, inside the ETL bucket, of the MongoDB JDBC driver jar.
const JDBC_PATH = "dependencies/cdata.jdbc.mongodb.jar";

// Deployment settings read from the environment. Each one is `string | undefined`
// at this point; consumers are responsible for checking the ones they require.
const {
  RTK,
  MONGO_SERVER,
  MONGO_USER,
  MONGO_PASSWORD,
  MONGO_PORT,
  MONGO_SSL,
  MONGO_DATABASE,
  COLLECTIONS,
  BUCKET_NAME
} = process.env;
/**
 * CDK stack for a scheduled MongoDB ETL job on AWS Glue.
 *
 * It creates an S3 bucket, uploads the JDBC dependencies and the job script
 * into it, creates an IAM role for Glue, defines the Glue Spark job that runs
 * `scripts/script.py`, and attaches a scheduled trigger to that job.
 *
 * All connection settings come from the module-level environment values.
 */
class MongoGlueETLStack extends cdk.Stack {
  /**
   * @param scope Parent construct (normally the CDK app).
   * @param id Logical id of the stack.
   * @param props Optional standard stack properties.
   * @throws Error when any required environment variable is unset.
   */
  constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // Fail fast with one clear message instead of silently baking the text
    // "undefined" into the job script or the S3 output path.
    // MONGO_SSL is intentionally not required: anything but "true" means False.
    const settings = MongoGlueETLStack.requireSettings({
      RTK,
      MONGO_SERVER,
      MONGO_USER,
      MONGO_PASSWORD,
      MONGO_PORT,
      MONGO_DATABASE,
      COLLECTIONS,
      BUCKET_NAME
    });

    const s3Bucket = new s3.Bucket(this, "etl-bucket", {
      bucketName: settings.BUCKET_NAME,
      removalPolicy: cdk.RemovalPolicy.DESTROY
    });

    // NOTE(review): this asset path points one directory up, unlike the
    // "scripts" asset below — confirm it matches the project layout.
    const dependenciesDeployment = new s3Deployment.BucketDeployment(this, "dependencies-deployment", {
      sources: [s3Deployment.Source.asset("../dependencies")],
      destinationBucket: s3Bucket,
      destinationKeyPrefix: "dependencies"
    });

    // Replace hardcoded values in the script. This runs before the "scripts"
    // asset is declared, presumably so the uploaded copy carries the values.
    // NOTE(review): the Mongo password appears to end up in plain text inside
    // the script stored in S3 — consider a Glue connection or Secrets Manager.
    replaceValues(
      "scripts/script.py",
      settings.RTK,
      settings.MONGO_SERVER,
      settings.MONGO_USER,
      settings.MONGO_PASSWORD,
      settings.MONGO_PORT,
      MONGO_SSL === "true" ? "True" : "False",
      settings.MONGO_DATABASE,
      `s3://${settings.BUCKET_NAME}/${settings.MONGO_DATABASE}/`,
      settings.COLLECTIONS
    );

    const scriptsDeployment = new s3Deployment.BucketDeployment(this, "scripts-deployment", {
      sources: [s3Deployment.Source.asset("scripts")],
      destinationBucket: s3Bucket,
      destinationKeyPrefix: "scripts"
    });

    const glueRole = new iam.Role(this, "glue-role", {
      roleName: "glue-etl-role",
      assumedBy: new iam.ServicePrincipal("glue.amazonaws.com"),
      managedPolicies: [
        // General Glue permissions that AWS documents as required for a job role.
        iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AWSGlueServiceRole"),
        iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3FullAccess")
      ]
    });

    const glueJob = new glue.CfnJob(this, "glue-job", {
      name: "glue-job",
      role: glueRole.roleArn,
      command: {
        name: COMMAND_NAME,
        pythonVersion: PYTHON_VERSION,
        scriptLocation: `s3://${s3Bucket.bucketName}/scripts/script.py`
      },
      glueVersion: GLUE_VERSION,
      defaultArguments: {
        "--extra-jars": `s3://${s3Bucket.bucketName}/${JDBC_PATH}`
      }
    });
    // The job reads the script and the JDBC jar from the bucket, so create it
    // (and therefore its trigger) only after both uploads have completed.
    glueJob.node.addDependency(dependenciesDeployment, scriptsDeployment);

    const glueTrigger = new glue.CfnTrigger(this, "glue-trigger", {
      name: "etl-trigger",
      // Fires at minute 5 of every hour.
      schedule: "cron(5 * * * ? *)",
      type: "SCHEDULED",
      actions: [
        {
          jobName: glueJob.name
        }
      ],
      startOnCreation: true
    });
    // The trigger refers to the job by its literal name, which creates no
    // implicit CloudFormation dependency, so declare the ordering explicitly.
    glueTrigger.addDependsOn(glueJob);
  }

  /**
   * Checks that every given setting is defined.
   *
   * @param settings Map of environment variable name to its (possibly unset) value.
   * @returns The same map, typed with every value as a string.
   * @throws Error naming every variable whose value is undefined.
   */
  private static requireSettings<K extends string>(
    settings: Record<K, string | undefined>
  ): Record<K, string> {
    const missing = Object.keys(settings).filter(
      (name) => settings[name as K] === undefined
    );
    if (missing.length > 0) {
      throw new Error(`Missing required environment variable(s): ${missing.join(", ")}`);
    }
    // Safe: the check above proved that no value is undefined.
    return settings as Record<K, string>;
  }
}
// Entry point: build the CDK app and register the ETL stack in it.
const app = new cdk.App();
new MongoGlueETLStack(app, "MongoGlueETLStack", {
  env: {
    // Explicit AWS_* values win; otherwise fall back to the account and region
    // the CDK CLI resolves from the active credentials/profile.
    region: process.env.AWS_REGION || process.env.CDK_DEFAULT_REGION,
    account: process.env.AWS_ACCOUNT_ID || process.env.CDK_DEFAULT_ACCOUNT
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment