Skip to content

Instantly share code, notes, and snippets.

@jhannah
Created September 5, 2025 22:20
Show Gist options
  • Save jhannah/0c8dc539f732c7b79fb5716fc7b14bdb to your computer and use it in GitHub Desktop.
Save jhannah/0c8dc539f732c7b79fb5716fc7b14bdb to your computer and use it in GitHub Desktop.
Terraform: AWS Glue: A Python script + dependencies from a requirements.txt file

Claude Sonnet 4:

In Terraform how do I create a Glue Job from Python script foo.py, and also specify additional-python-modules from a requirements.txt file I provide?

# S3 bucket for Glue scripts.
# In this configuration it holds the job script (under scripts/) and is also
# used for the job's --TempDir (under temp/).
# NOTE(review): S3 bucket names are globally unique; "my-glue-scripts-bucket"
# looks like a placeholder and should be replaced with a name you own.
resource "aws_s3_bucket" "glue_scripts" {
  bucket = "my-glue-scripts-bucket"
}

# Upload the Glue job's Python script to the scripts bucket.
resource "aws_s3_object" "glue_script" {
  bucket = aws_s3_bucket.glue_scripts.bucket
  key    = "scripts/foo.py"
  source = "${path.module}/scripts/foo.py"

  # Re-upload whenever the local file's content changes. source_hash is used
  # instead of etag because the S3 ETag is not the file's MD5 when the object
  # is encrypted with SSE-KMS or uploaded in multiple parts, which would make
  # an etag-based comparison report a change on every plan.
  source_hash = filemd5("${path.module}/scripts/foo.py")
}

# Execution role for the Glue job.
resource "aws_iam_role" "glue_role" {
  name = "glue-job-role"

  # Trust policy: allow the AWS Glue service principal to assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect    = "Allow"
      Action    = "sts:AssumeRole"
      Principal = { Service = "glue.amazonaws.com" }
    }]
  })
}

# Attach the AWS-managed baseline policy for Glue service roles.
resource "aws_iam_role_policy_attachment" "glue_service_role" {
  role       = aws_iam_role.glue_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
}

# The managed AWSGlueServiceRole policy scopes S3 object access to resources
# whose names contain "aws-glue-". The scripts bucket in this configuration
# does not follow that naming, so the job role needs explicit access to it in
# order to read the script and to read/write the --TempDir prefix.
resource "aws_iam_role_policy" "glue_scripts_bucket_access" {
  name = "glue-scripts-bucket-access"
  role = aws_iam_role.glue_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        # Bucket-level actions apply to the bucket ARN itself.
        Effect   = "Allow"
        Action   = ["s3:ListBucket", "s3:GetBucketLocation"]
        Resource = aws_s3_bucket.glue_scripts.arn
      },
      {
        # Object-level actions apply to keys inside the bucket.
        Effect   = "Allow"
        Action   = ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"]
        Resource = "${aws_s3_bucket.glue_scripts.arn}/*"
      }
    ]
  })
}

# Read requirements.txt and turn it into the comma-separated list that Glue's
# --additional-python-modules argument expects.
locals {
  requirements_content = file("${path.module}/requirements.txt")

  # For each line:
  #   1. Strip comments using pip's rule: a '#' at the start of the line or
  #      preceded by whitespace starts a comment. A '#' inside a token (e.g. a
  #      URL fragment such as "#egg=name") is left untouched.
  #   2. Trim surrounding whitespace (this also removes a trailing "\r" from
  #      CRLF-terminated files).
  # compact() then drops the entries that ended up empty (blank lines and
  # comment-only lines) before the remaining requirements are joined.
  #
  # NOTE(review): a requirement that itself contains a comma (for example
  # "pkg>=1.0,<2.0") would be split by the comma-separated format; confirm
  # requirements.txt does not rely on that.
  python_modules = join(",", compact([
    for line in split("\n", local.requirements_content) :
    trimspace(replace(line, "/(^|\\s)#.*$/", ""))
  ]))
}

# Glue Job that runs the uploaded Python script with the extra Python modules
# listed in requirements.txt.
resource "aws_glue_job" "etl_job" {
  name         = "my-etl-job"
  role_arn     = aws_iam_role.glue_role.arn
  glue_version = "4.0"

  command {
    # Build the location from the uploaded object's own attributes rather than
    # repeating the key as a literal: the path cannot drift from the upload,
    # and Terraform orders the job after the script object.
    script_location = "s3://${aws_s3_object.glue_script.bucket}/${aws_s3_object.glue_script.key}"
    python_version  = "3"
  }

  default_arguments = {
    "--job-bookmark-option"              = "job-bookmark-enable"
    "--enable-metrics"                   = "true"
    "--enable-continuous-cloudwatch-log" = "true"
    # Comma-separated module list derived from requirements.txt.
    "--additional-python-modules" = local.python_modules
    "--TempDir"                   = "s3://${aws_s3_bucket.glue_scripts.bucket}/temp/"
  }

  execution_property {
    max_concurrent_runs = 1
  }

  max_retries       = 1
  timeout           = 120 # minutes
  worker_type       = "G.1X"
  number_of_workers = 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment