Last active
April 13, 2022 16:11
-
-
Save edonosotti/1fab01f479ae4f76d145972294d9a9b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ==================================================
# Create a BigQuery Data Transfer job with Terraform
# ==================================================
# This code was written for this article:
# https://medium.com/@edonosotti/importing-big-data-in-bigquery-with-data-transfer-service-and-terraform-dc55fdfdc821
#
# CSV file template: <<EOF
# customer_id,card_id,amount
# 1,0001,10.00
# 2,0002,2.00
# 3,0003,8.00
# EOF
# --------------------------------------------------
# VARIABLES
# Set these before applying the configuration
# Do not store sensitive values in a configuration
# file, pass them at runtime via environment
# variables or with `-var`
# --------------------------------------------------
# Google Cloud Platform project all resources are created in.
variable "project_id" {
  type        = string
  description = "Google Cloud Platform Project ID"
}

# Region used for the dataset and the transfer configuration.
variable "region" {
  type        = string
  description = "Google Cloud Platform region"
}

# Source bucket holding the CSV files to import.
variable "aws_s3_bucket_name" {
  type        = string
  description = "Name of the AWS S3 bucket"
}

# AWS credentials used by the Data Transfer Service to read the
# bucket. Marked `sensitive` so Terraform redacts the values from
# plan/apply output (requires Terraform >= 0.14).
variable "aws_access_key_id" {
  type        = string
  description = "AWS Access Key ID"
  sensitive   = true
}

variable "aws_secret_access_key" {
  type        = string
  description = "AWS Secret Access Key"
  sensitive   = true
}
# --------------------------------------------------
# TERRAFORM CONFIGURATION
# Setting the provider and the project
# --------------------------------------------------
# Default google provider, pinned to the target project and region.
# BUG FIX: the original set `zone = var.zone`, but no "zone" variable
# is declared anywhere in this file, so `terraform validate` fails on
# an undeclared variable. None of the resources below are zonal, so
# the attribute is removed rather than declaring an unused variable.
provider "google" {
  project               = var.project_id
  region                = var.region
  user_project_override = true
}
# --------------------------------------------------
# RESOURCES
# Note the comments below
# --------------------------------------------------
# Make sure the BigQuery APIs required by the resources below are
# enabled on the project before anything else is created.
resource "google_project_service" "services" {
  for_each = toset([
    "bigquery.googleapis.com",
    "bigquerydatatransfer.googleapis.com",
    "bigquerystorage.googleapis.com"
  ])

  project = var.project_id
  service = each.value

  # Leave the APIs (and anything that depends on them) untouched
  # when this configuration is destroyed.
  disable_on_destroy         = false
  disable_dependent_services = false
}
# Dataset that receives the imported transaction data.
resource "google_bigquery_dataset" "import_dataset" {
  dataset_id = "import_data"
  location   = var.region

  # Explicit dependency: the BigQuery APIs must be enabled first.
  depends_on = [google_project_service.services]
}
# Destination table for the imported CSV rows.
resource "google_bigquery_table" "import_transactions" {
  dataset_id = google_bigquery_dataset.import_dataset.dataset_id
  table_id   = "import_transactions"

  # Partition the table per ingestion day.
  time_partitioning {
    type = "DAY"
  }

  schema = jsonencode([
    {
      "name" = "customer_id",
      "type" = "INT64",
      # BUG FIX: the original had `"mode" = "STRING"`, but "STRING"
      # is a column *type*, not a mode — valid modes are NULLABLE,
      # REQUIRED and REPEATED, so the table creation would be
      # rejected. The other columns use REQUIRED; this one now does too.
      "mode"        = "REQUIRED",
      "description" = "Customer ID"
    },
    {
      "name"        = "card_id",
      "type"        = "STRING",
      "mode"        = "REQUIRED",
      "description" = "Card ID"
    },
    {
      "name"        = "amount",
      "type"        = "FLOAT64",
      "mode"        = "REQUIRED",
      "description" = "Paid amount"
    }
  ])

  depends_on = [google_bigquery_dataset.import_dataset]
}
# Scheduled Data Transfer Service job that pulls the CSV files from
# the S3 bucket into the destination table every night at 00:30.
resource "google_bigquery_data_transfer_config" "import_connections" {
  project                = var.project_id
  display_name           = "import-transactions"
  location               = var.region
  data_source_id         = "amazon_s3"
  schedule               = "every day 00:30"
  destination_dataset_id = google_bigquery_dataset.import_dataset.dataset_id

  params = {
    destination_table_name_template = google_bigquery_table.import_transactions.table_id
    # Matches files dated one day behind the run time, e.g.
    #   s3://<bucket>/Transactions/2022-01-01 */CreditCard-xyz.csv
    # NOTE(review): the space before "*/" looks suspicious — if the
    # date-named prefixes have no trailing space in the bucket, this
    # should read `...%Y-%m-%d\"}*/CreditCard-*.csv`. Confirm against
    # the actual bucket layout before changing it.
    data_path     = "s3://${var.aws_s3_bucket_name}/Transactions/{run_time-24h|\"%Y-%m-%d\"} */CreditCard-*.csv"
    access_key_id = var.aws_access_key_id

    file_format = "CSV"
    # BUG FIX: the CSV template documented at the top of this file has
    # a header row (customer_id,card_id,amount). With the original
    # `skip_leading_rows = 0` the header would be ingested as data and
    # fail the INT64/FLOAT64 conversions, so one leading row is skipped.
    skip_leading_rows     = 1
    max_bad_records       = 0
    ignore_unknown_values = false
    field_delimiter       = ","
    allow_quoted_newlines = false
    allow_jagged_rows     = false
  }

  # The secret key goes in sensitive_params so it is not exposed in
  # plan/apply output.
  sensitive_params {
    secret_access_key = var.aws_secret_access_key
  }

  email_preferences {
    enable_failure_email = true
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment