Created
April 26, 2023 11:36
-
-
Save divyang4481/e240c4dbdd6ac61b8deeff9b9e8e5654 to your computer and use it in GitHub Desktop.
org is remove from tf script for https://github.com/GoogleCloudPlatform/dataplex-labs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Copyright 2022 Google LLC | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
/****************************************** | |
Local variables declaration | |
*****************************************/ | |
locals { | |
project_id = "${var.project_id}" | |
project_nbr = "${var.project_number}" | |
admin_upn_fqn = "${var.gcp_account_name}" | |
location = "${var.gcp_region}" | |
location_multi = "${var.gcp_multi_region}" | |
zone = "${var.gcp_zone}" | |
umsa = "lab-sa" | |
umsa_fqn = "${local.umsa}@${local.project_id}.iam.gserviceaccount.com" | |
lab_dpms_nm = "lab-dpms-${local.project_nbr}" | |
lab_spark_bucket = "lab-spark-bucket-${local.project_nbr}" | |
lab_spark_bucket_fqn = "gs://dew-lab-spark-${local.project_nbr}" | |
lab_vpc_nm = "lab-vpc-${local.project_nbr}" | |
lab_subnet_nm = "lab-snet" | |
lab_subnet_cidr = "10.0.0.0/16" | |
lab_sensitive_data_bucket_raw= "raw-data-sensitive-${local.project_nbr}" | |
lab_data_bucket_raw = "raw-data-${local.project_nbr}" | |
lab_code_bucket = "raw-code-${local.project_nbr}" | |
lab_notebook_bucket = "raw-notebook-${local.project_nbr}" | |
lab_model_bucket = "raw-model-${local.project_nbr}" | |
lab_bundle_bucket = "raw-model-mleap-bundle-${local.project_nbr}" | |
lab_metrics_bucket = "raw-model-metrics-${local.project_nbr}" | |
lab_scheduled_output_bucket = "scheduled-runs-output-${local.project_nbr}" | |
lab_data_bucket_curated = "curated-data-${local.project_nbr}" | |
lab_data_bucket_product = "product-data-${local.project_nbr}" | |
CC_GMSA_FQN = "service-${local.project_nbr}@cloudcomposer-accounts.iam.gserviceaccount.com" | |
GCE_GMSA_FQN = "${local.project_nbr}[email protected]" | |
CLOUD_COMPOSER2_IMG_VERSION = "${var.cloud_composer_image_version}" | |
bq_connector_jar_gcs_uri = "${var.bq_connector_jar_gcs_uri}" | |
} | |
/****************************************** | |
1. Enable Google APIs in parallel | |
*****************************************/ | |
module "activate_service_apis" { | |
source = "terraform-google-modules/project-factory/google//modules/project_services" | |
project_id = var.project_id | |
enable_apis = true | |
activate_apis = [ | |
"compute.googleapis.com", | |
"dataproc.googleapis.com", | |
"bigqueryconnection.googleapis.com", | |
"bigquerydatapolicy.googleapis.com", | |
"storage-component.googleapis.com", | |
"bigquerystorage.googleapis.com", | |
"datacatalog.googleapis.com", | |
"dataplex.googleapis.com", | |
"bigquery.googleapis.com" , | |
"cloudresourcemanager.googleapis.com", | |
"cloudidentity.googleapis.com", | |
"storage.googleapis.com", | |
"composer.googleapis.com", | |
"metastore.googleapis.com", | |
"dlp.googleapis.com", | |
"logging.googleapis.com", | |
"monitoring.googleapis.com", | |
"dataplex.googleapis.com", | |
"datacatalog.googleapis.com", | |
"cloudresourcemanager.googleapis.com", | |
"datapipelines.googleapis.com", | |
"cloudscheduler.googleapis.com", | |
"datalineage.googleapis.com" | |
] | |
disable_services_on_destroy = false | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_activate_service_apis" { | |
create_duration = "60s" | |
depends_on = [ | |
module.activate_service_apis | |
] | |
} | |
/****************************************** | |
2. Project-scoped Org Policy Updates | |
*****************************************/ | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_apis_and_org_policies" { | |
create_duration = "60s" | |
depends_on = [ | |
time_sleep.sleep_after_activate_service_apis | |
] | |
} | |
/****************************************** | |
3. Create User Managed Service Account | |
*****************************************/ | |
module "umsa_creation" { | |
source = "terraform-google-modules/service-accounts/google" | |
project_id = local.project_id | |
names = ["${local.umsa}"] | |
display_name = "User Managed Service Account" | |
description = "User Managed Service Account for Dataplex lab" | |
depends_on = [time_sleep.sleep_after_apis_and_org_policies] | |
} | |
/****************************************** | |
4a. Grant IAM roles to User Managed Service Account | |
*****************************************/ | |
module "umsa_role_grants" { | |
source = "terraform-google-modules/iam/google//modules/member_iam" | |
service_account_address = "${local.umsa_fqn}" | |
prefix = "serviceAccount" | |
project_id = local.project_id | |
project_roles = [ | |
"roles/iam.serviceAccountUser", | |
"roles/iam.serviceAccountTokenCreator", | |
"roles/storage.objectAdmin", | |
"roles/storage.admin", | |
"roles/metastore.admin", | |
"roles/metastore.editor", | |
"roles/metastore.user", | |
"roles/metastore.metadataEditor", | |
"roles/dataproc.worker", | |
"roles/dataproc.editor", | |
"roles/bigquery.dataEditor", | |
"roles/bigquery.admin", | |
"roles/viewer", | |
"roles/composer.worker", | |
"roles/composer.admin", | |
"roles/serviceusage.serviceUsageConsumer" | |
] | |
depends_on = [ | |
module.umsa_creation | |
] | |
} | |
# IAM role grants to Google Managed Service Account for Cloud Composer 2 | |
module "gmsa_role_grants_cc" { | |
source = "terraform-google-modules/iam/google//modules/member_iam" | |
service_account_address = "${local.CC_GMSA_FQN}" | |
prefix = "serviceAccount" | |
project_id = local.project_id | |
project_roles = [ | |
"roles/composer.ServiceAgentV2Ext", | |
] | |
depends_on = [ | |
module.umsa_role_grants | |
] | |
} | |
# IAM role grants to Google Managed Service Account for Compute Engine (for Cloud Composer 2 to download images) | |
module "gmsa_role_grants_gce" { | |
source = "terraform-google-modules/iam/google//modules/member_iam" | |
service_account_address = "${local.GCE_GMSA_FQN}" | |
prefix = "serviceAccount" | |
project_id = local.project_id | |
project_roles = [ | |
"roles/editor", | |
] | |
depends_on = [ | |
module.umsa_role_grants | |
] | |
} | |
/****************************************************** | |
5. Grant Service Account Impersonation privilege to yourself/Admin User | |
******************************************************/ | |
module "umsa_impersonate_privs_to_admin" { | |
source = "terraform-google-modules/iam/google//modules/service_accounts_iam/" | |
service_accounts = ["${local.umsa_fqn}"] | |
project = local.project_id | |
mode = "additive" | |
bindings = { | |
"roles/iam.serviceAccountUser" = [ | |
"user:${local.admin_upn_fqn}" | |
], | |
"roles/iam.serviceAccountTokenCreator" = [ | |
"user:${local.admin_upn_fqn}" | |
] | |
} | |
depends_on = [ | |
module.umsa_creation | |
] | |
} | |
/****************************************************** | |
6. Grant IAM roles to Admin User/yourself | |
******************************************************/ | |
module "administrator_role_grants" { | |
source = "terraform-google-modules/iam/google//modules/projects_iam" | |
projects = ["${local.project_id}"] | |
mode = "additive" | |
bindings = { | |
"roles/storage.admin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/metastore.admin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/dataproc.admin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/bigquery.admin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/bigquery.user" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/bigquery.dataEditor" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/bigquery.jobUser" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/composer.environmentAndStorageObjectViewer" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/iam.serviceAccountUser" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/iam.serviceAccountTokenCreator" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/composer.admin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
"roles/compute.networkAdmin" = [ | |
"user:${local.admin_upn_fqn}", | |
] | |
} | |
depends_on = [ | |
module.umsa_role_grants, | |
module.umsa_impersonate_privs_to_admin | |
] | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_identities_permissions" { | |
create_duration = "120s" | |
depends_on = [ | |
module.umsa_creation, | |
module.umsa_role_grants, | |
module.umsa_impersonate_privs_to_admin, | |
module.administrator_role_grants, | |
module.gmsa_role_grants_cc, | |
module.gmsa_role_grants_gce | |
] | |
} | |
/************************************************************************ | |
7. Create VPC network & subnet | |
***********************************************************************/ | |
module "vpc_creation" { | |
source = "terraform-google-modules/network/google" | |
project_id = local.project_id | |
network_name = local.lab_vpc_nm | |
routing_mode = "REGIONAL" | |
subnets = [ | |
{ | |
subnet_name = "${local.lab_subnet_nm}" | |
subnet_ip = "${local.lab_subnet_cidr}" | |
subnet_region = "${local.location}" | |
subnet_range = local.lab_subnet_cidr | |
subnet_private_access = true | |
} | |
] | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
/****************************************** | |
8. Create Firewall rules | |
*****************************************/ | |
resource "google_compute_firewall" "allow_intra_snet_ingress_to_any" { | |
project = local.project_id | |
name = "allow-intra-snet-ingress-to-any" | |
network = local.lab_vpc_nm | |
direction = "INGRESS" | |
source_ranges = [local.lab_subnet_cidr] | |
allow { | |
protocol = "all" | |
} | |
description = "Creates firewall rule to allow ingress from within subnet on all ports, all protocols" | |
depends_on = [ | |
module.vpc_creation | |
] | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_network_and_firewall_creation" { | |
create_duration = "120s" | |
depends_on = [ | |
module.vpc_creation, | |
google_compute_firewall.allow_intra_snet_ingress_to_any | |
] | |
} | |
/****************************************** | |
9. Create Storage bucket | |
*****************************************/ | |
resource "google_storage_bucket" "lab_spark_bucket_creation" { | |
project = local.project_id | |
name = local.lab_spark_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_data_bucket_raw_creation" { | |
project = local.project_id | |
name = local.lab_data_bucket_raw | |
location = local.location_multi | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_sensitive_data_bucket_raw_creation" { | |
project = local.project_id | |
name = local.lab_sensitive_data_bucket_raw | |
location = local.location_multi | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_code_bucket_creation" { | |
project = local.project_id | |
name = local.lab_code_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_notebook_bucket_creation" { | |
project = local.project_id | |
name = local.lab_notebook_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_model_bucket_creation" { | |
project = local.project_id | |
name = local.lab_model_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_metrics_bucket_creation" { | |
project = local.project_id | |
name = local.lab_metrics_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_bundle_bucket_creation" { | |
project = local.project_id | |
name = local.lab_bundle_bucket | |
location = local.location | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_data_bucket_curated_creation" { | |
project = local.project_id | |
name = local.lab_data_bucket_curated | |
location = local.location_multi | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
resource "google_storage_bucket" "lab_data_bucket_product_creation" { | |
project = local.project_id | |
name = local.lab_data_bucket_product | |
location = local.location_multi | |
uniform_bucket_level_access = true | |
force_destroy = true | |
depends_on = [ | |
time_sleep.sleep_after_identities_permissions | |
] | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_bucket_creation" { | |
create_duration = "60s" | |
depends_on = [ | |
google_storage_bucket.lab_data_bucket_raw_creation, | |
google_storage_bucket.lab_code_bucket_creation, | |
google_storage_bucket.lab_notebook_bucket_creation, | |
google_storage_bucket.lab_spark_bucket_creation, | |
google_storage_bucket.lab_model_bucket_creation, | |
google_storage_bucket.lab_metrics_bucket_creation, | |
google_storage_bucket.lab_bundle_bucket_creation, | |
google_storage_bucket.lab_data_bucket_curated_creation, | |
google_storage_bucket.lab_data_bucket_product_creation, | |
google_storage_bucket.lab_sensitive_data_bucket_raw_creation | |
] | |
} | |
/****************************************** | |
10. Copy of datasets, scripts and notebooks to buckets | |
******************************************/ | |
variable "notebooks_to_upload" { | |
type = map(string) | |
default = { | |
"../notebooks/chicago-crimes-analysis/chicago-crimes-analytics.ipynb" = "chicago-crimes-analysis/chicago-crimes-analytics.ipynb", | |
"../notebooks/icecream-sales-forecasting/icecream-sales-forecasting.ipynb" = "icecream-sales-forecasting/icecream-sales-forecasting.ipynb", | |
"../notebooks/telco-customer-churn-prediction/preprocessing.ipynb" = "telco-customer-churn-prediction/preprocessing.ipynb", | |
"../notebooks/telco-customer-churn-prediction/model_training.ipynb" = "telco-customer-churn-prediction/model_training.ipynb", | |
"../notebooks/telco-customer-churn-prediction/hyperparameter_tuning.ipynb" = "telco-customer-churn-prediction/hyperparameter_tuning.ipynb", | |
"../notebooks/telco-customer-churn-prediction/batch_scoring.ipynb" = "telco-customer-churn-prediction/batch_scoring.ipynb", | |
"../notebooks/retail-transactions-anomaly-detection/retail-transactions-anomaly-detection.ipynb" = "retail-transactions-anomaly-detection/retail-transactions-anomaly-detection.ipynb", | |
} | |
} | |
resource "google_storage_bucket_object" "upload_to_gcs_notebooks" { | |
for_each = var.notebooks_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = "${local.lab_notebook_bucket}" | |
depends_on = [ | |
time_sleep.sleep_after_bucket_creation | |
] | |
} | |
variable "csv_datasets_to_upload" { | |
type = map(string) | |
default = { | |
"../datasets/cell-tower-anomaly-detection/reference_data/ctad_service_threshold_ref.csv"="cell-tower-anomaly-detection/reference_data/ctad_service_threshold_ref.csv", | |
"../datasets/cell-tower-anomaly-detection/transactions_data/ctad_transactions.csv"="cell-tower-anomaly-detection/transactions_data/ctad_transactions.csv", | |
"../datasets/icecream-sales-forecasting/isf_icecream_sales_transactions.csv"="icecream-sales-forecasting/isf_icecream_sales_transactions.csv", | |
"../datasets/telco-customer-churn-prediction/machine_learning_scoring/tccp_customer_churn_score_candidates.csv"="telco-customer-churn-prediction/machine_learning_scoring/tccp_customer_churn_score_candidates.csv", | |
"../datasets/telco-customer-churn-prediction/machine_learning_training/tccp_customer_churn_train_candidates.csv"="telco-customer-churn-prediction/machine_learning_training/tccp_customer_churn_train_candidates.csv", | |
"../datasets/chicago-crimes/reference_data/crimes_chicago_iucr_ref.csv"="chicago-crimes/reference_data/crimes_chicago_iucr_ref.csv", | |
} | |
} | |
resource "google_storage_bucket_object" "upload_to_gcs_datasets_raw" { | |
for_each = var.csv_datasets_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = "${local.lab_data_bucket_raw}" | |
depends_on = [ | |
time_sleep.sleep_after_bucket_creation | |
] | |
} | |
variable "sensitive_csv_datasets_to_upload" { | |
type = map(string) | |
default = { | |
"../datasets/banking/customers_raw/credit_card_customers/date=2022-05-01/credit_card_customers.csv"="credit_card_customers/date=2022-05-01/credit_card_customers.csv", | |
"../datasets/banking/customers_raw/customers/date=2022-05-01/customers.csv"="customers/date=2022-05-01/customers.csv", | |
} | |
} | |
resource "google_storage_bucket_object" "upload_to_gcs_sensitive_datasets_raw" { | |
for_each = var.sensitive_csv_datasets_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = "${local.lab_sensitive_data_bucket_raw}" | |
depends_on = [ | |
time_sleep.sleep_after_bucket_creation | |
] | |
} | |
variable "parquet_datasets_to_upload" { | |
type = map(string) | |
default = { | |
"../datasets/cell-tower-anomaly-detection/master_data/ctad_part-00000-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet"="cell-tower-anomaly-detection/master_data/ctad_part-00000-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet", | |
"../datasets/cell-tower-anomaly-detection/master_data/ctad_part-00001-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet"="cell-tower-anomaly-detection/master_data/ctad_part-00001-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet", | |
"../datasets/cell-tower-anomaly-detection/master_data/ctad_part-00002-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet"="cell-tower-anomaly-detection/master_data/ctad_part-00002-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet", | |
"../datasets/cell-tower-anomaly-detection/master_data/ctad_part-00003-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet"="cell-tower-anomaly-detection/master_data/ctad_part-00003-fc7d6e20-dbda-4143-91b5-d9414310dfd1-c000.snappy.parquet" | |
"../datasets/retail-transactions-anomaly-detection/rtad_sales.parquet"="retail-transactions-anomaly-detection/rtad_sales.parquet" | |
} | |
} | |
resource "google_storage_bucket_object" "upload_to_gcs_datasets_curated" { | |
for_each = var.parquet_datasets_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = "${local.lab_data_bucket_curated}" | |
depends_on = [ | |
time_sleep.sleep_after_bucket_creation | |
] | |
} | |
variable "code_to_upload" { | |
type = map(string) | |
default = { | |
"../scripts/spark-sql/retail-transactions-anomaly-detection/retail-transactions-anomaly-detection.sql" = "spark-sql/retail-transactions-anomaly-detection/retail-transactions-anomaly-detection.sql" | |
"../scripts/pyspark/chicago-crimes-analytics/curate_crimes.py" = "pyspark/chicago-crimes-analytics/curate_crimes.py" | |
"../scripts/pyspark/chicago-crimes-analytics/crimes_report.py" = "pyspark/chicago-crimes-analytics/crimes_report.py" | |
"../scripts/pyspark/nyc-taxi-trip-analytics/taxi_trips_data_generator.py" = "pyspark/nyc-taxi-trip-analytics/taxi_trips_data_generator.py" | |
"../scripts/airflow/chicago-crimes-analytics/bq_lineage_pipeline.py" = "airflow/chicago-crimes-analytics/bq_lineage_pipeline.py" | |
"../scripts/airflow/chicago-crimes-analytics/spark_custom_lineage_pipeline.py" = "airflow/chicago-crimes-analytics/spark_custom_lineage_pipeline.py" | |
} | |
} | |
resource "google_storage_bucket_object" "upload_to_gcs_code_raw" { | |
for_each = var.code_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = "${local.lab_code_bucket}" | |
depends_on = [ | |
time_sleep.sleep_after_bucket_creation | |
] | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_network_and_storage_steps" { | |
create_duration = "120s" | |
depends_on = [ | |
time_sleep.sleep_after_network_and_firewall_creation, | |
time_sleep.sleep_after_bucket_creation, | |
google_storage_bucket_object.upload_to_gcs_datasets_raw, | |
google_storage_bucket_object.upload_to_gcs_datasets_curated, | |
google_storage_bucket_object.upload_to_gcs_notebooks, | |
google_storage_bucket_object.upload_to_gcs_code_raw | |
] | |
} | |
/****************************************** | |
11. Dataproc Metastore with gRPC endpoint | |
******************************************/ | |
resource "google_dataproc_metastore_service" "datalake_metastore" { | |
provider = google-beta | |
service_id = local.lab_dpms_nm | |
location = local.location | |
tier = "DEVELOPER" | |
maintenance_window { | |
hour_of_day = 2 | |
day_of_week = "SUNDAY" | |
} | |
hive_metastore_config { | |
version = "3.1.2" | |
endpoint_protocol = "GRPC" | |
} | |
metadata_integration { | |
data_catalog_config { | |
enabled = true | |
} | |
} | |
depends_on = [ | |
module.administrator_role_grants, | |
time_sleep.sleep_after_network_and_storage_steps | |
] | |
} | |
/****************************************** | |
12. Cloud Composer | |
******************************************/ | |
resource "google_composer_environment" "create_cloud_composer_env" { | |
name = "oda-${local.project_nbr}-cc2" | |
region = local.location | |
provider = google-beta | |
config { | |
software_config { | |
image_version = local.CLOUD_COMPOSER2_IMG_VERSION | |
env_variables = { | |
AIRFLOW_VAR_PROJECT_ID = "${local.project_id}" | |
AIRFLOW_VAR_PROJECT_NBR = "${local.project_nbr}" | |
AIRFLOW_VAR_REGION = "${local.location}" | |
AIRFLOW_VAR_REGION_MULTI = "${local.location_multi}" | |
AIRFLOW_VAR_SUBNET = "${local.lab_subnet_nm}" | |
AIRFLOW_VAR_UMSA = "${local.umsa}" | |
} | |
} | |
node_config { | |
network = local.lab_vpc_nm | |
subnetwork = local.lab_subnet_nm | |
service_account = local.umsa_fqn | |
} | |
} | |
depends_on = [ | |
time_sleep.sleep_after_network_and_firewall_creation | |
] | |
timeouts { | |
create = "75m" | |
} | |
} | |
/******************************************* | |
Introducing sleep to minimize errors from | |
dependencies having not completed | |
********************************************/ | |
resource "time_sleep" "sleep_after_composer_creation" { | |
create_duration = "180s" | |
depends_on = [ | |
google_composer_environment.create_cloud_composer_env | |
] | |
} | |
/****************************************** | |
13. Cloud Composer 2 DAG bucket capture so we can upload DAG to it | |
******************************************/ | |
output "CLOUD_COMPOSER_DAG_BUCKET" { | |
value = google_composer_environment.create_cloud_composer_env.config.0.dag_gcs_prefix | |
} | |
/******************************************* | |
14. Upload Airflow DAG to Composer DAG bucket | |
******************************************/ | |
variable "airflow_dags_to_upload" { | |
type = map(string) | |
default = { | |
"../scripts/airflow/chicago-crimes-analytics/bq_lineage_pipeline.py" = "dags/chicago-crimes-analytics/bq_lineage_pipeline.py", | |
"../scripts/airflow/chicago-crimes-analytics/spark_custom_lineage_pipeline.py" = "dags/chicago-crimes-analytics/spark_custom_lineage_pipeline.py" | |
} | |
} | |
resource "google_storage_bucket_object" "upload_dags_to_airflow_dag_bucket" { | |
for_each = var.airflow_dags_to_upload | |
name = each.value | |
source = "${path.module}/${each.key}" | |
bucket = substr(substr(google_composer_environment.create_cloud_composer_env.config.0.dag_gcs_prefix, 5, length(google_composer_environment.create_cloud_composer_env.config.0.dag_gcs_prefix)), 0, (length(google_composer_environment.create_cloud_composer_env.config.0.dag_gcs_prefix)-10)) | |
depends_on = [ | |
time_sleep.sleep_after_composer_creation | |
] | |
} | |
/****************************************** | |
DONE | |
******************************************/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment