Last active
November 28, 2023 07:07
-
-
Save Gatsby-Lee/2230adb9a23df42932ef30b2ff839149 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# filename: exp_hudi_export_0_14_0.yaml
---
# EMR on EKS job definition that runs Hudi's HoodieSnapshotExporter, reading
# from --source-base-path and writing to --target-output-path in "hudi" format.
# NOTE(review): the keys mirror the emr-containers StartJobRun request shape;
# confirm against whatever tool submits this file.
name: exp-hudi-export-with-0-14-0
virtualClusterId: <emr-on-eks-virtual-cluster-id>
executionRoleArn: <emr-on-eks-execution-role>

# emr-6.15.0-latest has Hudi 0.14.0
# emr-6.14.0-latest has Hudi 0.13.1
# ref: https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/emr-eks-6.15.0.html
releaseLabel: emr-6.15.0-latest

jobDriver:
  sparkSubmitJobDriver:
    # emr-containers prepends "file://" to the entryPoint if no protocol is given.
    # - "/usr/lib/hudi/hudi-utilities-bundle.jar"
    #   becomes "file:///usr/lib/hudi/hudi-utilities-bundle.jar"
    # - using "file://" requires setting '--conf spark.kubernetes.file.upload.path'
    entryPoint: s3://<s3-bucket-path>/hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar
    entryPointArguments:
      - --source-base-path
      - s3://<s3-bucket-path-source>/<hudi-table-path>/
      - --target-output-path
      - s3://<s3-bucket-path-target>/<hudi-table-path>/
      - --output-format
      - hudi
    # sparkSubmitParameters must reach Spark as a single string with no newlines.
    # The ">-" folded scalar joins the lines below with single spaces and strips
    # the trailing newline. Keep every line at the same indentation: a
    # more-indented line would keep its line break.
    # ref: https://yaml-multiline.info/
    sparkSubmitParameters: >-
      --class org.apache.hudi.utilities.HoodieSnapshotExporter
      --jars s3://<s3-bucket-path>/hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar,s3://<s3-bucket-path>/hudi-spark3-bundle_2.12-0.14.0-amzn-0.jar
      --conf spark.executor.instances=1
      --conf spark.executor.memory=2G
      --conf spark.executor.cores=2
      --conf spark.driver.memory=2G
      --conf spark.kubernetes.driver.podTemplateFile=s3://<s3-bucket-path>/emr_etl_driver_ondemand_required_v200_staging.yml
      --conf spark.kubernetes.executor.podTemplateFile=s3://<s3-bucket-path>/emr_etl_executor_spot_required_v200_staging.yml

configurationOverrides:
  applicationConfiguration:
    - classification: spark-defaults
      properties:
        # @note: boolean-looking values must be quoted; otherwise YAML parses
        # them as booleans instead of strings.
        spark.dynamicAllocation.enabled: "false"

tags:
  environment: staging
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment