Last active
June 30, 2020 16:07
-
-
Save JacobJohansen/ed9191ff9abd1eb83c99f8caea88ca29 to your computer and use it in GitHub Desktop.
EMR configuration that supports Zeppelin, Presto, Hue, Spark, and Hive, all configured to access the AWS Glue Data Catalog. Zeppelin is also configured to use an S3 bucket as its notebook store.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
  {
    "Classification":"spark-hive-site",
    "Properties":{
      "hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
      "hive.metastore.schema.verification": "false"
    }
  },
  {
    "Classification":"hive-site",
    "Properties":{
      "hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
      "hive.metastore.schema.verification": "false"
    }
  },
  {
    "Classification":"spark",
    "Properties":{
      "maximizeResourceAllocation": "true"
    }
  },
  {
    "Classification":"spark-defaults",
    "Properties":{
      "spark.driver.extraClassPath":"/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/home/hadoop/extrajars/*",
      "spark.executor.extraClassPath":"/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/home/hadoop/extrajars/*"
    }
  },
  {
    "Classification":"presto-connector-hive",
    "Properties":{
      "hive.metastore": "glue",
      "hive.parquet.use-column-names": "true"
    }
  },
  {
    "Classification": "zeppelin-env",
    "Properties": {
    },
    "Configurations": [
      {
        "Classification": "export",
        "Properties": {
          "ZEPPELIN_NOTEBOOK_S3_BUCKET": "${bucket}",
          "ZEPPELIN_NOTEBOOK_S3_USER": "global",
          "SPARK_SUBMIT_OPTIONS": "\"--jars ${jars}\"",
          "ZEPPELIN_NOTEBOOK_STORAGE": "\"org.apache.zeppelin.notebook.repo.S3NotebookRepo\""
        },
        "Configurations": [
        ]
      }
    ]
  }
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download the Delta Lake core jar for use as an extra Spark jar.
#
# /home/hadoop/extrajars is the directory that the accompanying EMR
# configuration appends to spark.driver.extraClassPath and
# spark.executor.extraClassPath.
#
# NOTE(review): ${delta_jar_path} is not defined in this script; it is
# presumably substituted by a templating step (or exported by the caller)
# before the script runs - confirm.

# Abort on the first failing command or on an unset variable, so a failed
# step is never mistaken for a successful install.
set -euo pipefail

# Configure delta lake jar
mkdir -p /home/hadoop/extrajars

# --fail:     exit non-zero on HTTP 4xx/5xx instead of saving the error page
#             to disk as if it were the jar.
# --location: follow redirects issued by the repository.
# --retry:    ride out transient network errors.
curl --fail --location --retry 3 \
  https://repo1.maven.org/maven2/io/delta/delta-core_2.12/0.6.1/delta-core_2.12-0.6.1.jar \
  --output "${delta_jar_path}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment