Last active
September 12, 2017 15:38
-
-
Save mrchristine/0533e339f6973a53bc61f0f0eb548908 to your computer and use it in GitHub Desktop.
Databricks Rest API spark-submit w/ run-now
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Submit a spark-submit job to Databricks via the REST API.
# For each jar passed as a positional argument:
#   1. upload the jar to a fixed DBFS directory (api/2.0/dbfs/put),
#   2. create a job whose spark_submit_task points at that DBFS path
#      (api/2.0/jobs/create) and trigger it immediately (api/2.0/jobs/run-now).
#
# Options:
#   -h   print usage and list the Spark runtime versions the workspace supports
#   -s   Spark runtime version for the job cluster (default 3.2.x-scala2.11)
#
# Authentication: every `curl -n` reads credentials from ~/.netrc.

set -euo pipefail

usage="Add jars to the input arguments to specify the spark job. -h list the supported spark versions"

RUNTIME_VERSION="3.2.x-scala2.11"
NODE_TYPE="r3.xlarge"

while getopts ':hs:' option; do
  case "$option" in
    h)
      echo "$usage"
      # Show the runtime versions accepted by -s.
      curl -s -n https://myenv.cloud.databricks.com/api/2.0/clusters/spark-versions | jq .
      exit
      ;;
    s)
      RUNTIME_VERSION=$OPTARG
      ;;
    :)
      # With a leading ':' in the optstring, a missing argument lands here
      # (the original script had no arm for it and fell through silently).
      printf "option -%s requires an argument\n" "$OPTARG" >&2
      echo "$usage" >&2
      exit 1
      ;;
    \?)
      printf "illegal option: -%s\n" "$OPTARG" >&2
      echo "$usage" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

## 2 parts to use spark-submit within Databricks running locally.
for jar in "$@"; do
  fname=$(basename "$jar")
  path="/home/myuser/jars/"   # DBFS target directory; note the trailing slash
  echo "Path: $jar"
  echo "Filename: $fname"

  # 1. Upload library using DBFS to a specific directory.
  # $path already ends in '/', so join directly — the original inserted an
  # extra '/', uploading to /home/myuser/jars//<jar> while the job spec below
  # referenced /home/myuser/jars/<jar>.
  curl -n \
    -F contents=@"${jar}" -F path="${path}${fname}" -F overwrite="true" \
    https://myenv.cloud.databricks.com/api/2.0/dbfs/put

  echo "Spark Version: $RUNTIME_VERSION"
  echo "DBFS Jar Path: dbfs:${path}${fname}"

  # Job definition: a one-worker cluster running the SparkPi example from the
  # uploaded jar. The heredoc is unquoted on purpose so the shell expands
  # $RUNTIME_VERSION, $NODE_TYPE, $path and $fname into the JSON.
  spark_submit_args=$(cat << EOF
{
  "name": "My Example SparkPi Job",
  "new_cluster" : {
    "spark_version": "$RUNTIME_VERSION",
    "node_type_id": "$NODE_TYPE",
    "num_workers": 1 },
  "email_notifications":
    {"on_start": [],"on_success": [],"on_failure": []},
  "timeout_seconds": 3600,
  "max_retries": 1,
  "spark_submit_task":
    {"parameters": [ "--conf", "spark.driver.maxResultSize=5g",
                     "--class","org.apache.spark.examples.SparkPi",
                     "dbfs:${path}${fname}", "10"]}}
EOF
)

  # 2. Use the DBFS path you uploaded to in part 1.
  # jobs/create responds with {"job_id": <id>}, which is exactly the request
  # body jobs/run-now expects, so the response is forwarded verbatim.
  job_id=$(curl -X POST -s -n -H 'Content-Type:application/json' -d "$spark_submit_args" https://myenv.cloud.databricks.com/api/2.0/jobs/create)
  run_status=$(curl -X POST -s -n -H 'Content-Type:application/json' -d "$job_id" https://myenv.cloud.databricks.com/api/2.0/jobs/run-now)
  # Surface the run-now response (original captured it but never reported it).
  printf 'Run submitted: %s\n' "$run_status"
done

printf '\nCompleted!\n'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment