Skip to content

Instantly share code, notes, and snippets.

@mrchristine
Last active September 12, 2017 15:38
Show Gist options
  • Save mrchristine/0533e339f6973a53bc61f0f0eb548908 to your computer and use it in GitHub Desktop.
Save mrchristine/0533e339f6973a53bc61f0f0eb548908 to your computer and use it in GitHub Desktop.
Databricks REST API spark-submit w/ run-now
#!/bin/bash
#
# Upload one or more jars to DBFS and run each of them as a spark-submit job
# through the Databricks REST API.
#
# Usage: <script> [-h] [-s spark_version] jar [jar ...]
#   -h  print the usage text, list the Spark versions reported by the
#       workspace (requires jq), and exit
#   -s  Spark runtime version for the new cluster
#
# curl is called with -n, so credentials are taken from the user's .netrc.

RUNTIME_VERSION="3.2.x-scala2.11"
NODE_TYPE="r3.xlarge"
usage="Usage: ${0##*/} [-h] [-s spark_version] jar [jar ...]
Add jars to the input arguments to specify the spark job. -h list the supported spark versions. -s <version> selects the spark version (default: ${RUNTIME_VERSION})"

#######################################
# Parse the command-line options.
# Globals:   RUNTIME_VERSION (written by -s), usage (read),
#            OPTIND (left pointing at the first non-option argument)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for -h, diagnostics on stderr for bad input
# Returns:   0 when parsing succeeds; exits the shell for -h and for
#            invalid options
#######################################
parse_args() {
  local option
  # The leading ':' selects getopts' silent mode: a missing option argument is
  # reported as ':' and an unknown option as '?', both with the offending
  # letter in OPTARG, so each needs its own arm below.
  while getopts ':hs:' option; do
    case "$option" in
      h)
        echo "$usage"
        curl -s -n https://myenv.cloud.databricks.com/api/2.0/clusters/spark-versions | jq .
        exit
        ;;
      s)
        RUNTIME_VERSION=$OPTARG
        ;;
      :)
        printf 'option -%s requires an argument\n' "$OPTARG" >&2
        echo "$usage" >&2
        exit 1
        ;;
      \?)
        printf 'illegal option: -%s\n' "$OPTARG" >&2
        echo "$usage" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# OPTIND is global in bash, so the shift can be applied to the script's own
# positional parameters after the function returns; what remains are the jars.
shift $((OPTIND - 1))
## 2 parts to use spark-submit within Databricks running locally:
##   1. upload the jar to a specific DBFS directory
##   2. create a spark-submit job that points at that DBFS path and run it

readonly API_URL="https://myenv.cloud.databricks.com/api/2.0"
readonly DBFS_JAR_DIR="/home/myuser/jars"

#######################################
# Escape a string for use inside a double-quoted JSON string.
# Only backslashes and double quotes are handled; control characters are
# passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local s=$1
  # Backslashes first, otherwise the ones added for quotes get doubled.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

#######################################
# Compute the DBFS path a local jar is uploaded to.
# Globals:   DBFS_JAR_DIR (read)
# Arguments: $1 - local path of the jar
# Outputs:   "<DBFS_JAR_DIR>/<file name>" on stdout
#######################################
dbfs_jar_path() {
  printf '%s/%s\n' "${DBFS_JAR_DIR%/}" "$(basename -- "$1")"
}

#######################################
# Build the JSON body for the jobs/create request.
# Arguments: $1 - spark version, $2 - node type id, $3 - dbfs: URI of the jar
# Outputs:   JSON document on stdout
#######################################
build_job_json() {
  local spark_version node_type jar_uri
  spark_version=$(json_escape "$1")
  node_type=$(json_escape "$2")
  jar_uri=$(json_escape "$3")
  cat <<EOF
{
"name": "My Example SparkPi Job",
"new_cluster": {
"spark_version": "${spark_version}",
"node_type_id": "${node_type}",
"num_workers": 1 },
"email_notifications":
{"on_start": [],"on_success": [],"on_failure": []},
"timeout_seconds": 3600,
"max_retries": 1,
"spark_submit_task":
{"parameters": [ "--conf", "spark.driver.maxResultSize=5g",
"--class","org.apache.spark.examples.SparkPi",
"${jar_uri}", "10"]}}
EOF
}

#######################################
# Upload one jar to DBFS, create a spark-submit job for it and trigger a run.
# Globals:   RUNTIME_VERSION, NODE_TYPE, API_URL (read)
# Arguments: $1 - local path of the jar
# Outputs:   progress on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the jar is missing or any request fails
#######################################
submit_jar() {
  local jar=$1
  local remote_path job_json upload_response create_response run_response

  if [[ ! -f "$jar" ]]; then
    printf 'error: jar not found: %s\n' "$jar" >&2
    return 1
  fi

  remote_path=$(dbfs_jar_path "$jar")
  echo "Path: " "$jar"
  echo "Filename: " "${remote_path##*/}"

  # 1. Upload library using DBFS to a specific directory.
  # No -s here: curl's progress meter goes to stderr and is useful for
  # large jars; only the response body is captured.
  if ! upload_response=$(curl -n \
    -F "contents=@${jar}" -F "path=${remote_path}" -F "overwrite=true" \
    "${API_URL}/dbfs/put"); then
    printf 'error: upload request failed for %s\n' "$jar" >&2
    return 1
  fi
  if [[ "$upload_response" == *'"error_code"'* ]]; then
    printf 'error: DBFS upload rejected for %s: %s\n' "$jar" "$upload_response" >&2
    return 1
  fi

  echo "Spark Version: $RUNTIME_VERSION"
  echo "DBFS Jar Path: dbfs:${remote_path}"

  # 2. Use the DBFS path you uploaded to in part 1
  job_json=$(build_job_json "$RUNTIME_VERSION" "$NODE_TYPE" "dbfs:${remote_path}")
  if ! create_response=$(curl -X POST -sS -n -H 'Content-Type:application/json' \
    -d "$job_json" "${API_URL}/jobs/create"); then
    printf 'error: jobs/create request failed for %s\n' "$jar" >&2
    return 1
  fi
  if [[ "$create_response" != *'"job_id"'* ]]; then
    printf 'error: jobs/create did not return a job_id: %s\n' "$create_response" >&2
    return 1
  fi

  # The jobs/create response ({"job_id": N}) is posted unchanged as the
  # run-now request body, which is why it must be validated first.
  if ! run_response=$(curl -X POST -sS -n -H 'Content-Type:application/json' \
    -d "$create_response" "${API_URL}/jobs/run-now"); then
    printf 'error: jobs/run-now request failed for %s\n' "$jar" >&2
    return 1
  fi
  if [[ "$run_response" != *'"run_id"'* ]]; then
    printf 'error: jobs/run-now did not return a run_id: %s\n' "$run_response" >&2
    return 1
  fi
  echo "Run response: $run_response"
}

if (( $# == 0 )); then
  echo "warning: no jar files given; nothing to submit" >&2
fi

failures=0
for jar in "$@"; do
  submit_jar "$jar" || failures=$((failures + 1))
done

if (( failures > 0 )); then
  printf '%d jar(s) could not be submitted\n' "$failures" >&2
  exit 1
fi
echo -e "\nCompleted!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment