@readytheory
Created May 28, 2020 00:01
Databricks notebook launch-from-laptop script
# Here is a job that runs a notebook -- a hello world for running a job without
# logging into the Databricks GUI.
# The secrets file looks like:
#
# [hello]
# token = dapi25c47c... # you get this from your Databricks account
# brix_instance = dbc-bfd54799-f....cloud.databricks.com # URL of your instance
# brix_user_dir = your username; for me, the email address I signed up with
#
# The request body includes aws_attributes.instance_profile_arn ... this is an
# instance profile from AWS. You can't see it in the script, but this is how the
# cluster is granted authorization to write into S3.
# Databricks docs, May 2020: https://docs.databricks.com/dev-tools/api/latest/jobs.html
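#
# For illustration only (not part of the original script): with that instance
# profile attached to the cluster, code inside the launched notebook can write
# straight to S3 without handling credentials, e.g. something along the lines of
#
#   df.write.parquet("s3a://your-bucket/hello_world_output")  # bucket name is hypothetical
#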
import configparser
import requests
import json
# Read the API token and workspace URL from the secrets file.
secrets_file = "/run/secrets/hello.token"
config = configparser.ConfigParser()
config.read(secrets_file)
token = config['hello']['token']
brix_instance = config['hello']['brix_instance']

# Every Jobs API call carries the token as a bearer credential.
headers = {
    'Authorization': f"Bearer {token}",
    'Content-Type': 'application/json',
}
url = f"https://{brix_instance}/api/2.0/jobs/runs/submit"
body = {
    "run_name": "api_run_hello",
    "new_cluster": {
        "num_workers": 2,
        "spark_version": "6.5.x-scala2.11",
        "spark_conf": {},
        "aws_attributes": {
            "first_on_demand": 0,
            "availability": "SPOT_WITH_FALLBACK",
            "zone_id": "us-west-2c",
            "instance_profile_arn": "arn:aws:iam::94xxxxxxxx:instance-profile/mutihopper",
            "spot_bid_price_percent": 60,
            "ebs_volume_type": "GENERAL_PURPOSE_SSD",
            "ebs_volume_count": 3,
            "ebs_volume_size": 32,
        },
        "node_type_id": "m5a.large",
        "driver_node_type_id": "m5a.large",
        "ssh_public_keys": [],
        "custom_tags": {
            "RunName": "cheap_hello_world",
            "JobId": "1",
        },
        "spark_env_vars": {
            "PYSPARK_PYTHON": "/databricks/python3/bin/python3",
        },
        "enable_elastic_disk": False,
        "cluster_source": "JOB",
        "init_scripts": [],
    },
    "notebook_task": {
        "notebook_path": "/Users/folder_name/notebook_name",
    },
}
# Submit the one-off run; the response body contains the run_id of the new run.
r = requests.post(url, headers=headers, data=json.dumps(body))
print(r.text)

# Immediately query the run's status; the submit response ({"run_id": ...})
# doubles as the query parameters for the runs/get endpoint.
data = json.loads(r.text)
url = f"https://{brix_instance}/api/2.0/jobs/runs/get"
r = requests.get(url, headers=headers, params=data)
print(r.text)
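
# Optional follow-up (a minimal sketch, not in the original gist): poll runs/get
# until the run leaves its pending/running life-cycle states, then report the
# result state. Assumes the submit response above parsed into `data` with a
# run_id, and reuses the same headers and brix_instance.
import time

status_url = f"https://{brix_instance}/api/2.0/jobs/runs/get"
while True:
    run = requests.get(status_url, headers=headers,
                       params={"run_id": data["run_id"]}).json()
    life_cycle = run["state"]["life_cycle_state"]
    if life_cycle in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR"):
        # result_state (SUCCESS/FAILED/...) is only present once the run ends.
        print("run finished:", run["state"].get("result_state", life_cycle))
        break
    print("still waiting, state is", life_cycle)
    time.sleep(30)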