Created
May 28, 2020 00:01
-
-
Save readytheory/0250826a174f086ccce8516c74b4643f to your computer and use it in GitHub Desktop.
Script to launch a Databricks notebook run from your laptop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Submit a one-off Databricks notebook run from a laptop -- "hello world" for
# running a job without logging into the Databricks GUI.
#
# The secrets file (INI format, read with configparser) looks like:
#
# [hello]
# token = dapi25c47c...                               # from your Databricks account
# brix_instance = dbc-bfd54799-f....cloud.databricks.com  # URL of your instance
# brix_user_dir = your username, for me the email address i signed up with
#
# This includes aws_attributes.instance_profile_arn ... this is an instance profile
# from Amazon.  You can't see it in the script, but this is how the script is
# granted authorization to write into S3.
# DB docs May 2020: https://docs.databricks.com/dev-tools/api/latest/jobs.html
import configparser
import json

import requests

secrets_file = "/run/secrets/hello.token"

# Pull the API token and workspace hostname out of the secrets file.
config = configparser.ConfigParser()
config.read(secrets_file)
token = config['hello']['token']
brix_instance = config['hello']['brix_instance']

headers = {'Authorization': f"Bearer {token}"}

# One-time run submission (no persistent job definition is created).
url = f"https://{brix_instance}/api/2.0/jobs/runs/submit"
body = {
    "run_name": "api_run_hello",
    "new_cluster": {
        "num_workers": 2,
        "spark_version": "6.5.x-scala2.11",
        "spark_conf": {},
        "aws_attributes": {
            # first_on_demand=0 + SPOT_WITH_FALLBACK + a 60% bid keeps the
            # cluster cheap: all nodes try spot first, falling back to on-demand.
            "first_on_demand": 0,
            "availability": "SPOT_WITH_FALLBACK",
            "zone_id": "us-west-2c",
            # The instance profile is what grants the cluster write access to S3.
            "instance_profile_arn": "arn:aws:iam::94xxxxxxxx:instance-profile/mutihopper",
            "spot_bid_price_percent": 60,
            "ebs_volume_type": "GENERAL_PURPOSE_SSD",
            "ebs_volume_count": 3,
            "ebs_volume_size": 32
        },
        "node_type_id": "m5a.large",
        "driver_node_type_id": "m5a.large",
        "ssh_public_keys": [],
        "custom_tags": {
            "RunName": "cheap_hello_world",
            "JobId": "1"
        },
        "spark_env_vars": {
            "PYSPARK_PYTHON": "/databricks/python3/bin/python3"
        },
        "enable_elastic_disk": False,
        "cluster_source": "JOB",
        "init_scripts": []
    },
    "notebook_task": {
        "notebook_path": "/Users/folder_name/notebook_name"
    }
}

# json=body serializes the payload and sets Content-Type: application/json
# for us, so the header no longer needs to be built by hand.
r = requests.post(url, headers=headers, json=body)
print(r.text)
# Fail fast with a clear HTTP error instead of letting a failed submit
# produce a confusing error on the follow-up runs/get call below.
r.raise_for_status()

# On success the response is {"run_id": <int>}; passing it as query params
# to runs/get fetches the state of the run we just submitted.
data = r.json()
url = f"https://{brix_instance}/api/2.0/jobs/runs/get"
r = requests.get(url, headers=headers, params=data)
print(r.text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment