@readytheory
Created May 28, 2020 00:01
Databricks notebook launch-from-laptop script
# Here is a job that runs a notebook -- a hello world for running a job without
# logging into the Databricks GUI.
# The secrets file looks like:
#
# [hello]
# token = dapi25c47c... # you get this from your Databricks account
# brix_instance = dbc-bfd54799-f....cloud.databricks.com # URL of your instance
# brix_user_dir = your username; for me, the email address I signed up with
#
# The request body includes aws_attributes.instance_profile_arn ... this is an
# instance profile from AWS. You can't see it in the script, but this is how the
# cluster is granted authorization to write into S3.
# Databricks docs, May 2020: https://docs.databricks.com/dev-tools/api/latest/jobs.html
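#
# For illustration only (not part of the original script): with that instance
# profile attached to the cluster, code inside the launched notebook can write
# straight to S3 without handling credentials, e.g. something along the lines of
#
#   df.write.parquet("s3a://your-bucket/hello_world_output")  # bucket name is hypothetical
#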
import configparser
import requests
import json
# Read the API token and workspace URL from the secrets file.
secrets_file = "/run/secrets/hello.token"
config = configparser.ConfigParser()
config.read(secrets_file)
token = config['hello']['token']
brix_instance = config['hello']['brix_instance']

# Every Jobs API call carries the token as a bearer credential.
headers = {
    'Authorization': f"Bearer {token}",
    'Content-Type': 'application/json',
}
url = f"https://{brix_instance}/api/2.0/jobs/runs/submit"
body = {
    "run_name": "api_run_hello",
    "new_cluster": {
        "num_workers": 2,
        "spark_version": "6.5.x-scala2.11",
        "spark_conf": {},
        "aws_attributes": {
            "first_on_demand": 0,
            "availability": "SPOT_WITH_FALLBACK",
            "zone_id": "us-west-2c",
            "instance_profile_arn": "arn:aws:iam::94xxxxxxxx:instance-profile/mutihopper",
            "spot_bid_price_percent": 60,
            "ebs_volume_type": "GENERAL_PURPOSE_SSD",
            "ebs_volume_count": 3,
            "ebs_volume_size": 32,
        },
        "node_type_id": "m5a.large",
        "driver_node_type_id": "m5a.large",
        "ssh_public_keys": [],
        "custom_tags": {
            "RunName": "cheap_hello_world",
            "JobId": "1",
        },
        "spark_env_vars": {
            "PYSPARK_PYTHON": "/databricks/python3/bin/python3",
        },
        "enable_elastic_disk": False,
        "cluster_source": "JOB",
        "init_scripts": [],
    },
    "notebook_task": {
        "notebook_path": "/Users/folder_name/notebook_name",
    },
}
# Submit the one-off run; the response body contains the run_id of the new run.
r = requests.post(url, headers=headers, data=json.dumps(body))
print(r.text)

# Immediately query the run's status; the submit response ({"run_id": ...})
# doubles as the query parameters for the runs/get endpoint.
data = json.loads(r.text)
url = f"https://{brix_instance}/api/2.0/jobs/runs/get"
r = requests.get(url, headers=headers, params=data)
print(r.text)
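
# Optional follow-up (a minimal sketch, not in the original gist): poll runs/get
# until the run leaves its pending/running life-cycle states, then report the
# result state. Assumes the submit response above parsed into `data` with a
# run_id, and reuses the same headers and brix_instance.
import time

status_url = f"https://{brix_instance}/api/2.0/jobs/runs/get"
while True:
    run = requests.get(status_url, headers=headers,
                       params={"run_id": data["run_id"]}).json()
    life_cycle = run["state"]["life_cycle_state"]
    if life_cycle in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR"):
        # result_state (SUCCESS/FAILED/...) is only present once the run ends.
        print("run finished:", run["state"].get("result_state", life_cycle))
        break
    print("still waiting, state is", life_cycle)
    time.sleep(30)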