Last active
May 22, 2020 19:05
-
-
Save mndrake/7cf020ec8349239fa11c4a19dde375fa to your computer and use it in GitHub Desktop.
Databricks cluster creation and config for Databricks Connect
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python | |
import functools | |
import json | |
import os | |
import requests | |
import urllib | |
import uuid | |
import configparser | |
# TODO: currently only works for AWS; additional parsing is needed for Azure.
# Prerequisite: the Databricks CLI is installed and configured.

# Name of the profile section to read from the Databricks CLI config file.
DATABRICKS_PROFILE = 'DEFAULT'

# Request body sent to the cluster-creation endpoint.
CLUSTER_DEFINITION = {
    'cluster_name': 'dc-demo',
    # uuid.getnode() is the machine's hardware address as an integer, so the
    # token is normally the same on every run from a given machine.
    # NOTE(review): presumably this lets a re-run reuse the existing cluster
    # instead of creating a duplicate -- confirm against the Clusters API docs.
    'idempotency_token': str(uuid.getnode()),
    'spark_version': '6.4.x-cpu-ml-scala2.11',
    'driver_node_type_id': 'i3.xlarge',
    'node_type_id': 'i3.xlarge',
    'spark_conf': {},
    'autoscale': {
        'min_workers': 2,
        'max_workers': 8,
    },
    # AWS-specific settings (see the TODO above regarding Azure).
    'aws_attributes': {
        'first_on_demand': 1,
        'availability': 'SPOT_WITH_FALLBACK',
        'zone_id': 'us-west-2c',
        'spot_bid_price_percent': 100,
        'ebs_volume_count': 0,
    },
    'autotermination_minutes': 60,
}
# Retrieve the Databricks CLI config for the selected profile.
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so check that result to fail with a
# clear message instead of an unexplained KeyError further down.
cli_config_path = os.path.expanduser('~/.databrickscfg')
cli_config = configparser.ConfigParser()
if not cli_config.read(cli_config_path):
    raise FileNotFoundError(
        f'Databricks CLI config not found or unreadable: {cli_config_path}'
    )

try:
    profile_config = cli_config[DATABRICKS_PROFILE]
except KeyError:
    raise KeyError(
        f'profile {DATABRICKS_PROFILE!r} not found in {cli_config_path}'
    ) from None

# Both values are required below; report every missing key at once.
missing_keys = [key for key in ('host', 'token') if key not in profile_config]
if missing_keys:
    raise KeyError(
        f'profile {DATABRICKS_PROFILE!r} in {cli_config_path} '
        f'is missing: {", ".join(missing_keys)}'
    )

# Set the Databricks constants for the profile.
DATABRICKS_HOST = profile_config['host']
DATABRICKS_TOKEN = profile_config['token']
def api_request(route, body=None, timeout=60):
    """
    Databricks API request wrapper.
    doc page: https://docs.databricks.com/dev-tools/api/latest/index.html

    Sends a GET when ``body`` is None, otherwise a POST with ``body`` as the
    JSON payload. The request carries the configured token as a bearer
    ``Authorization`` header.

    Args:
        route: API path below ``api/``, e.g. ``'2.0/clusters/create'``.
        body: optional JSON-serializable request payload.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status; the
            message includes the response text.
        requests.Timeout: the server did not respond within ``timeout``.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import urljoin

    # URL paths always use '/', so build the path by hand; os.path.join
    # would insert a backslash on Windows and produce a broken URL.
    url = urljoin(DATABRICKS_HOST, 'api/' + route.lstrip('/'))
    headers = {'Authorization': f'Bearer {DATABRICKS_TOKEN}'}

    if body is None:
        response = requests.get(url, headers=headers, timeout=timeout)
    else:
        response = requests.post(
            url, headers=headers, json=body, timeout=timeout
        )

    # Fail loudly on HTTP errors and keep the server's explanation, rather
    # than handing an error payload back as if it were a normal result.
    if not response.ok:
        raise requests.HTTPError(
            f'{response.status_code} error for {url}: {response.text}',
            response=response,
        )
    return response.json()
# Create the cluster; the wrapper returns the decoded JSON response.
cluster_info = api_request('2.0/clusters/create', CLUSTER_DEFINITION)

# If the response carries no 'cluster_id' (e.g. the API returned an error
# payload), report the response itself instead of failing with a bare
# KeyError that hides what the server said.
if 'cluster_id' not in cluster_info:
    raise RuntimeError(f'cluster creation failed: {cluster_info}')

# Build the Databricks Connect configuration for the cluster.
cluster_config = {
    "host": DATABRICKS_HOST,
    "token": DATABRICKS_TOKEN,
    "cluster_id": cluster_info['cluster_id'],
    "port": "15001"
}

# Overwrite the databricks-connect config file with the cluster config.
# Note that this file stores the access token in plain text.
with open(os.path.expanduser('~/.databricks-connect'), 'w') as f:
    json.dump(cluster_config, f)

print('-- cluster info --\n', cluster_info)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment