Created
April 5, 2021 04:59
-
-
Save lordlinus/149fe9cc38dc4b4dc75bfff8e6c2baf3 to your computer and use it in GitHub Desktop.
Azure devops yaml pipeline to deploy Azure Databricks cluster ( end to end )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Azure DevOps pipeline that builds an Azure Databricks workspace and cluster.
# This pipeline depends on the variable group "variable-group-01" to provide the variables below:
#   ARM_CLIENT_ID
#   ARM_SUBSCRIPTION_ID
#   ARM_TENANT_ID
#   mySecret  (NOTE: this variable is mapped to ARM_CLIENT_SECRET in the deploy step's `env:` section)
# Run the pipeline on every push to the "starter" branch.
trigger:
- starter
# Pipeline variables. Non-secret variables are exposed to script steps as
# environment variables of the same name; every value is treated as a string,
# so numeric-looking values are quoted to make that explicit.
variables:
# Supplies ARM_CLIENT_ID, ARM_SUBSCRIPTION_ID, ARM_TENANT_ID and mySecret.
- group: variable-group-01
# Resource group that holds the workspace; created by the deploy step if missing.
- name: RESOURCE_GROUP
  value: "rg-test-01"
# Azure region used for both the resource group and the workspace.
- name: LOCATION
  value: "southeastasia"
- name: DATABRICKS_WORKSPACE
  value: "TestWorkspace"
# Resource requested when fetching the Azure management endpoint token.
- name: MANAGEMENT_RESOURCE_ENDPOINT
  value: "https://management.core.windows.net/"
# Resource ID passed to `az account get-access-token --resource` to obtain
# the Azure AD token used against the Databricks REST API.
- name: AZURE_DATABRICKS_APP_ID
  value: "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"
- name: DATABRICKS_CLUSTER_NAME
  value: "test-cluster-01"
- name: DATABRICKS_SPARK_VERSION
  value: "7.3.x-scala2.12"
- name: DATABRICKS_NODE_TYPE
  value: "Standard_D3_v2"
# Converted to a JSON number by the deploy script (jq `tonumber`).
- name: DATABRICKS_NUM_WORKERS
  value: "3"
# Must be a valid JSON object; the deploy script parses it with jq `fromjson`.
- name: DATABRICKS_SPARK_CONF
  value: '{"spark.speculation":"true","spark.databricks.delta.preview.enabled":"true"}'
# Converted to a JSON number by the deploy script (jq `tonumber`).
- name: DATABRICKS_AUTO_TERMINATE_MINUTES
  value: "60"
# Microsoft-hosted Ubuntu agent; the steps below rely on bash, apt-get, curl
# and the Azure CLI being available on it.
pool:
  vmImage: ubuntu-latest
steps:
# jq is used by the deploy step to build and parse JSON payloads.
# apt-get with --yes is used so the install can never block on a prompt in CI.
- script: sudo apt-get update && sudo apt-get install --yes jq
  displayName: 'Install dependencies'
# End-to-end deployment step:
#   1. log in with the service principal and select the subscription
#   2. create the resource group and the Databricks workspace if they are missing
#   3. obtain an Azure AD token and a management-endpoint token
#   4. create a short-lived Databricks PAT, list tokens/clusters, create the cluster
# Inputs come from the pipeline variables plus ARM_CLIENT_SECRET (mapped in `env:`).
# Token values are never written to the build log.
- script: |
    set -o errexit
    set -o nounset
    set -o pipefail

    # Login using the service principal
    echo "Logging in using Azure service priciple"
    az login --service-principal -u "$ARM_CLIENT_ID" -p "$ARM_CLIENT_SECRET" --tenant "$ARM_TENANT_ID"
    az account set -s "$ARM_SUBSCRIPTION_ID"

    # Create Resource Group if it does not exist
    # NOTE: you can get the list of az locations from "az account list-locations | jq .[].name"
    if [[ $(az group exists --resource-group "$RESOURCE_GROUP") = "false" ]]; then
      echo "Resource Group does not exists, so creating.."
      az group create --name "$RESOURCE_GROUP" --location "$LOCATION"
    fi

    # Enable install of extensions without prompt
    az config set extension.use_dynamic_install=yes_without_prompt

    # Create the Databricks workspace using the CLI extension.
    # The extension is installed automatically the first time an
    # "az databricks workspace" command runs.
    # Ref: https://docs.microsoft.com/en-us/cli/azure/ext/databricks/databricks?view=azure-cli-latest
    # The list is captured first so that an az failure aborts the script
    # (errexit) instead of being mistaken for "workspace not found".
    existing_workspaces=$(az databricks workspace list \
      --resource-group "$RESOURCE_GROUP" \
      --query "[].name" \
      --output tsv)
    # -F: literal match, -x: whole line, -q: quiet. Create only when absent.
    if ! grep -Fxq -- "$DATABRICKS_WORKSPACE" <<< "$existing_workspaces"; then
      echo "Databricks workspace does not exists, so creating.."
      az databricks workspace create \
        --location "$LOCATION" \
        --name "$DATABRICKS_WORKSPACE" \
        --sku trial \
        --resource-group "$RESOURCE_GROUP" \
        --enable-no-public-ip \
        --tags environment=demo level=level3
    fi

    # Get workspace id in the given resource group, e.g.
    # /subscriptions/(subscription_id)/resourceGroups/(rg)/providers/Microsoft.Databricks/workspaces/(databricks_workspace)
    wsId=$(az resource show --resource-type Microsoft.Databricks/workspaces -g "$RESOURCE_GROUP" -n "$DATABRICKS_WORKSPACE" --query id -o tsv)
    echo "Workspce ID: $wsId"

    # Get workspace url, e.g. adb-xxxxxxxxxxxxxxxx.x.azuredatabricks.net
    workspaceUrl=$(az resource show --resource-type Microsoft.Databricks/workspaces -g "$RESOURCE_GROUP" -n "$DATABRICKS_WORKSPACE" --query properties.workspaceUrl --output tsv)
    echo "Workspce URL: $workspaceUrl"

    # Token response for the Azure Databricks app
    token_response=$(az account get-access-token --resource "$AZURE_DATABRICKS_APP_ID")
    # Extract the accessToken value. "jq -e" exits non-zero when the field is
    # missing/null, so a bad response stops the script here.
    # The token is a credential and is intentionally NOT echoed to the log.
    token=$(jq -er .accessToken <<< "$token_response")

    # Get the Azure Management Resource endpoint token
    # https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/aad/service-prin-aad-token#--get-the-azure-management-resource-endpoint-token
    # --data-urlencode keeps secrets containing '&', '+', '%' etc. intact.
    az_mgmt_resource_endpoint=$(curl --silent --show-error --fail -X GET \
      -H 'Content-Type: application/x-www-form-urlencoded' \
      --data-urlencode 'grant_type=client_credentials' \
      --data-urlencode "client_id=$ARM_CLIENT_ID" \
      --data-urlencode "resource=$MANAGEMENT_RESOURCE_ENDPOINT" \
      --data-urlencode "client_secret=$ARM_CLIENT_SECRET" \
      "https://login.microsoftonline.com/$ARM_TENANT_ID/oauth2/token")
    # Extract the access_token value (credential: not echoed to the log)
    mgmt_access_token=$(jq -er .access_token <<< "$az_mgmt_resource_endpoint")

    # Create PAT token valid for 5 min (300 sec)
    pat_token_response=$(curl --silent --show-error --fail -X POST \
      -H "Authorization: Bearer $token" \
      -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
      -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
      -d '{"lifetime_seconds": 300,"comment": "this is an example token"}' \
      "https://$workspaceUrl/api/2.0/token/create")
    # Extract the PAT value. It is validated but deliberately not printed;
    # NOTE(review): nothing below consumes pat_token - publish it as a secret
    # pipeline variable if later steps need it.
    pat_token=$(jq -er .token_value <<< "$pat_token_response")

    # List PAT tokens (OPTIONAL, best effort)
    curl --silent --show-error -X GET \
      -H "Authorization: Bearer $token" \
      -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
      -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
      "https://$workspaceUrl/api/2.0/token/list"

    # List current clusters (OPTIONAL) - could be used to decide the next
    # command, e.g. create, restart, terminate etc.
    # NOTE(review): the create call below runs on every pipeline run without
    # checking this list - confirm whether duplicate clusters are acceptable.
    curl --silent --show-error -X GET \
      -H "Authorization: Bearer $token" \
      -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
      -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
      "https://$workspaceUrl/api/2.0/clusters/list"

    # Build the cluster config as compact JSON. Worker count and
    # auto-termination are converted to numbers, spark_conf is parsed as JSON.
    JSON_STRING=$( jq -n -c \
      --arg cn "$DATABRICKS_CLUSTER_NAME" \
      --arg sv "$DATABRICKS_SPARK_VERSION" \
      --arg nt "$DATABRICKS_NODE_TYPE" \
      --arg nw "$DATABRICKS_NUM_WORKERS" \
      --arg sc "$DATABRICKS_SPARK_CONF" \
      --arg at "$DATABRICKS_AUTO_TERMINATE_MINUTES" \
      '{cluster_name: $cn,
        spark_version: $sv,
        node_type_id: $nt,
        num_workers: ($nw|tonumber),
        autotermination_minutes: ($at|tonumber),
        spark_conf: ($sc|fromjson)}' )

    # Create a new Cluster
    # Reference: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/
    # The payload is quoted so values containing spaces are not word-split.
    cluster_id_response=$(curl --silent --show-error --fail -X POST \
      -H "Authorization: Bearer $token" \
      -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
      -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
      -d "$JSON_STRING" \
      "https://$workspaceUrl/api/2.0/clusters/create")

    # Print cluster_id (fails the step if the response carries no cluster_id)
    cluster_id=$(jq -er .cluster_id <<< "$cluster_id_response")
    echo "Cluster id: $cluster_id"
  env:
    # Secret variables are not exposed to scripts automatically; map explicitly.
    ARM_CLIENT_SECRET: $(mySecret)
  displayName: 'Run a multi-line script to deploy Databricks Cluster'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment