Skip to content

Instantly share code, notes, and snippets.

@alexott
Last active February 8, 2025 20:18
Show Gist options
  • Save alexott/001208f8f3c2ae5880b1d70ad64ddcfd to your computer and use it in GitHub Desktop.
Save alexott/001208f8f3c2ae5880b1d70ad64ddcfd to your computer and use it in GitHub Desktop.
AAD -> DBX synchronizer using Terraform

This gist contains Terraform code that synchronizes groups & users from AAD into a Databricks workspace, without the need to set up a SCIM connector.

This is an extended version of the initial synchronizer implemented by Serge.

To start, download all .tf files into a directory, and create a file terraform.tfvars with the following content:

groups = {
  "My AAD group" = {
    workspace_access = true
    databricks_sql_access = true
    allow_cluster_create = true
    allow_instance_pool_create = false
    admin = false
  },
  "AAD group 2" = {
    .....
  }
}

Please store the state in a remote location, as described in the documentation.

# Look up every AAD group named in var.groups. The lookup is repeated on each
# Terraform run, so the object ID and member list always reflect AzureAD.
data "azuread_group" "this" {
  for_each     = local.all_groups
  display_name = each.key
}
locals {
  # Names of all AAD groups to synchronize (the keys of var.groups).
  all_groups = toset([for name, cfg in var.groups : name])

  # Names of the groups whose entry in var.groups has admin = true.
  admin_groups = toset([for name, cfg in var.groups : name if cfg.admin])
}
# One Databricks group per AAD group. Groups are created or removed purely by
# editing the "groups" variable.
resource "databricks_group" "this" {
  for_each = data.azuread_group.this

  display_name = each.key
  external_id  = each.value.object_id

  # Entitlements are taken from the matching entry in var.groups.
  workspace_access           = var.groups[each.key].workspace_access
  databricks_sql_access      = var.groups[each.key].databricks_sql_access
  allow_cluster_create       = var.groups[each.key].allow_cluster_create
  allow_instance_pool_create = var.groups[each.key].allow_instance_pool_create

  # NOTE(review): presumably lets Terraform take over a group that already
  # exists in the workspace - confirm against the provider documentation.
  force = true
}
locals {
  # De-duplicated object IDs of the members of all synchronized AAD groups.
  all_members = toset(flatten([
    for grp in data.azuread_group.this : grp.members
  ]))
}
# Resolve the member object IDs to AAD user records. With ignore_missing set,
# IDs that do not resolve to a user are skipped instead of failing the lookup.
data "azuread_users" "users" {
  object_ids     = local.all_members
  ignore_missing = true
}
locals {
  # AAD user records indexed by their object ID.
  all_users = { for u in data.azuread_users.users.users : u.object_id => u }
}
# Workspace users are fully governed by AzureAD: one Databricks user per AAD
# user found among the group members, removed again when the AAD user drops out.
resource "databricks_user" "this" {
  for_each = local.all_users

  external_id  = each.key
  user_name    = lower(each.value.user_principal_name)
  display_name = each.value.display_name
  active       = each.value.account_enabled
  force        = true
}
# Provision Service Principals: every group member that was not resolved as a
# user above is looked up as a service principal.
data "azuread_service_principals" "spns" {
  object_ids = setsubtract(local.all_members, data.azuread_users.users.object_ids)
}
locals {
  # AAD service principal records indexed by their object ID.
  all_spns = {
    for spn in data.azuread_service_principals.spns.service_principals :
    spn.object_id => spn
  }
}
# One Databricks service principal per AAD service principal found among the
# group members.
resource "databricks_service_principal" "sp" {
  for_each = local.all_spns

  external_id    = each.key
  application_id = each.value.application_id
  display_name   = each.value.display_name
  active         = each.value.account_enabled
  force          = true
}
locals {
# Databricks users and service principals combined into a single collection.
# Both inputs are keyed by AAD object ID (the for_each keys of the two
# resources), so a group member's object ID can be used to find its
# Databricks counterpart regardless of its kind.
merged_data = merge(databricks_user.this, databricks_service_principal.sp)
}
# Put users and service principals into their respective Databricks groups.
#
# for_each is keyed by "<AAD group name>/<member object ID>". Both parts come
# from AzureAD data sources and are therefore known at plan time. The Databricks
# IDs, which are only known after the group/user has been created, are resolved
# in the arguments instead of in the for_each keys; otherwise a plan against a
# fresh workspace fails because for_each cannot be determined until apply.
#
# NOTE: the instance keys differ from the earlier jsonencode()-based keys, so on
# an existing state the memberships are re-created once (or can be moved with
# `terraform state mv`).
resource "databricks_group_member" "this" {
  for_each = {
    for pair in flatten([
      for group, details in data.azuread_group.this : [
        for member in details["members"] : {
          group  = group
          member = member
        }
      ]
    ]) : "${pair.group}/${pair.member}" => pair
  }

  group_id  = databricks_group.this[each.value.group].id
  member_id = local.merged_data[each.value.member].id
}
# Provisioning Admins: look up the AAD groups flagged with admin = true.
data "azuread_group" "admins" {
  for_each     = local.admin_groups
  display_name = each.key
}
# Look up the existing Databricks group named "admins"; only its id is used,
# as the target group for admin memberships.
data "databricks_group" "admins" {
display_name = "admins"
}
# Add every member of the admin-flagged AAD groups to the Databricks "admins"
# group.
#
# for_each iterates over AAD object IDs, which are known at plan time. toset()
# de-duplicates members that belong to several admin groups. The Databricks ID
# is resolved in member_id rather than in for_each, because it is unknown until
# the user / service principal has been created, and unknown for_each values
# make the plan fail.
#
# NOTE: instance keys are now AAD object IDs instead of Databricks IDs, so on an
# existing state these memberships are re-created once.
resource "databricks_group_member" "admins" {
  for_each = toset(flatten([
    for group, details in data.azuread_group.admins : details["members"]
  ]))

  group_id  = data.databricks_group.admins.id
  member_id = local.merged_data[each.value].id
}
# Providers used by this configuration; both are pinned to exact versions.
terraform {
  required_providers {
    azuread = {
      source  = "hashicorp/azuread"
      version = "2.22.0"
    }
    databricks = {
      source  = "databricks/databricks"
      version = "1.1.0"
    }
  }
}
# No explicit settings for the AzureAD provider.
# NOTE(review): presumably authentication is picked up from the environment - confirm for your setup.
provider "azuread" {}
# No explicit settings for the Databricks provider.
# NOTE(review): presumably workspace host and credentials come from the environment - confirm for your setup.
provider "databricks" {}
# All attributes are required for every group. optional() attributes with
# defaults would make this easier to use; they were experimental when this was
# written (they are stable since Terraform 1.3) and are not used here.
variable "groups" {
  description = "Map of AAD group names into object describing workspace & Databricks SQL access permissions"

  type = map(object({
    workspace_access           = bool
    databricks_sql_access      = bool
    allow_cluster_create       = bool
    allow_instance_pool_create = bool
    # true if members of this group should also become Databricks admins
    admin = bool
  }))
}
# Define the variable in terraform.tfvars with the following content
# groups = {
# "AAD Group Name" = {
# workspace_access = true
# databricks_sql_access = false
# allow_cluster_create = false
# allow_instance_pool_create = false
# admin = false
# }
# }
@sivadotblog
Copy link

sivadotblog commented Mar 16, 2023

PART 1

locals {
  # Names of all AAD groups to synchronize (the keys of var.groups).
  all_groups = toset([for name, cfg in var.groups : name])
}

# Read each AAD group (including its member list) by display name.
data "azuread_group" "this" {
  for_each     = local.all_groups
  display_name = each.key
}

locals {
  # De-duplicated object IDs of the members of all AAD groups read above.
  all_group_members = toset(flatten([
    for grp in data.azuread_group.this : grp.members
  ]))
}

# Create the AAD groups in Azure Databricks with their respective entitlements.
resource "databricks_group" "this" {
  for_each = data.azuread_group.this

  display_name = each.key
  external_id  = each.value.object_id

  # Entitlements are taken from the matching entry in var.groups.
  workspace_access           = var.groups[each.key].workspace_access
  databricks_sql_access      = var.groups[each.key].databricks_sql_access
  allow_cluster_create       = var.groups[each.key].allow_cluster_create
  allow_instance_pool_create = var.groups[each.key].allow_instance_pool_create

  force = true
}

locals {
  # One member list (AAD object IDs) per group; flattened by the consumer.
  group_members = [for details in data.azuread_group.this : details.members]
}

# Resolve the group members to AAD user records. With ignore_missing set, IDs
# that do not resolve to a user are skipped instead of failing the lookup.
data "azuread_users" "users" {
  object_ids     = flatten(local.group_members)
  ignore_missing = true
}

locals {
  # AAD user records indexed by object ID; distinct() drops repeated records
  # before the map is built.
  all_aad_users = {
    for u in distinct(data.azuread_users.users.users) : u.object_id => u
  }
}

# remove duplicates in all_aad_users




# Workspace users are fully governed by AzureAD: one Databricks user is created
# (or removed) per AAD user found among the group members.
resource "databricks_user" "this" {
  for_each = local.all_aad_users

  external_id  = each.key
  user_name    = lower(each.value.user_principal_name)
  display_name = each.value.display_name
  active       = each.value.account_enabled
  force        = true
}
# SPN
locals {
  # Keys of var.spns; they are used as service principal application IDs.
  # NOTE(review): var.spns is declared outside this snippet - confirm its shape.
  all_spns = toset([for app_id, cfg in var.spns : app_id])
}


# Look up the existing Databricks group named "admins"; its id is the target
# group for the service principal memberships.
data "databricks_group" "admins" {
  display_name = "admins"
}

# Register one Databricks service principal per application ID in local.all_spns.
resource "databricks_service_principal" "spn" {
  for_each = local.all_spns

  application_id = each.value
}

# Add every service principal created above to the "admins" group. Iterating
# over the resource itself already orders this after its creation, so no
# explicit depends_on is needed.
resource "databricks_group_member" "admin_spn" {
  for_each = databricks_service_principal.spn

  member_id = each.value.id
  group_id  = data.databricks_group.admins.id
}

PART 2

locals {
  # Names of all AAD groups to synchronize (the keys of var.groups).
  all_groups = toset([for name, cfg in var.groups : name])

  # Names of the groups whose entry in var.groups has admin = true.
  admin_groups = toset([for name, cfg in var.groups : name if cfg.admin])
}
# Read each AAD group (including its member list) by display name.
data "azuread_group" "this" {
  for_each     = local.all_groups
  display_name = each.key
}

locals {
  # One member list (AAD object IDs) per group; flattened by the consumer.
  group_members = [for details in data.azuread_group.this : details.members]
}
# Resolve the group members to AAD user records. With ignore_missing set, IDs
# that do not resolve to a user are skipped instead of failing the lookup.
data "azuread_users" "users" {
  object_ids     = flatten(local.group_members)
  ignore_missing = true
}

# Look up the Databricks group named "users"; its `users` attribute supplies
# the user IDs that are resolved one by one below.
data "databricks_group" "users" {
  display_name = "users"
}

# Read the full record of every user ID listed in the "users" group.
data "databricks_user" "all_users" {
  for_each = data.databricks_group.users.users

  user_id = each.key
}

locals {
  # Databricks user name => Databricks user ID.
  all_adb_user = {
    for adb_user in data.databricks_user.all_users :
    adb_user.user_name => adb_user.id
  }
}
# Look up the Databricks group that carries the same name as each AAD group.
data "databricks_group" "all_groups" {
  for_each     = local.all_groups
  display_name = each.key
}

locals {
  # Databricks group display name => Databricks group ID.
  adb_grp_id = {
    for grp in data.databricks_group.all_groups :
    grp.display_name => grp.id
  }
}

locals {
  # AAD object ID => lower-cased user principal name.
  aad_user_emails = {
    for aad_user in data.azuread_users.users.users :
    aad_user.object_id => lower(aad_user.user_principal_name)
  }
}


locals {
  # Set of {group, member} pairs, one per AAD group membership, expressed in
  # Databricks IDs. A member is translated object ID -> principal name ->
  # Databricks user ID; if either step finds nothing, member is "not_found".
  az_grp_mem_map = toset(flatten([
    for grp_name, grp in data.azuread_group.this : [
      for object_id in grp["members"] : {
        group = local.adb_grp_id[grp_name]
        member = lookup(
          local.all_adb_user,
          lookup(local.aad_user_emails, object_id, "not_found"),
          "not_found"
        )
      }
    ]
  ]))
}

# Add every resolved member to its Databricks group.
#
# Instances are keyed by "<group ID>|<member ID>". Keying by member ID alone
# breaks as soon as one user belongs to two synchronized groups, because the
# for expression then produces the same map key twice and Terraform rejects it.
# Pairs whose member could not be resolved ("not_found") are skipped.
#
# NOTE: instance keys differ from the earlier member-only keys, so on an
# existing state the memberships are re-created once.
resource "databricks_group_member" "this" {

  for_each = {
    for pair in local.az_grp_mem_map :
    "${pair.group}|${pair.member}" => pair
    if pair.member != "not_found"
  }

  group_id  = each.value.group
  member_id = each.value.member
}

# Read the AAD groups flagged with admin = true.
data "azuread_group" "admins" {
  for_each     = local.admin_groups
  display_name = each.key
}

# Look up the existing Databricks group named "admins"; its id is the target
# group for admin memberships.
data "databricks_group" "admins" {
  display_name = "admins"
}

locals {
  # Set of {group, member} pairs for the "admins" group, one per member of an
  # admin-flagged AAD group. A member is translated object ID -> principal
  # name -> Databricks user ID; if either step finds nothing, member is
  # "not_found".
  az_admin_mem = toset(flatten([
    for grp_name, grp in data.azuread_group.admins : [
      for object_id in grp["members"] : {
        group = data.databricks_group.admins.id
        member = lookup(
          local.all_adb_user,
          lookup(local.aad_user_emails, object_id, "not_found"),
          "not_found"
        )
      }
    ]
  ]))
}

# Add every resolved admin member to the "admins" group. Instances are keyed by
# the Databricks member ID; unresolved members ("not_found") are skipped.
resource "databricks_group_member" "admins" {
  for_each = {
    for entry in local.az_admin_mem :
    entry.member => entry
    if entry.member != "not_found"
  }

  member_id = each.value.member
  group_id  = each.value.group
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment