From f53bd9730bd04f7290013b6818fe4f41b1f7409f Mon Sep 17 00:00:00 2001 From: Dima Date: Thu, 7 Mar 2024 01:08:14 +0200 Subject: [PATCH 1/3] fix: support single node --- README.md | 6 +++--- cluster.tf | 44 +++++++++++++++++++++++++++++++------------- variables.tf | 2 ++ versions.tf | 2 +- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2790a57..aef3662 100644 --- a/README.md +++ b/README.md @@ -164,14 +164,14 @@ module "metastore_assignment" { |------|---------| | [terraform](#requirement\_terraform) | >=1.0.0 | | [azurerm](#requirement\_azurerm) | >=3.40.0 | -| [databricks](#requirement\_databricks) | >=1.14.2 | +| [databricks](#requirement\_databricks) | >=1.38.0 | ## Providers | Name | Version | |------|---------| | [azurerm](#provider\_azurerm) | >=3.40.0 | -| [databricks](#provider\_databricks) | >=1.14.2 | +| [databricks](#provider\_databricks) | >=1.38.0 | ## Modules @@ -213,7 +213,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [clusters](#input\_clusters) | Set of objects with parameters to configure Databricks clusters and assign permissions to it for certain custom groups |
set(object({
cluster_name = string
spark_version = optional(string, "13.3.x-scala2.12")
spark_conf = optional(map(any), {})
cluster_conf_passthrought = optional(bool, false)
spark_env_vars = optional(map(any), {})
data_security_mode = optional(string, "USER_ISOLATION")
node_type_id = optional(string, "Standard_D3_v2")
autotermination_minutes = optional(number, 30)
min_workers = optional(number, 1)
max_workers = optional(number, 2)
availability = optional(string, "ON_DEMAND_AZURE")
first_on_demand = optional(number, 0)
spot_bid_max_price = optional(number, 1)
cluster_log_conf_destination = optional(string, null)
init_scripts_workspace = optional(set(string), [])
init_scripts_volumes = optional(set(string), [])
init_scripts_dbfs = optional(set(string), [])
init_scripts_abfss = optional(set(string), [])
single_user_name = optional(string, null)
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
pypi_library_repository = optional(set(string), [])
maven_library_repository = optional(set(object({
coordinates = string
exclusions = set(string)
})), [])
}))
| `[]` | no | +| [clusters](#input\_clusters) | Set of objects with parameters to configure Databricks clusters and assign permissions to it for certain custom groups |
set(object({
cluster_name = string
spark_version = optional(string, "13.3.x-scala2.12")
spark_conf = optional(map(any), {})
cluster_conf_passthrought = optional(bool, false)
spark_env_vars = optional(map(any), {})
data_security_mode = optional(string, "USER_ISOLATION")
node_type_id = optional(string, "Standard_D3_v2")
autotermination_minutes = optional(number, 30)
min_workers = optional(number, 1)
max_workers = optional(number, 2)
availability = optional(string, "ON_DEMAND_AZURE")
first_on_demand = optional(number, 0)
spot_bid_max_price = optional(number, 1)
cluster_log_conf_destination = optional(string, null)
init_scripts_workspace = optional(set(string), [])
init_scripts_volumes = optional(set(string), [])
init_scripts_dbfs = optional(set(string), [])
init_scripts_abfss = optional(set(string), [])
single_user_name = optional(string, null)
single_node_enable = optional(bool, false)
custom_tags = optional(map(string), { "ResourceClass" = "SingleNode" })
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
pypi_library_repository = optional(set(string), [])
maven_library_repository = optional(set(object({
coordinates = string
exclusions = set(string)
})), [])
}))
| `[]` | no | | [create\_databricks\_access\_policy\_to\_key\_vault](#input\_create\_databricks\_access\_policy\_to\_key\_vault) | Boolean flag to enable creation of Key Vault Access Policy for Databricks Global Service Principal. | `bool` | `true` | no | | [custom\_cluster\_policies](#input\_custom\_cluster\_policies) | Provides an ability to create custom cluster policy, assign it to cluster and grant CAN\_USE permissions on it to certain custom groups
name - name of custom cluster policy to create
can\_use - list of strings, where values are custom group names; these groups have to be created with Terraform;
definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; |
list(object({
name = string
can_use = list(string)
definition = any
}))
|
[
{
"can_use": null,
"definition": null,
"name": null
}
]
| no | | [global\_databricks\_sp\_object\_id](#input\_global\_databricks\_sp\_object\_id) | Global 'AzureDatabricks' SP object id. Used to create Key Vault Access Policy for Secret Scope | `string` | `"9b38785a-6e08-4087-a0c4-20634343f21f"` | no | diff --git a/cluster.tf b/cluster.tf index 3fe51a1..b7c2643 100644 --- a/cluster.tf +++ b/cluster.tf @@ -1,29 +1,47 @@ +locals { + spark_conf_single_node = { + "spark.master" = "local[*]" + "spark.databricks.cluster.profile" = "singleNode" + } + conf_passthrought = { + "spark.databricks.cluster.profile" : "serverless", + "spark.databricks.repl.allowedLanguages" : "python,sql", + "spark.databricks.passthrough.enabled" : "true", + "spark.databricks.pyspark.enableProcessIsolation" : "true" + } +} + resource "databricks_cluster" "cluster" { for_each = { for cluster in var.clusters : cluster.cluster_name => cluster } cluster_name = each.value.cluster_name spark_version = each.value.spark_version - spark_conf = each.value.cluster_conf_passthrought ? merge({ - "spark.databricks.cluster.profile" : "serverless", - "spark.databricks.repl.allowedLanguages" : "python,sql", - "spark.databricks.passthrough.enabled" : "true", - "spark.databricks.pyspark.enableProcessIsolation" : "true" - }, each.value.spark_conf) : each.value.spark_conf + spark_conf = merge( + each.value.cluster_conf_passthrought ? local.conf_passthrought : {}, + each.value.single_node_enable == true ? local.spark_conf_single_node : {}, + each.value.spark_conf) spark_env_vars = each.value.spark_env_vars data_security_mode = each.value.cluster_conf_passthrought ? null : each.value.data_security_mode node_type_id = each.value.node_type_id autotermination_minutes = each.value.autotermination_minutes single_user_name = each.value.single_user_name + custom_tags = each.value.single_node_enable ? 
each.value.custom_tags : null - autoscale { - min_workers = each.value.min_workers - max_workers = each.value.max_workers + dynamic "azure_attributes" { + for_each = each.value.single_node_enable == true ? [] : [1] + content { + availability = each.value.availability + first_on_demand = each.value.first_on_demand + spot_bid_max_price = each.value.spot_bid_max_price + } } - azure_attributes { - availability = each.value.availability - first_on_demand = each.value.first_on_demand - spot_bid_max_price = each.value.spot_bid_max_price + dynamic "autoscale" { + for_each = each.value.single_node_enable == true ? [] : [1] + content { + min_workers = each.value.min_workers + max_workers = each.value.max_workers + } } dynamic "cluster_log_conf" { diff --git a/variables.tf b/variables.tf index 515600f..7cdd912 100644 --- a/variables.tf +++ b/variables.tf @@ -166,6 +166,8 @@ variable "clusters" { init_scripts_dbfs = optional(set(string), []) init_scripts_abfss = optional(set(string), []) single_user_name = optional(string, null) + single_node_enable = optional(bool, false) + custom_tags = optional(map(string), { "ResourceClass" = "SingleNode" }) permissions = optional(set(object({ group_name = string permission_level = string diff --git a/versions.tf b/versions.tf index 60901f7..634ca9c 100644 --- a/versions.tf +++ b/versions.tf @@ -8,7 +8,7 @@ terraform { } databricks = { source = "databricks/databricks" - version = ">=1.14.2" + version = ">=1.38.0" } } } From dff3af8455ff07b08a7cc7adbf156fef4b6ef87f Mon Sep 17 00:00:00 2001 From: Dima Date: Thu, 7 Mar 2024 15:54:06 +0200 Subject: [PATCH 2/3] fix: changed tags --- README.md | 6 +++--- cluster.tf | 2 +- variables.tf | 2 +- versions.tf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index aef3662..73e8549 100644 --- a/README.md +++ b/README.md @@ -164,14 +164,14 @@ module "metastore_assignment" { |------|---------| | [terraform](#requirement\_terraform) | >=1.0.0 | | 
[azurerm](#requirement\_azurerm) | >=3.40.0 | -| [databricks](#requirement\_databricks) | >=1.38.0 | +| [databricks](#requirement\_databricks) | >=1.30.0 | ## Providers | Name | Version | |------|---------| | [azurerm](#provider\_azurerm) | >=3.40.0 | -| [databricks](#provider\_databricks) | >=1.38.0 | +| [databricks](#provider\_databricks) | >=1.30.0 | ## Modules @@ -213,7 +213,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [clusters](#input\_clusters) | Set of objects with parameters to configure Databricks clusters and assign permissions to it for certain custom groups |
set(object({
cluster_name = string
spark_version = optional(string, "13.3.x-scala2.12")
spark_conf = optional(map(any), {})
cluster_conf_passthrought = optional(bool, false)
spark_env_vars = optional(map(any), {})
data_security_mode = optional(string, "USER_ISOLATION")
node_type_id = optional(string, "Standard_D3_v2")
autotermination_minutes = optional(number, 30)
min_workers = optional(number, 1)
max_workers = optional(number, 2)
availability = optional(string, "ON_DEMAND_AZURE")
first_on_demand = optional(number, 0)
spot_bid_max_price = optional(number, 1)
cluster_log_conf_destination = optional(string, null)
init_scripts_workspace = optional(set(string), [])
init_scripts_volumes = optional(set(string), [])
init_scripts_dbfs = optional(set(string), [])
init_scripts_abfss = optional(set(string), [])
single_user_name = optional(string, null)
single_node_enable = optional(bool, false)
custom_tags = optional(map(string), { "ResourceClass" = "SingleNode" })
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
pypi_library_repository = optional(set(string), [])
maven_library_repository = optional(set(object({
coordinates = string
exclusions = set(string)
})), [])
}))
| `[]` | no | +| [clusters](#input\_clusters) | Set of objects with parameters to configure Databricks clusters and assign permissions to it for certain custom groups |
set(object({
cluster_name = string
spark_version = optional(string, "13.3.x-scala2.12")
spark_conf = optional(map(any), {})
cluster_conf_passthrought = optional(bool, false)
spark_env_vars = optional(map(any), {})
data_security_mode = optional(string, "USER_ISOLATION")
node_type_id = optional(string, "Standard_D3_v2")
autotermination_minutes = optional(number, 30)
min_workers = optional(number, 1)
max_workers = optional(number, 2)
availability = optional(string, "ON_DEMAND_AZURE")
first_on_demand = optional(number, 0)
spot_bid_max_price = optional(number, 1)
cluster_log_conf_destination = optional(string, null)
init_scripts_workspace = optional(set(string), [])
init_scripts_volumes = optional(set(string), [])
init_scripts_dbfs = optional(set(string), [])
init_scripts_abfss = optional(set(string), [])
single_user_name = optional(string, null)
single_node_enable = optional(bool, false)
custom_tags = optional(map(string), {})
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
pypi_library_repository = optional(set(string), [])
maven_library_repository = optional(set(object({
coordinates = string
exclusions = set(string)
})), [])
}))
| `[]` | no | | [create\_databricks\_access\_policy\_to\_key\_vault](#input\_create\_databricks\_access\_policy\_to\_key\_vault) | Boolean flag to enable creation of Key Vault Access Policy for Databricks Global Service Principal. | `bool` | `true` | no | | [custom\_cluster\_policies](#input\_custom\_cluster\_policies) | Provides an ability to create custom cluster policy, assign it to cluster and grant CAN\_USE permissions on it to certain custom groups
name - name of custom cluster policy to create
can\_use - list of strings, where values are custom group names; these groups have to be created with Terraform;
definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; |
list(object({
name = string
can_use = list(string)
definition = any
}))
|
[
{
"can_use": null,
"definition": null,
"name": null
}
]
| no | | [global\_databricks\_sp\_object\_id](#input\_global\_databricks\_sp\_object\_id) | Global 'AzureDatabricks' SP object id. Used to create Key Vault Access Policy for Secret Scope | `string` | `"9b38785a-6e08-4087-a0c4-20634343f21f"` | no | diff --git a/cluster.tf b/cluster.tf index b7c2643..7bdb96e 100644 --- a/cluster.tf +++ b/cluster.tf @@ -25,7 +25,7 @@ resource "databricks_cluster" "cluster" { node_type_id = each.value.node_type_id autotermination_minutes = each.value.autotermination_minutes single_user_name = each.value.single_user_name - custom_tags = each.value.single_node_enable ? each.value.custom_tags : null + custom_tags = merge(each.value.single_node_enable ? { "ResourceClass" = "SingleNode" } : {}, each.value.custom_tags) dynamic "azure_attributes" { for_each = each.value.single_node_enable == true ? [] : [1] diff --git a/variables.tf b/variables.tf index 7cdd912..96b478e 100644 --- a/variables.tf +++ b/variables.tf @@ -167,7 +167,7 @@ variable "clusters" { init_scripts_abfss = optional(set(string), []) single_user_name = optional(string, null) single_node_enable = optional(bool, false) - custom_tags = optional(map(string), { "ResourceClass" = "SingleNode" }) + custom_tags = optional(map(string), {}) permissions = optional(set(object({ group_name = string permission_level = string diff --git a/versions.tf b/versions.tf index 634ca9c..471f531 100644 --- a/versions.tf +++ b/versions.tf @@ -8,7 +8,7 @@ terraform { } databricks = { source = "databricks/databricks" - version = ">=1.38.0" + version = ">=1.30.0" } } } From 2936d1de06690ea78107ad0bb4a1f68bf632c3c8 Mon Sep 17 00:00:00 2001 From: Dima Date: Mon, 11 Mar 2024 10:53:24 +0200 Subject: [PATCH 3/3] fix: changed cluster attribute --- cluster.tf | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cluster.tf b/cluster.tf index 7bdb96e..dc486cd 100644 --- a/cluster.tf +++ b/cluster.tf @@ -27,17 +27,14 @@ resource "databricks_cluster" "cluster" { single_user_name = 
each.value.single_user_name custom_tags = merge(each.value.single_node_enable ? { "ResourceClass" = "SingleNode" } : {}, each.value.custom_tags) - dynamic "azure_attributes" { - for_each = each.value.single_node_enable == true ? [] : [1] - content { - availability = each.value.availability - first_on_demand = each.value.first_on_demand - spot_bid_max_price = each.value.spot_bid_max_price - } + azure_attributes { + availability = each.value.availability + first_on_demand = each.value.first_on_demand + spot_bid_max_price = each.value.spot_bid_max_price } dynamic "autoscale" { - for_each = each.value.single_node_enable == true ? [] : [1] + for_each = each.value.single_node_enable ? [] : [1] content { min_workers = each.value.min_workers max_workers = each.value.max_workers