Skip to content

Commit

Permalink
Merge pull request #3520 from annuay-google/annuay/fix-min-master-ver…
Browse files Browse the repository at this point in the history
…sion

Use version prefix in conjunction with release channels
  • Loading branch information
annuay-google authored Jan 9, 2025
2 parents 43585e3 + 443b895 commit a0c6937
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 9 deletions.
15 changes: 9 additions & 6 deletions examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ deployment_groups:
source: modules/scheduler/gke-cluster
use: [gke-a3-ultra-net-0]
settings:
release_channel: RAPID
system_node_pool_machine_type: "e2-standard-16"
system_node_pool_disk_size_gb: $(vars.system_node_pool_disk_size_gb)
system_node_pool_taints: []
Expand All @@ -106,11 +105,6 @@ deployment_groups:
master_authorized_networks:
- cidr_block: $(vars.authorized_cidr) # Allows your machine to run the kubectl command. Required for multi network setup.
display_name: "kubectl-access-network"
maintenance_exclusions:
- name: no-minor-or-node-upgrades-indefinite
start_time: "2024-12-01T00:00:00Z"
end_time: "2025-12-22T00:00:00Z"
exclusion_scope: NO_MINOR_OR_NODE_UPGRADES
additional_networks:
$(concat(
[{
Expand All @@ -127,6 +121,15 @@ deployment_groups:
}],
gke-a3-ultra-rdma-net.subnetwork_interfaces_gke
))
# Cluster versions cannot be updated through the toolkit after creation.
# Please manage the cluster version directly from the Google Cloud Console.
version_prefix: "1.31."
release_channel: RAPID
maintenance_exclusions:
- name: no-minor-or-node-upgrades-indefinite
start_time: "2024-12-01T00:00:00Z"
end_time: "2025-12-22T00:00:00Z"
exclusion_scope: NO_MINOR_OR_NODE_UPGRADES
outputs: [instructions]

- id: a3-ultragpu-pool
Expand Down
2 changes: 2 additions & 0 deletions modules/scheduler/gke-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ limitations under the License.
|------|------|
| [google-beta_google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_container_cluster) | resource |
| [google-beta_google_container_node_pool.system_node_pools](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_container_node_pool) | resource |
| [google-beta_google_container_engine_versions.version_prefix_filter](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/data-sources/google_container_engine_versions) | data source |
| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source |
| [google_project.project](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/project) | data source |

Expand Down Expand Up @@ -192,6 +193,7 @@ limitations under the License.
| <a name="input_timeout_create"></a> [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no |
| <a name="input_timeout_update"></a> [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no |
| <a name="input_upgrade_settings"></a> [upgrade\_settings](#input\_upgrade\_settings) | Defines gke cluster upgrade settings. It is highly recommended that you define all max\_surge and max\_unavailable.<br/>If max\_surge is not specified, it would be set to a default value of 0.<br/>If max\_unavailable is not specified, it would be set to a default value of 1. | <pre>object({<br/> strategy = string<br/> max_surge = optional(number)<br/> max_unavailable = optional(number)<br/> })</pre> | <pre>{<br/> "max_surge": 0,<br/> "max_unavailable": 1,<br/> "strategy": "SURGE"<br/>}</pre> | no |
| <a name="input_version_prefix"></a> [version\_prefix](#input\_version\_prefix) | If provided, Terraform will only return versions that match the string prefix. For example, `1.31.` will match all `1.31` series releases. Since this is just a string match, it's recommended that you append a `.` after minor versions to ensure that prefixes such as `1.3` don't match versions like `1.30.1-gke.10` accidentally. | `string` | `"1.31."` | no |
| <a name="input_zone"></a> [zone](#input\_zone) | Zone for a zonal cluster. | `string` | `null` | no |

## Outputs
Expand Down
18 changes: 15 additions & 3 deletions modules/scheduler/gke-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ data "google_project" "project" {
project_id = var.project_id
}

# Looks up the GKE versions available in the cluster's location, restricted to
# versions whose string starts with var.version_prefix.
data "google_container_engine_versions" "version_prefix_filter" {
  provider       = google-beta
  version_prefix = var.version_prefix

  # Zonal clusters are looked up by zone; every other availability type by region.
  location = var.cluster_availability_type == "ZONAL" ? var.zone : var.region
}

locals {
  # An explicitly supplied min_master_version takes precedence. When it is null,
  # fall back to the latest master version returned by the version-prefix lookup.
  master_version = (
    var.min_master_version == null
    ? data.google_container_engine_versions.version_prefix_filter.latest_master_version
    : var.min_master_version
  )
}

resource "google_container_cluster" "gke_cluster" {
provider = google-beta

Expand Down Expand Up @@ -159,7 +169,7 @@ resource "google_container_cluster" "gke_cluster" {
release_channel {
channel = var.release_channel
}
min_master_version = var.min_master_version
min_master_version = local.master_version

maintenance_policy {
daily_maintenance_window {
Expand Down Expand Up @@ -212,7 +222,8 @@ resource "google_container_cluster" "gke_cluster" {
lifecycle {
# Ignore all changes to the default node pool (it is removed after creation) and to min_master_version.
ignore_changes = [
node_config
node_config,
min_master_version,
]
precondition {
condition = var.default_max_pods_per_node == null || var.networking_mode == "VPC_NATIVE"
Expand Down Expand Up @@ -250,7 +261,7 @@ resource "google_container_node_pool" "system_node_pools" {
name = var.system_node_pool_name
cluster = var.cluster_reference_type == "NAME" ? google_container_cluster.gke_cluster.name : google_container_cluster.gke_cluster.self_link
location = var.cluster_availability_type == "ZONAL" ? var.zone : var.region
version = var.min_master_version
version = local.master_version

autoscaling {
total_min_node_count = var.system_node_pool_node_count.total_min_nodes
Expand Down Expand Up @@ -319,6 +330,7 @@ resource "google_container_node_pool" "system_node_pools" {
ignore_changes = [
node_config[0].labels,
node_config[0].taint,
version,
]
precondition {
condition = contains(["SURGE"], local.upgrade_settings.strategy)
Expand Down
6 changes: 6 additions & 0 deletions modules/scheduler/gke-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ variable "min_master_version" {
default = null
}

# Prefix passed to the google_container_engine_versions lookup in main.tf. The
# looked-up version is only used when min_master_version is null.
variable "version_prefix" {
  type        = string
  default     = "1.31."
  description = "If provided, Terraform will only return versions that match the string prefix. For example, `1.31.` will match all `1.31` series releases. Since this is just a string match, it's recommended that you append a `.` after minor versions to ensure that prefixes such as `1.3` don't match versions like `1.30.1-gke.10` accidentally."
}

variable "maintenance_start_time" {
description = "Start time for daily maintenance operations. Specified in GMT with `HH:MM` format."
type = string
Expand Down

0 comments on commit a0c6937

Please sign in to comment.