
Commit

EKS cluster ignores subnet_ids changes.
miguelhar committed Mar 4, 2025
1 parent 068d13f commit 4f96c3e
Showing 11 changed files with 113 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -6,7 +6,7 @@ parameters:
default: "v3.11.2"
terraform_version:
type: string
default: "1.9.3"
default: "1.10.5"
hcledit_version:
type: string
default: "0.2.9"
3 changes: 2 additions & 1 deletion modules/eks/cluster.tf
@@ -74,7 +74,8 @@ resource "aws_eks_cluster" "this" {
lifecycle {
ignore_changes = [
encryption_config,
kubernetes_network_config
kubernetes_network_config,
vpc_config[0].subnet_ids
]
}
}
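
Note (illustrative, not part of the diff): with vpc_config[0].subnet_ids in ignore_changes, Terraform keeps managing the cluster but no longer plans an update when the configured subnet list drifts from the cluster's actual subnets; the [0] index is needed because vpc_config is a nested block addressed as a list. A minimal sketch of the resulting resource, with assumed variable names:

resource "aws_eks_cluster" "example" {
  name     = var.deploy_id
  role_arn = var.cluster_role_arn # placeholder

  vpc_config {
    subnet_ids = var.private_subnet_ids # edits to this list are now ignored
  }

  lifecycle {
    ignore_changes = [
      encryption_config,
      kubernetes_network_config,
      vpc_config[0].subnet_ids
    ]
  }
}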
2 changes: 1 addition & 1 deletion modules/infra/main.tf
@@ -110,7 +110,7 @@ module "bastion" {
locals {
cost_usage_report_info = var.domino_cur.provision_cost_usage_report && length(module.cost_usage_report) > 0 ? module.cost_usage_report[0].info : null
bastion_info = var.bastion.enabled && length(module.bastion) > 0 ? module.bastion[0].info : null
add_s3_pol = local.create_s3 ? [module.storage.info.s3.iam_policy_arn] : []
add_s3_pol = [module.storage.info.s3.iam_policy_arn]
add_ecr_pol = local.create_ecr ? concat([module.storage.info.ecr.iam_policy_arn], local.add_s3_pol) : local.add_s3_pol
node_iam_policies = local.cost_usage_report_info != null ? concat(local.add_ecr_pol, [local.cost_usage_report_info.cur_iam_policy_arn]) : local.add_ecr_pol
}
2 changes: 1 addition & 1 deletion modules/infra/submodules/network/outputs.tf
@@ -16,7 +16,7 @@ output "info" {
eips = [for k, eip in aws_eip.public : eip.public_ip]
vpc_cidrs = local.create_vpc ? aws_vpc.this[0].cidr_block : data.aws_vpc.provided[0].cidr_block
pod_cidrs = local.pod_cidr_blocks
s3_cidrs = local.create_vpc && var.network.create_s3_endpoint ? data.aws_prefix_list.s3[0].cidr_blocks : null
s3_cidrs = local.create_vpc ? data.aws_prefix_list.s3[0].cidr_blocks : null
ecr_endpoint = local.create_ecr_endpoint ? {
security_group_id = aws_security_group.ecr_endpoint[0].id
} : null
11 changes: 6 additions & 5 deletions modules/infra/submodules/network/vpc.tf
@@ -49,6 +49,12 @@ resource "aws_vpc_endpoint" "s3" {
}
}


data "aws_prefix_list" "s3" {
count = local.create_vpc ? 1 : 0
prefix_list_id = aws_vpc_endpoint.s3[0].prefix_list_id
}

resource "aws_security_group" "s3_endpoint" {
count = local.create_s3_endpoint ? 1 : 0
name = "${var.deploy_id}-s3-endpoint"
@@ -82,11 +88,6 @@ resource "aws_vpc_endpoint" "s3_interface" {
depends_on = [aws_vpc_endpoint.s3]
}

data "aws_prefix_list" "s3" {
count = local.create_vpc && var.network.create_s3_endpoint ? 1 : 0
prefix_list_id = aws_vpc_endpoint.s3[0].prefix_list_id
}

resource "aws_security_group" "ecr_endpoint" {
count = local.create_ecr_endpoint ? 1 : 0
name = "${var.deploy_id}-ecr"
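
Illustrative sketch (not part of this commit): the prefix-list CIDRs now exported as s3_cidrs whenever the VPC is created are the kind of value typically consumed by security-group rules that scope HTTPS egress to S3, along these lines (module address and security group are assumed):

resource "aws_security_group_rule" "s3_https_egress" {
  type              = "egress"
  from_port         = 443
  to_port           = 443
  protocol          = "tcp"
  cidr_blocks       = module.network.info.s3_cidrs # assumed module address
  security_group_id = aws_security_group.nodes.id  # placeholder
}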
4 changes: 2 additions & 2 deletions modules/infra/submodules/storage/README.md
@@ -38,7 +38,7 @@ No modules.
| [aws_ecr_repository.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecr_repository) | resource |
| [aws_efs_access_point.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_access_point) | resource |
| [aws_efs_file_system.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_file_system) | resource |
| [aws_efs_mount_target.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_mount_target) | resource |
| [aws_efs_mount_target.eks_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_mount_target) | resource |
| [aws_fsx_ontap_file_system.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_file_system) | resource |
| [aws_fsx_ontap_storage_virtual_machine.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_storage_virtual_machine) | resource |
| [aws_fsx_ontap_volume.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_volume) | resource |
@@ -95,7 +95,7 @@ No modules.
|------|-------------|------|---------|:--------:|
| <a name="input_deploy_id"></a> [deploy\_id](#input\_deploy\_id) | Domino Deployment ID | `string` | n/a | yes |
| <a name="input_kms_info"></a> [kms\_info](#input\_kms\_info) | key\_id = KMS key id.<br/> key\_arn = KMS key arn.<br/> enabled = KMS key is enabled | <pre>object({<br/> key_id = string<br/> key_arn = string<br/> enabled = bool<br/> })</pre> | n/a | yes |
| <a name="input_network_info"></a> [network\_info](#input\_network\_info) | id = VPC ID.<br/> subnets = {<br/> public = List of public Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet ud<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> private = List of private Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet ud<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> pod = List of pod Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet ud<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> } | <pre>object({<br/> vpc_id = string<br/> subnets = object({<br/> public = optional(list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> })), [])<br/> private = list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> }))<br/> pod = optional(list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> })), [])<br/> })<br/> })</pre> | n/a | yes |
| <a name="input_network_info"></a> [network\_info](#input\_network\_info) | id = VPC ID.<br/> subnets = {<br/> public = List of public Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet ud<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> private = List of private Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet id<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> pod = List of pod Subnets.<br/> [{<br/> name = Subnet name.<br/> subnet\_id = Subnet ud<br/> az = Subnet availability\_zone<br/> az\_id = Subnet availability\_zone\_id<br/> }]<br/> } | <pre>object({<br/> vpc_id = string<br/> subnets = object({<br/> public = optional(list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> })), [])<br/> private = list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> }))<br/> pod = optional(list(object({<br/> name = string<br/> subnet_id = string<br/> az = string<br/> az_id = string<br/> })), [])<br/> })<br/> })</pre> | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | AWS region for the deployment | `string` | n/a | yes |
| <a name="input_storage"></a> [storage](#input\_storage) | storage = {<br/> filesystem\_type = File system type(netapp\|efs\|none)<br/> efs = {<br/> access\_point\_path = Filesystem path for efs.<br/> backup\_vault = {<br/> create = Create backup vault for EFS toggle.<br/> force\_destroy = Toggle to allow automatic destruction of all backups when destroying.<br/> backup = {<br/> schedule = Cron-style schedule for EFS backup vault (default: once a day at 12pm).<br/> cold\_storage\_after = Move backup data to cold storage after this many days.<br/> delete\_after = Delete backup data after this many days.<br/> }<br/> }<br/> }<br/> netapp = {<br/> migrate\_from\_efs = {<br/> enabled = When enabled, both EFS and NetApp resources will be provisioned simultaneously during the migration period.<br/> datasync = {<br/> enabled = Toggle to enable AWS DataSync for automated data transfer from EFS to NetApp FSx.<br/> schedule = Cron-style schedule for the DataSync task, specifying how often the data transfer will occur (default: hourly).<br/> verify\_mode = One of: POINT\_IN\_TIME\_CONSISTENT, ONLY\_FILES\_TRANSFERRED, NONE.<br/> }<br/> }<br/> deployment\_type = netapp ontap deployment type,('MULTI\_AZ\_1', 'MULTI\_AZ\_2', 'SINGLE\_AZ\_1', 'SINGLE\_AZ\_2')<br/> storage\_capacity = Filesystem Storage capacity<br/> throughput\_capacity = Filesystem throughput capacity<br/> automatic\_backup\_retention\_days = How many days to keep backups<br/> daily\_automatic\_backup\_start\_time = Start time in 'HH:MM' format to initiate backups<br/><br/> storage\_capacity\_autosizing = Options for the FXN automatic storage capacity increase, cloudformation template<br/> enabled = Enable automatic storage capacity increase.<br/> threshold = Used storage capacity threshold.<br/> percent\_capacity\_increase = The percentage increase in storage capacity when used storage exceeds<br/> LowFreeDataStorageCapacityThreshold. Minimum increase is 10 %.<br/> notification\_email\_address = The email address for alarm notification.<br/> }<br/> volume = {<br/> create = Create a volume associated with the filesystem.<br/> name\_suffix = The suffix to name the volume<br/> storage\_efficiency\_enabled = Toggle storage\_efficiency\_enabled<br/> junction\_path = filesystem junction path<br/> size\_in\_megabytes = The size of the volume<br/> }<br/> s3 = {<br/> force\_destroy\_on\_deletion = Toogle to allow recursive deletion of all objects in the s3 buckets. if 'false' terraform will NOT be able to delete non-empty buckets.<br/> }<br/> ecr = {<br/> force\_destroy\_on\_deletion = Toogle to allow recursive deletion of all objects in the ECR repositories. if 'false' terraform will NOT be able to delete non-empty repositories.<br/> }<br/> enable\_remote\_backup = Enable tagging required for cross-account backups<br/> costs\_enabled = Determines whether to provision domino cost related infrastructures, ie, long term storage<br/> }<br/> } | <pre>object({<br/> filesystem_type = string<br/> efs = optional(object({<br/> access_point_path = optional(string)<br/> backup_vault = optional(object({<br/> create = optional(bool)<br/> force_destroy = optional(bool)<br/> backup = optional(object({<br/> schedule = optional(string)<br/> cold_storage_after = optional(number)<br/> delete_after = optional(number)<br/> }))<br/> }))<br/> }))<br/> netapp = optional(object({<br/> migrate_from_efs = optional(object({<br/> enabled = optional(bool)<br/> datasync = optional(object({<br/> enabled = optional(bool)<br/> target = optional(string)<br/> schedule = optional(string)<br/> verify_mode = optional(string)<br/> }))<br/> }))<br/> deployment_type = optional(string)<br/> storage_capacity = optional(number)<br/> throughput_capacity = optional(number)<br/> automatic_backup_retention_days = optional(number)<br/> daily_automatic_backup_start_time = optional(string)<br/> storage_capacity_autosizing = optional(object({<br/> enabled = optional(bool)<br/> threshold = optional(number)<br/> percent_capacity_increase = optional(number)<br/> notification_email_address = optional(string)<br/> }))<br/> volume = optional(object({<br/> name_suffix = optional(string)<br/> storage_efficiency_enabled = optional(bool)<br/> create = optional(bool)<br/> junction_path = optional(string)<br/> size_in_megabytes = optional(number)<br/> }))<br/> }))<br/> s3 = optional(object({<br/> create = optional(bool)<br/> force_destroy_on_deletion = optional(bool)<br/> }))<br/> ecr = optional(object({<br/> create = optional(bool)<br/> force_destroy_on_deletion = optional(bool)<br/> }))<br/> enable_remote_backup = optional(bool)<br/> costs_enabled = optional(bool)<br/> })</pre> | n/a | yes |
| <a name="input_use_fips_endpoint"></a> [use\_fips\_endpoint](#input\_use\_fips\_endpoint) | Use aws FIPS endpoints | `bool` | `false` | no |
20 changes: 12 additions & 8 deletions modules/infra/submodules/storage/efs.tf
@@ -7,7 +7,8 @@ resource "aws_efs_file_system" "eks" {
kms_key_id = local.kms_key_arn

tags = merge(local.backup_tagging, {
"Name" = var.deploy_id
"Name" = var.deploy_id
"migrated" = "aws_efs_mount_target"
})

lifecycle {
@@ -31,12 +32,11 @@ resource "aws_security_group" "efs" {
}
}


resource "aws_efs_mount_target" "eks" {
count = local.deploy_efs ? length(local.private_subnet_ids) : 0
resource "aws_efs_mount_target" "eks_cluster" {
for_each = local.deploy_efs ? { for sb in var.network_info.subnets.private : sb.name => sb } : {}
file_system_id = aws_efs_file_system.eks[0].id
security_groups = [aws_security_group.efs[0].id]
subnet_id = element(local.private_subnet_ids, count.index)
subnet_id = each.value.subnet_id
}


Expand Down Expand Up @@ -75,7 +75,11 @@ moved {
to = aws_efs_mount_target.eks[0]
}

moved {
from = aws_efs_access_point.eks
to = aws_efs_access_point.eks[0]

removed {
from = aws_efs_mount_target.eks

lifecycle {
destroy = false
}
}
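
Note (hypothetical, not part of the diff): the removed block drops the old count-indexed aws_efs_mount_target.eks entries from state without destroying the underlying mount targets, while the renamed resource is keyed by private-subnet name via for_each. One way to re-adopt the existing mount targets under the new addresses is a set of import blocks (Terraform 1.5+); the subnet-name keys and mount-target IDs below are placeholders, and in a real configuration the to address would include the full module path:

import {
  to = aws_efs_mount_target.eks_cluster["my-deploy-private-us-east-1a"] # assumed key
  id = "fsmt-0123456789abcdef0"                                         # placeholder mount target ID
}

import {
  to = aws_efs_mount_target.eks_cluster["my-deploy-private-us-east-1b"] # assumed key
  id = "fsmt-0fedcba9876543210"                                         # placeholder mount target ID
}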
2 changes: 1 addition & 1 deletion modules/infra/submodules/storage/variables.tf
@@ -171,7 +171,7 @@ variable "network_info" {
private = List of private Subnets.
[{
name = Subnet name.
subnet_id = Subnet ud
subnet_id = Subnet id
az = Subnet availability_zone
az_id = Subnet availability_zone_id
}]
52 changes: 25 additions & 27 deletions modules/nodes/main.tf
@@ -22,37 +22,12 @@ locals {
taints = coalesce(ng.gpu, false) || anytrue([for itype in ng.instance_types : length(data.aws_ec2_instance_type.all[itype].gpus) > 0]) ? distinct(concat(local.gpu_taints, ng.taints)) : ng.taints
})
}
}

data "aws_ec2_instance_type_offerings" "nodes" {
for_each = {
for name, ng in local.node_groups :
name => ng.instance_types
}

filter {
name = "instance-type"
values = each.value
}

location_type = "availability-zone-id"
}

data "aws_ami" "custom" {
for_each = toset([for k, v in local.node_groups : v.ami if v.ami != null])

filter {
name = "image-id"
values = [each.value]
}
}

locals {
multi_zone_node_groups = [
for ng_name, ng in local.node_groups : {
ng_name = ng_name
sb_name = join("_", [for sb_name, sb in var.network_info.subnets.private : sb.az_id if contains(ng.availability_zone_ids, sb.az_id)])
subnet = { for sb_name, sb in var.network_info.subnets.private : sb_name => sb if contains(ng.availability_zone_ids, sb.az_id) }
subnet = { for sb in var.network_info.subnets.private : sb.name => sb if contains(ng.availability_zone_ids, sb.az_id) }
availability_zones = [for sb in var.network_info.subnets.private : sb.az if contains(ng.availability_zone_ids, sb.az_id)]
node_group = merge(ng, {
availability_zone_ids = [for sb in var.network_info.subnets.private : sb.az_id if contains(ng.availability_zone_ids, sb.az_id)]
@@ -66,7 +41,7 @@ locals {
for ng_name, ng in local.node_groups : [
for sb_name, sb in var.network_info.subnets.private : {
ng_name = ng_name
sb_name = sb_name
sb_name = sb.name
subnet = sb
node_group = merge(ng, {
availability_zone_ids = [sb.az_id]
@@ -82,6 +57,29 @@ locals {
node_groups_by_name = { for ngz in local.node_groups_per_zone : "${ngz.ng_name}-${ngz.sb_name}" => ngz }
}

data "aws_ec2_instance_type_offerings" "nodes" {
for_each = {
for name, ng in local.node_groups :
name => ng.instance_types
}

filter {
name = "instance-type"
values = each.value
}

location_type = "availability-zone-id"
}

data "aws_ami" "custom" {
for_each = toset([for k, v in local.node_groups : v.ami if v.ami != null])

filter {
name = "image-id"
values = [each.value]
}
}

resource "terraform_data" "calico_setup" {
count = try(fileexists(var.eks_info.k8s_pre_setup_sh_file), false) ? 1 : 0

3 changes: 2 additions & 1 deletion modules/nodes/nodes.tf
@@ -76,12 +76,13 @@ data "aws_ssm_parameter" "eks_gpu_ami_release_version" {
name = "/aws/service/eks/optimized-ami/${var.eks_info.cluster.version}/amazon-linux-2023/x86_64/nvidia/recommended/release_version"
}


resource "aws_eks_node_group" "node_groups" {
for_each = local.node_groups_by_name
cluster_name = var.eks_info.cluster.specs.name
version = each.value.node_group.ami != null ? null : var.eks_info.cluster.version
release_version = each.value.node_group.ami != null ? null : (each.value.node_group.gpu ? nonsensitive(data.aws_ssm_parameter.eks_gpu_ami_release_version.value) : nonsensitive(data.aws_ssm_parameter.eks_ami_release_version.value))
node_group_name = "${var.eks_info.cluster.specs.name}-${each.key}"
node_group_name = each.key
node_role_arn = var.eks_info.nodes.roles[0].arn
subnet_ids = try(lookup(each.value.node_group, "single_nodegroup", false), false) ? [for s in values(each.value.subnet) : s.subnet_id] : [each.value.subnet.subnet_id]
force_update_version = true
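
Illustrative note (not part of the diff): with private subnets keyed by name, each per-zone entry is addressed as "<node-group name>-<subnet name>", and that key is now used verbatim as the EKS node group name. A hypothetical sketch of the keying, with made-up names:

locals {
  # hypothetical inputs, mirroring the shape of node_groups_per_zone
  example_per_zone = [
    { ng_name = "compute", sb_name = "my-deploy-private-1" },
    { ng_name = "compute", sb_name = "my-deploy-private-2" },
  ]

  # same keying expression as node_groups_by_name
  example_keys = { for ngz in local.example_per_zone : "${ngz.ng_name}-${ngz.sb_name}" => ngz }
  # yields keys "compute-my-deploy-private-1" and "compute-my-deploy-private-2"
}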
