diff --git a/.circleci/config.yml b/.circleci/config.yml
index 536232b7..67f8df0c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -6,7 +6,7 @@ parameters:
default: "v3.11.2"
terraform_version:
type: string
- default: "1.9.3"
+ default: "1.10.5"
hcledit_version:
type: string
default: "0.2.9"
diff --git a/modules/eks/cluster.tf b/modules/eks/cluster.tf
index d082b971..12db3fe6 100644
--- a/modules/eks/cluster.tf
+++ b/modules/eks/cluster.tf
@@ -74,7 +74,8 @@ resource "aws_eks_cluster" "this" {
lifecycle {
ignore_changes = [
encryption_config,
- kubernetes_network_config
+ kubernetes_network_config,
+ vpc_config[0].subnet_ids
]
}
}
diff --git a/modules/infra/main.tf b/modules/infra/main.tf
index 62cb83f6..f1e7fb75 100644
--- a/modules/infra/main.tf
+++ b/modules/infra/main.tf
@@ -110,7 +110,7 @@ module "bastion" {
locals {
cost_usage_report_info = var.domino_cur.provision_cost_usage_report && length(module.cost_usage_report) > 0 ? module.cost_usage_report[0].info : null
bastion_info = var.bastion.enabled && length(module.bastion) > 0 ? module.bastion[0].info : null
- add_s3_pol = local.create_s3 ? [module.storage.info.s3.iam_policy_arn] : []
+ add_s3_pol = [module.storage.info.s3.iam_policy_arn] # NOTE(review): no longer gated on local.create_s3 - confirm this ARN is never null when S3 is not created
add_ecr_pol = local.create_ecr ? concat([module.storage.info.ecr.iam_policy_arn], local.add_s3_pol) : local.add_s3_pol
node_iam_policies = local.cost_usage_report_info != null ? concat(local.add_ecr_pol, [local.cost_usage_report_info.cur_iam_policy_arn]) : local.add_ecr_pol
}
diff --git a/modules/infra/submodules/network/outputs.tf b/modules/infra/submodules/network/outputs.tf
index 7de8e87c..51011749 100644
--- a/modules/infra/submodules/network/outputs.tf
+++ b/modules/infra/submodules/network/outputs.tf
@@ -16,7 +16,7 @@ output "info" {
eips = [for k, eip in aws_eip.public : eip.public_ip]
vpc_cidrs = local.create_vpc ? aws_vpc.this[0].cidr_block : data.aws_vpc.provided[0].cidr_block
pod_cidrs = local.pod_cidr_blocks
- s3_cidrs = local.create_vpc && var.network.create_s3_endpoint ? data.aws_prefix_list.s3[0].cidr_blocks : null
+ s3_cidrs = local.create_vpc ? data.aws_prefix_list.s3[0].cidr_blocks : null
ecr_endpoint = local.create_ecr_endpoint ? {
security_group_id = aws_security_group.ecr_endpoint[0].id
} : null
diff --git a/modules/infra/submodules/network/vpc.tf b/modules/infra/submodules/network/vpc.tf
index 3b193311..80fb498c 100644
--- a/modules/infra/submodules/network/vpc.tf
+++ b/modules/infra/submodules/network/vpc.tf
@@ -49,6 +49,12 @@ resource "aws_vpc_endpoint" "s3" {
}
}
+
+data "aws_prefix_list" "s3" {
+ count = local.create_vpc ? 1 : 0 # NOTE(review): previously also required var.network.create_s3_endpoint - confirm aws_vpc_endpoint.s3[0] always exists when local.create_vpc is true
+ prefix_list_id = aws_vpc_endpoint.s3[0].prefix_list_id
+}
+
resource "aws_security_group" "s3_endpoint" {
count = local.create_s3_endpoint ? 1 : 0
name = "${var.deploy_id}-s3-endpoint"
@@ -82,11 +88,6 @@ resource "aws_vpc_endpoint" "s3_interface" {
depends_on = [aws_vpc_endpoint.s3]
}
-data "aws_prefix_list" "s3" {
- count = local.create_vpc && var.network.create_s3_endpoint ? 1 : 0
- prefix_list_id = aws_vpc_endpoint.s3[0].prefix_list_id
-}
-
resource "aws_security_group" "ecr_endpoint" {
count = local.create_ecr_endpoint ? 1 : 0
name = "${var.deploy_id}-ecr"
diff --git a/modules/infra/submodules/storage/README.md b/modules/infra/submodules/storage/README.md
index 401cb55a..9ffcf44c 100644
--- a/modules/infra/submodules/storage/README.md
+++ b/modules/infra/submodules/storage/README.md
@@ -38,7 +38,7 @@ No modules.
| [aws_ecr_repository.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecr_repository) | resource |
| [aws_efs_access_point.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_access_point) | resource |
| [aws_efs_file_system.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_file_system) | resource |
-| [aws_efs_mount_target.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_mount_target) | resource |
+| [aws_efs_mount_target.eks_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/efs_mount_target) | resource |
| [aws_fsx_ontap_file_system.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_file_system) | resource |
| [aws_fsx_ontap_storage_virtual_machine.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_storage_virtual_machine) | resource |
| [aws_fsx_ontap_volume.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/fsx_ontap_volume) | resource |
@@ -95,7 +95,7 @@ No modules.
|------|-------------|------|---------|:--------:|
| [deploy\_id](#input\_deploy\_id) | Domino Deployment ID | `string` | n/a | yes |
| [kms\_info](#input\_kms\_info) | key\_id = KMS key id.
key\_arn = KMS key arn.
enabled = KMS key is enabled |
object({| n/a | yes | -| [network\_info](#input\_network\_info) | id = VPC ID.
key_id = string
key_arn = string
enabled = bool
})
object({| n/a | yes | +| [network\_info](#input\_network\_info) | id = VPC ID.
vpc_id = string
subnets = object({
public = optional(list(object({
name = string
subnet_id = string
az = string
az_id = string
})), [])
private = list(object({
name = string
subnet_id = string
az = string
az_id = string
}))
pod = optional(list(object({
name = string
subnet_id = string
az = string
az_id = string
})), [])
})
})
object({| n/a | yes | | [region](#input\_region) | AWS region for the deployment | `string` | n/a | yes | | [storage](#input\_storage) | storage = {
vpc_id = string
subnets = object({
public = optional(list(object({
name = string
subnet_id = string
az = string
az_id = string
})), [])
private = list(object({
name = string
subnet_id = string
az = string
az_id = string
}))
pod = optional(list(object({
name = string
subnet_id = string
az = string
az_id = string
})), [])
})
})
object({| n/a | yes | | [use\_fips\_endpoint](#input\_use\_fips\_endpoint) | Use aws FIPS endpoints | `bool` | `false` | no | diff --git a/modules/infra/submodules/storage/efs.tf b/modules/infra/submodules/storage/efs.tf index c54d12d7..b80b0b80 100644 --- a/modules/infra/submodules/storage/efs.tf +++ b/modules/infra/submodules/storage/efs.tf @@ -7,7 +7,8 @@ resource "aws_efs_file_system" "eks" { kms_key_id = local.kms_key_arn tags = merge(local.backup_tagging, { - "Name" = var.deploy_id + "Name" = var.deploy_id + "migrated" = "aws_efs_mount_target" }) lifecycle { @@ -31,12 +32,11 @@ resource "aws_security_group" "efs" { } } - -resource "aws_efs_mount_target" "eks" { - count = local.deploy_efs ? length(local.private_subnet_ids) : 0 +resource "aws_efs_mount_target" "eks_cluster" { + for_each = local.deploy_efs ? { for sb in var.network_info.subnets.private : sb.name => sb } : {} file_system_id = aws_efs_file_system.eks[0].id security_groups = [aws_security_group.efs[0].id] - subnet_id = element(local.private_subnet_ids, count.index) + subnet_id = each.value.subnet_id } @@ -75,7 +75,11 @@ moved { to = aws_efs_mount_target.eks[0] } -moved { - from = aws_efs_access_point.eks - to = aws_efs_access_point.eks[0] + +removed { + from = aws_efs_mount_target.eks + + lifecycle { + destroy = false + } } diff --git a/modules/infra/submodules/storage/variables.tf b/modules/infra/submodules/storage/variables.tf index ef266428..6a2b1660 100644 --- a/modules/infra/submodules/storage/variables.tf +++ b/modules/infra/submodules/storage/variables.tf @@ -171,7 +171,7 @@ variable "network_info" { private = List of private Subnets. [{ name = Subnet name. 
- subnet_id = Subnet ud + subnet_id = Subnet id az = Subnet availability_zone az_id = Subnet availability_zone_id }] diff --git a/modules/nodes/main.tf b/modules/nodes/main.tf index 2745fc17..ab299b52 100644 --- a/modules/nodes/main.tf +++ b/modules/nodes/main.tf @@ -22,37 +22,12 @@ locals { taints = coalesce(ng.gpu, false) || anytrue([for itype in ng.instance_types : length(data.aws_ec2_instance_type.all[itype].gpus) > 0]) ? distinct(concat(local.gpu_taints, ng.taints)) : ng.taints }) } -} - -data "aws_ec2_instance_type_offerings" "nodes" { - for_each = { - for name, ng in local.node_groups : - name => ng.instance_types - } - - filter { - name = "instance-type" - values = each.value - } - - location_type = "availability-zone-id" -} - -data "aws_ami" "custom" { - for_each = toset([for k, v in local.node_groups : v.ami if v.ami != null]) - - filter { - name = "image-id" - values = [each.value] - } -} -locals { multi_zone_node_groups = [ for ng_name, ng in local.node_groups : { ng_name = ng_name sb_name = join("_", [for sb_name, sb in var.network_info.subnets.private : sb.az_id if contains(ng.availability_zone_ids, sb.az_id)]) - subnet = { for sb_name, sb in var.network_info.subnets.private : sb_name => sb if contains(ng.availability_zone_ids, sb.az_id) } + subnet = { for sb in var.network_info.subnets.private : sb.name => sb if contains(ng.availability_zone_ids, sb.az_id) } availability_zones = [for sb in var.network_info.subnets.private : sb.az if contains(ng.availability_zone_ids, sb.az_id)] node_group = merge(ng, { availability_zone_ids = [for sb in var.network_info.subnets.private : sb.az_id if contains(ng.availability_zone_ids, sb.az_id)] @@ -66,7 +41,7 @@ locals { for ng_name, ng in local.node_groups : [ for sb_name, sb in var.network_info.subnets.private : { ng_name = ng_name - sb_name = sb_name + sb_name = sb.name subnet = sb node_group = merge(ng, { availability_zone_ids = [sb.az_id] @@ -82,6 +57,29 @@ locals { node_groups_by_name = { for ngz in 
local.node_groups_per_zone : "${ngz.ng_name}-${ngz.sb_name}" => ngz } } +data "aws_ec2_instance_type_offerings" "nodes" { + for_each = { + for name, ng in local.node_groups : + name => ng.instance_types + } + + filter { + name = "instance-type" + values = each.value + } + + location_type = "availability-zone-id" +} + +data "aws_ami" "custom" { + for_each = toset([for k, v in local.node_groups : v.ami if v.ami != null]) + + filter { + name = "image-id" + values = [each.value] + } +} + resource "terraform_data" "calico_setup" { count = try(fileexists(var.eks_info.k8s_pre_setup_sh_file), false) ? 1 : 0 diff --git a/modules/nodes/nodes.tf b/modules/nodes/nodes.tf index 0d0897c0..8ee3cc32 100644 --- a/modules/nodes/nodes.tf +++ b/modules/nodes/nodes.tf @@ -76,12 +76,13 @@ data "aws_ssm_parameter" "eks_gpu_ami_release_version" { name = "/aws/service/eks/optimized-ami/${var.eks_info.cluster.version}/amazon-linux-2023/x86_64/nvidia/recommended/release_version" } + resource "aws_eks_node_group" "node_groups" { for_each = local.node_groups_by_name cluster_name = var.eks_info.cluster.specs.name version = each.value.node_group.ami != null ? null : var.eks_info.cluster.version release_version = each.value.node_group.ami != null ? null : (each.value.node_group.gpu ? nonsensitive(data.aws_ssm_parameter.eks_gpu_ami_release_version.value) : nonsensitive(data.aws_ssm_parameter.eks_ami_release_version.value)) - node_group_name = "${var.eks_info.cluster.specs.name}-${each.key}" + node_group_name = each.key node_role_arn = var.eks_info.nodes.roles[0].arn subnet_ids = try(lookup(each.value.node_group, "single_nodegroup", false), false) ? 
[for s in values(each.value.subnet) : s.subnet_id] : [each.value.subnet.subnet_id] force_update_version = true diff --git a/tests/deploy/ci-deploy.sh b/tests/deploy/ci-deploy.sh index 887acc44..94c063da 100755 --- a/tests/deploy/ci-deploy.sh +++ b/tests/deploy/ci-deploy.sh @@ -132,26 +132,74 @@ deploy_latest_ami_nodes() { deploy 'nodes' } -# Not used atm, scaffold for seamless future use. set_infra_imports() { - printf "Nothing to import into the infra module.\n" - local import_file="${INFRA_DIR}/imports.tf.tmp" - local import_file_tmp="${import_file}.tmp" - return 0 # Remove return if used. - set_import "$import_file" "$import_file_tmp" + printf "Generating infra imports.\n" + local import_file_tmp="${INFRA_DIR}/imports.tf.tmp" + local region + local deploy_id + local fs_id + + region=$(hcledit attribute get region -f "$INFRA_VARS" | jq -r) + deploy_id=$(hcledit attribute get deploy_id -f "$INFRA_VARS" | jq -r) + + : >"$import_file_tmp" + printf "Generating infra imports for EFS mount points.\n" + + fs_id=$(aws efs describe-file-systems \ + --region "$region" \ + --query "FileSystems[?Tags[?Key==\`deploy_id\` && Value==\`$deploy_id\`]].FileSystemId" \ + --output text) || { + echo "Failed to get fs_id" + return 1 + } + + if [ -z "${fs_id// /}" ]; then + echo "Error: fs_id is not set or empty." 
+ return 1 + fi + + printf "Processing file system: %s.\n" "$fs_id" + + subnet_ids=$(aws efs describe-mount-targets \ + --file-system-id "$fs_id" \ + --region "$region" \ + --query 'MountTargets[*].SubnetId' \ + --output text) + + subnet_map=$(aws ec2 describe-subnets \ + --subnet-ids $subnet_ids --region "$region" \ + --query 'Subnets[*].{Id:SubnetId,Name:Tags[?Key==`Name`].Value | [0]}' \ + --output json | jq 'map({(.Id): .Name}) | add') + + aws efs describe-mount-targets \ + --file-system-id "$fs_id" \ + --region "$region" \ + --query 'MountTargets[*].[MountTargetId, SubnetId]' \ + --output json | jq -c '.[]' | while read -r mount_point; do + mount_target_id=$(echo "$mount_point" | jq -r '.[0]') + subnet_id=$(echo "$mount_point" | jq -r '.[1]') + subnet_name=$(echo "$subnet_map" | jq -r ".\"$subnet_id\"") + cat <<-EOF >>"$import_file_tmp" + import { + to = module.infra.module.storage.aws_efs_mount_target.eks_cluster["$subnet_name"] + id = "$mount_target_id" + } +EOF + done + + set_import "$INFRA_DIR" "$import_file_tmp" } # Not used atm, scaffold for seamless future use. set_cluster_imports() { printf "Nothing to import into the cluster module.\n" - local import_file="${CLUSTER_DIR}/imports.tf.tmp" - local import_file_tmp="${import_file}.tmp" + local import_file_tmp="${CLUSTER_DIR}/imports.tf.tmp" return 0 # Remove return if used. - set_import "$import_file" "$import_file_tmp" + set_import "$CLUSTER_DIR" "$import_file_tmp" } set_nodes_imports() { - local import_file_tmp="${NODES_DIR}/nodes-imports.tf.tmp" + local import_file_tmp="${NODES_DIR}/imports.tf.tmp" cat <<-EOF >"$import_file_tmp" import { to = module.nodes.aws_eks_addon.pre_compute_addons["vpc-cni"] @@ -170,8 +218,8 @@ set_import() { if [[ ! -f "$import_file" ]] || ! 
grep -Fqx -f "$import_file_tmp" "$import_file"; then printf "Adding import from %s to %s.\n\n" "$import_file_tmp" "$import_file" - cat "$import_file_tmp" >>"$import_file" - printf "Import file:\n" && cat "$import_file" + printf "Import file:\n" + tee -a "$import_file" <"$import_file_tmp" else printf "Import on %s already present on %s.\n" "$import_file" "$import_file_tmp" fi
filesystem_type = string
efs = optional(object({
access_point_path = optional(string)
backup_vault = optional(object({
create = optional(bool)
force_destroy = optional(bool)
backup = optional(object({
schedule = optional(string)
cold_storage_after = optional(number)
delete_after = optional(number)
}))
}))
}))
netapp = optional(object({
migrate_from_efs = optional(object({
enabled = optional(bool)
datasync = optional(object({
enabled = optional(bool)
target = optional(string)
schedule = optional(string)
verify_mode = optional(string)
}))
}))
deployment_type = optional(string)
storage_capacity = optional(number)
throughput_capacity = optional(number)
automatic_backup_retention_days = optional(number)
daily_automatic_backup_start_time = optional(string)
storage_capacity_autosizing = optional(object({
enabled = optional(bool)
threshold = optional(number)
percent_capacity_increase = optional(number)
notification_email_address = optional(string)
}))
volume = optional(object({
name_suffix = optional(string)
storage_efficiency_enabled = optional(bool)
create = optional(bool)
junction_path = optional(string)
size_in_megabytes = optional(number)
}))
}))
s3 = optional(object({
create = optional(bool)
force_destroy_on_deletion = optional(bool)
}))
ecr = optional(object({
create = optional(bool)
force_destroy_on_deletion = optional(bool)
}))
enable_remote_backup = optional(bool)
costs_enabled = optional(bool)
})