From 9e5ea04c332e5fd06b35e4a04e6df1def0279bc4 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 29 Feb 2024 17:18:12 +0000 Subject: [PATCH 01/42] feat: terraform to add ArangoDB to AWS environment --- infra/ecr.tf | 4 + infra/ecs_main_arango.tf | 199 ++++++++++++++++++ ...ecs_main_arango_container_definitions.json | 22 ++ infra/main.tf | 3 + infra/security_groups.tf | 66 ++++++ 5 files changed, 294 insertions(+) create mode 100644 infra/ecs_main_arango.tf create mode 100644 infra/ecs_main_arango_container_definitions.json diff --git a/infra/ecr.tf b/infra/ecr.tf index 8ebfccd..2ab41db 100644 --- a/infra/ecr.tf +++ b/infra/ecr.tf @@ -94,6 +94,10 @@ resource "aws_ecr_repository" "mlflow" { name = "${var.prefix}-mlflow" } +resource "aws_ecr_repository" "arango" { + name = "${var.prefix}-arango" +} + resource "aws_vpc_endpoint" "ecr_dkr" { vpc_id = aws_vpc.main.id service_name = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.dkr" diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf new file mode 100644 index 0000000..1fe6f8b --- /dev/null +++ b/infra/ecs_main_arango.tf @@ -0,0 +1,199 @@ +resource "aws_ecs_service" "arango" { + name = "${var.prefix}-arango" + cluster = "${aws_ecs_cluster.main_cluster.id}" + task_definition = "${aws_ecs_task_definition.arango_service.arn}" + desired_count = 1 + launch_type = "FARGATE" + + network_configuration { + subnets = ["${aws_subnet.private_with_egress.*.id[0]}"] + security_groups = ["${aws_security_group.arango_service.id}"] + } + + load_balancer { + target_group_arn = "${aws_lb_target_group.arango.arn}" + container_port = "8529" + container_name = "arango" + } + + depends_on = [ + # The target group must have been associated with the listener first + "aws_lb_listener.arango", + ] +} + +resource "aws_ecs_task_definition" "arango_service" { + family = "${var.prefix}-arango" + container_definitions = "${data.template_file.arango_service_container_definitions.rendered}" + execution_role_arn = 
"${aws_iam_role.arango_task_execution.arn}" + task_role_arn = "${aws_iam_role.arango_task.arn}" + network_mode = "awsvpc" + cpu = "${local.arango_container_cpu}" + memory = "${local.arango_container_memory}" + requires_compatibilities = ["FARGATE"] + + lifecycle { + ignore_changes = [ + "revision", + ] + } +} + +data "template_file" "arango_service_container_definitions" { + template = "${file("${path.module}/ecs_main_arango_container_definitions.json")}" + + vars = { + container_image = "339713044404.dkr.ecr.eu-west-2.amazonaws.com/data-workspace-dev-a-arango:latest" + container_name = "arango" + log_group = "${aws_cloudwatch_log_group.arango.name}" + log_region = "${data.aws_region.aws_region.name}" + cpu = "${local.arango_container_cpu}" + memory = "${local.arango_container_memory}" + } +} + +resource "aws_cloudwatch_log_group" "arango" { + name = "${var.prefix}-arango" + retention_in_days = "3653" +} + +resource "aws_iam_role" "arango_task_execution" { + name = "${var.prefix}-arango-task-execution" + path = "/" + assume_role_policy = "${data.aws_iam_policy_document.arango_task_execution_ecs_tasks_assume_role.json}" +} + +data "aws_iam_policy_document" "arango_task_execution_ecs_tasks_assume_role" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ecs-tasks.amazonaws.com"] + } + } +} + +resource "aws_iam_role_policy_attachment" "arango_task_execution" { + role = "${aws_iam_role.arango_task_execution.name}" + policy_arn = "${aws_iam_policy.arango_task_execution.arn}" +} + +resource "aws_iam_policy" "arango_task_execution" { + name = "${var.prefix}-arango-task-execution" + path = "/" + policy = "${data.aws_iam_policy_document.arango_task_execution.json}" +} + +data "aws_iam_policy_document" "arango_task_execution" { + statement { + actions = [ + "logs:CreateLogStream", + "logs:PutLogEvents", + ] + + resources = [ + "${aws_cloudwatch_log_group.arango.arn}:*", + ] + } + + statement { + actions = [ + 
"ecr:BatchGetImage", + "ecr:GetDownloadUrlForLayer", + ] + + resources = [ + "${aws_ecr_repository.arango.arn}", + ] + } + + statement { + actions = [ + "ecr:GetAuthorizationToken", + ] + + resources = [ + "*", + ] + } +} + +resource "aws_iam_role" "arango_task" { + name = "${var.prefix}-arango-task" + path = "/" + assume_role_policy = "${data.aws_iam_policy_document.arango_task_ecs_tasks_assume_role.json}" +} + +data "aws_iam_policy_document" "arango_task_ecs_tasks_assume_role" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ecs-tasks.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "arango_ecs" { + name = "${var.prefix}-arango-ecs" + path = "/" + assume_role_policy = data.aws_iam_policy_document.arango_ecs_assume_role.json +} + +resource "aws_iam_role_policy_attachment" "arango_ecs" { + role = aws_iam_role.arango_ecs.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole" +} + +data "aws_iam_policy_document" "arango_ecs_assume_role" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ecs.amazonaws.com"] + } + } +} + +resource "aws_lb" "arango" { + name = "${var.prefix}-arango" + load_balancer_type = "network" + security_groups = ["${aws_security_group.arango_lb.id}"] + enable_deletion_protection = true + timeouts {} + + subnet_mapping { + subnet_id = "${aws_subnet.public.*.id[0]}" + + } +} + +resource "aws_lb_listener" "arango" { + load_balancer_arn = "${aws_lb.arango.arn}" + port = "8529" + protocol = "TCP" + + default_action { + target_group_arn = "${aws_lb_target_group.arango.id}" + type = "forward" + } +} + +resource "aws_lb_target_group" "arango" { + name = "${var.prefix}-arango" + port = "8529" + vpc_id = "${aws_vpc.main.id}" + target_type = "ip" + protocol = "TCP" + preserve_client_ip = true + + health_check { + protocol = "TCP" + interval = 10 + healthy_threshold = 2 + unhealthy_threshold = 2 + } +} \ No newline at 
end of file diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json new file mode 100644 index 0000000..f321c58 --- /dev/null +++ b/infra/ecs_main_arango_container_definitions.json @@ -0,0 +1,22 @@ +[ + { + "name": "${container_name}", + "image": "${container_image}", + "memoryReservation": ${memory}, + "cpu": ${cpu}, + "essential": true, + "portMappings": [{ + "containerPort": 8529, + "protocol": "tcp" + }], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "${log_group}", + "awslogs-region": "${log_region}", + "awslogs-stream-prefix": "${container_name}" + } + }, + "environment": [] + } + ] \ No newline at end of file diff --git a/infra/main.tf b/infra/main.tf index b9290e9..f91f8ff 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -260,6 +260,9 @@ locals { flower_container_memory = 8192 flower_container_cpu = 1024 + arango_container_memory = 8192 + arango_container_cpu = 4096 + mlflow_container_memory = 8192 mlflow_container_cpu = 1024 mlflow_port = 8004 diff --git a/infra/security_groups.tf b/infra/security_groups.tf index d50f440..a394e7d 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -1887,3 +1887,69 @@ resource "aws_security_group_rule" "notebooks_egress_http_to_mlflow_service" { to_port = local.mlflow_port protocol = "tcp" } + +resource "aws_security_group" "arango_lb" { + name = "${var.prefix}-arango_lb" + description = "${var.prefix}-arango_lb" + vpc_id = "${aws_vpc.main.id}" + + tags = { + Name = "${var.prefix}-arango_lb" + } + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_security_group" "arango_service" { + name = "${var.prefix}-arango" + description = "${var.prefix}-arango" + vpc_id = "${aws_vpc.main.id}" + + tags = { + Name = "${var.prefix}-arango" + } + + lifecycle { + create_before_destroy = true + } +} + +# Connections to ECR and CloudWatch. 
ECR needs S3, and its VPC endpoint type +# does not have an IP range or security group to limit access to +resource "aws_security_group_rule" "arango_egress_https_all" { + description = "egress-https-to-all" + + security_group_id = aws_security_group.arango_service.id + cidr_blocks = ["0.0.0.0/0"] + + type = "egress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" { + description = "ingress-arango-lb" + + security_group_id = "${aws_security_group.arango_service.id}" + source_security_group_id = "${aws_security_group.arango_lb.id}" + + type = "ingress" + from_port = "8529" + to_port = "8529" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango_service_egress_8529_arango_lb" { + description = "egress-arango-lb" + + security_group_id = "${aws_security_group.arango_service.id}" + source_security_group_id = "${aws_security_group.arango_lb.id}" + + type = "egress" + from_port = "8529" + to_port = "8529" + protocol = "tcp" +} \ No newline at end of file From f25ac247ef8889243f8df70d0bb2d60d349fbb88 Mon Sep 17 00:00:00 2001 From: Nick Ross Date: Tue, 5 Mar 2024 16:31:52 +0000 Subject: [PATCH 02/42] fix: add arango ecr repo to ecs available ecr repos --- infra/ecr.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/ecr.tf b/infra/ecr.tf index 2ab41db..1d76c27 100644 --- a/infra/ecr.tf +++ b/infra/ecr.tf @@ -275,6 +275,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" { "${aws_ecr_repository.superset.arn}", "${aws_ecr_repository.flower.arn}", "${aws_ecr_repository.mlflow.arn}", + "${aws_ecr_repository.arango.arn}", ] } From 0ac41211dbf981bb122d340d336655eadfb62f4a Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 5 Mar 2024 16:49:49 +0000 Subject: [PATCH 03/42] fix: add arango ECR to IAM policy --- infra/ecr.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/ecr.tf b/infra/ecr.tf index 2ab41db..92ca521 100644 --- a/infra/ecr.tf +++ 
b/infra/ecr.tf @@ -275,6 +275,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" { "${aws_ecr_repository.superset.arn}", "${aws_ecr_repository.flower.arn}", "${aws_ecr_repository.mlflow.arn}", + "${aws_ecr_repository.arango.arn}" ] } From acbd910ee495c8656b8b08bc1c807a272b5762fa Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 5 Mar 2024 16:51:29 +0000 Subject: [PATCH 04/42] fix: amend security groups to enable connection to ECR --- infra/security_groups.tf | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index a394e7d..2c18367 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -1922,7 +1922,7 @@ resource "aws_security_group_rule" "arango_egress_https_all" { description = "egress-https-to-all" security_group_id = aws_security_group.arango_service.id - cidr_blocks = ["0.0.0.0/0"] + source_security_group_id = aws_security_group.ecr_api.id type = "egress" from_port = "443" @@ -1952,4 +1952,16 @@ resource "aws_security_group_rule" "arango_service_egress_8529_arango_lb" { from_port = "8529" to_port = "8529" protocol = "tcp" +} + +resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango" { + description = "ingress-https-from-arango-service" + + security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.ecr_api.id + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" } \ No newline at end of file From fa35d6eda5d884e4b0e4c4d4be7d816e29c1aa03 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 6 Mar 2024 10:47:59 +0000 Subject: [PATCH 05/42] fix: remove conflict markers --- infra/ecr.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/infra/ecr.tf b/infra/ecr.tf index 74f1cf1..92ca521 100644 --- a/infra/ecr.tf +++ b/infra/ecr.tf @@ -275,11 +275,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" { "${aws_ecr_repository.superset.arn}", 
"${aws_ecr_repository.flower.arn}", "${aws_ecr_repository.mlflow.arn}", -<<<<<<< HEAD "${aws_ecr_repository.arango.arn}" -======= - "${aws_ecr_repository.arango.arn}", ->>>>>>> 9f83c4d94f6574276992d2323b8e89159b9d16a0 ] } From f07b401ba64b4525e617455b66712864e3eac064 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 7 Mar 2024 15:20:08 +0000 Subject: [PATCH 06/42] fix: adding secruity group rule to enable connection btwn the arango container and ecr --- infra/security_groups.tf | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index f9c19d8..50e2b30 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -686,6 +686,18 @@ resource "aws_security_group_rule" "ecr_api_ingress_https_from_healthcheck" { protocol = "tcp" } +resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_proxy" { + description = "ingress-https-from-arango" + + security_group_id = aws_security_group.ecr_api.id + source_security_group_id = aws_security_group.arango_service.id + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + resource "aws_security_group_rule" "cloudwatch_ingress_https_from_all" { description = "ingress-https-from-everywhere" @@ -1930,11 +1942,23 @@ resource "aws_security_group" "arango_service" { # Connections to ECR and CloudWatch. 
ECR needs S3, and its VPC endpoint type # does not have an IP range or security group to limit access to +resource "aws_security_group_rule" "arango_egress_ecr_api" { + description = "egress-https-to-ecr-api" + + security_group_id = aws_security_group.ecr_api.id + source_security_group_id = aws_security_group.arango_service.id + + type = "egress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + resource "aws_security_group_rule" "arango_egress_https_all" { description = "egress-https-to-all" security_group_id = aws_security_group.arango_service.id - source_security_group_id = aws_security_group.ecr_api.id + cidr_blocks = ["0.0.0.0/0"] type = "egress" from_port = "443" From a636fac1bbe0686128d9dae7d12040992255eacc Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 19 Mar 2024 11:44:06 +0000 Subject: [PATCH 07/42] fix: add arango_container_port to facilitate connection to load balancer --- infra/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/main.tf b/infra/main.tf index f91f8ff..4599bf7 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -262,6 +262,7 @@ locals { arango_container_memory = 8192 arango_container_cpu = 4096 + arango_container_port = 8529 mlflow_container_memory = 8192 mlflow_container_cpu = 1024 From fb3f6f1614fa7daf74107e1693c4b1809a505a53 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 19 Mar 2024 11:44:35 +0000 Subject: [PATCH 08/42] fix: add security group rules to facilitate connection to load balancer --- infra/security_groups.tf | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index 50e2b30..cb7cd45 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -1926,6 +1926,30 @@ resource "aws_security_group" "arango_lb" { } } +resource "aws_security_group_rule" "arango_lb_ingress_https_from_whitelist" { + description = "ingress-https-from-whitelist" + + security_group_id = aws_security_group.arango_lb.id + cidr_blocks 
= "${var.ip_whitelist}" + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango_lb_egress_https_to_arango_service" { + description = "egress-https-to-arango-service" + + security_group_id = aws_security_group.arango_lb.id + source_security_group_id = aws_security_group.arango_service.id + + type = "egress" + from_port = local.arango_container_port + to_port = local.arango_container_port + protocol = "tcp" +} + resource "aws_security_group" "arango_service" { name = "${var.prefix}-arango" description = "${var.prefix}-arango" From 830d9617459657335ad11a31d85c03a26b3c6e59 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:39:33 +0000 Subject: [PATCH 09/42] feat: modify infra to use EC2 based container --- infra/ecs_main_arango.tf | 124 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 1 deletion(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 1fe6f8b..b1076bc 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -3,7 +3,12 @@ resource "aws_ecs_service" "arango" { cluster = "${aws_ecs_cluster.main_cluster.id}" task_definition = "${aws_ecs_task_definition.arango_service.arn}" desired_count = 1 - launch_type = "FARGATE" + + capacity_provider_strategy { + capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name + weight = 100 + base = 1 + } network_configuration { subnets = ["${aws_subnet.private_with_egress.*.id[0]}"] @@ -19,9 +24,99 @@ resource "aws_ecs_service" "arango" { depends_on = [ # The target group must have been associated with the listener first "aws_lb_listener.arango", + "aws_autoscaling_group.arango_service" ] } +resource "aws_service_discovery_service" "arango" { + name = "${var.prefix}-arango" + + dns_config { + namespace_id = aws_service_discovery_private_dns_namespace.jupyterhub.id + dns_records { + ttl = 10 + type = "A" + } + } +} + +resource "aws_autoscaling_group" 
"arango_service" { + name_prefix = "${var.prefix}-arango" + max_size = 2 + min_size = 1 + desired_capacity = 1 + health_check_grace_period = 120 + health_check_type = "EC2" + vpc_zone_identifier = ["${aws_subnet.private_with_egress.*.id[0]}"] + + launch_template { + id = aws_launch_template.arango_service.id + version = "$Latest" + } + + tag { + key = "Name" + value = "${var.prefix}-arango-service" + propagate_at_launch = true + } + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_launch_template" "arango_service" { + name_prefix = "${var.prefix}-arango-service-" + image_id = "ami-0d17f7a2768c41ccd" + instance_type = "t2.xlarge" + key_name = "${aws_key_pair.shared.key_name}" + vpc_security_group_ids = ["${aws_security_group.arango-ec2.id}", + "${aws_security_group.arango_service.id}"] + + iam_instance_profile { + name = "${aws_iam_instance_profile.arango_ec2.name}" + } + + user_data = "${data.template_file.ecs_config_template.rendered}" + + lifecycle { + create_before_destroy = true + } +} + +data "template_file" "ecs_config_template" { + template = "${filebase64("${path.module}/arango_user_data.sh")}" + vars = { + ECS_CLUSTER = "${aws_ecs_cluster.main_cluster.name}" + EBS_REGION = "${data.aws_region.aws_region.name}" + } + } + +resource "aws_ecs_capacity_provider" "arango_capacity_provider" { + name = "${var.prefix}-arango_service" + + auto_scaling_group_provider { + auto_scaling_group_arn = aws_autoscaling_group.arango_service.arn + + managed_scaling { + maximum_scaling_step_size = 1000 + minimum_scaling_step_size = 1 + status = "ENABLED" + target_capacity = 3 + } + } +} + +resource "aws_ecs_cluster_capacity_providers" "arango" { + cluster_name = aws_ecs_cluster.main_cluster.name + + capacity_providers = [aws_ecs_capacity_provider.arango_capacity_provider.name] + + default_capacity_provider_strategy { + capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name + } +} + resource "aws_ecs_task_definition" "arango_service" { 
family = "${var.prefix}-arango" container_definitions = "${data.template_file.arango_service_container_definitions.rendered}" @@ -158,6 +253,33 @@ data "aws_iam_policy_document" "arango_ecs_assume_role" { } } +resource "aws_iam_role" "arango_ec2" { + name = "${var.prefix}-arango-ec2" + assume_role_policy = data.aws_iam_policy_document.arango_ec2_assume_role.json +} + +data "aws_iam_policy_document" "arango_ec2_assume_role" { + + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ec2.amazonaws.com"] + } + } +} + +resource "aws_iam_role_policy_attachment" "arango_ec2" { + role = aws_iam_role.arango_ec2.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" +} + +resource "aws_iam_instance_profile" "arango_ec2" { + name = "${var.prefix}-arango-ec2" + role = aws_iam_role.arango_ec2.id +} + resource "aws_lb" "arango" { name = "${var.prefix}-arango" load_balancer_type = "network" From ad879a623681bb68ee34ab00194a47c58df8485f Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:41:10 +0000 Subject: [PATCH 10/42] feat: changes to help support connection btwn arango and notebook cluster --- infra/ecs_main_arango.tf | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index b1076bc..2ea2c11 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -21,6 +21,10 @@ resource "aws_ecs_service" "arango" { container_name = "arango" } + service_registries { + registry_arn = aws_service_discovery_service.arango.arn + } + depends_on = [ # The target group must have been associated with the listener first "aws_lb_listener.arango", @@ -291,6 +295,10 @@ resource "aws_lb" "arango" { subnet_id = "${aws_subnet.public.*.id[0]}" } + + tags = { + name = "arango-to-notebook-lb" + } } resource "aws_lb_listener" "arango" { @@ -312,6 +320,21 @@ resource "aws_lb_target_group" "arango" { protocol = "TCP" 
preserve_client_ip = true + health_check { + protocol = "TCP" + interval = 10 + healthy_threshold = 2 + unhealthy_threshold = 2 + } +} + +resource "aws_lb_target_group" "notebooks" { + name = "${var.prefix}-notebooks" + port = "8888" + vpc_id = "${aws_vpc.notebooks.id}" + protocol = "TCP" + preserve_client_ip = true + health_check { protocol = "TCP" interval = 10 From b73d04d9046c94ae2f8ebbd7247487a75f513cfc Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:42:16 +0000 Subject: [PATCH 11/42] feat: modify infra to use EC2 based container --- infra/ecs_main_arango.tf | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 2ea2c11..bb64b37 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -129,7 +129,20 @@ resource "aws_ecs_task_definition" "arango_service" { network_mode = "awsvpc" cpu = "${local.arango_container_cpu}" memory = "${local.arango_container_memory}" - requires_compatibilities = ["FARGATE"] + requires_compatibilities = ["EC2"] + + volume { + name = "arango-ebs-volume" + docker_volume_configuration { + scope = "shared" + autoprovision = true + driver = "rexray/ebs" + driver_opts = { + volumetype = "gp2" + size = 5 + } + } + } lifecycle { ignore_changes = [ From 762ba0ab24281739986429673530683746fdc203 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:46:22 +0000 Subject: [PATCH 12/42] feat: add DNS and certificate for arango --- infra/route_53.tf | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/infra/route_53.tf b/infra/route_53.tf index 16e86ce..8ad0d45 100644 --- a/infra/route_53.tf +++ b/infra/route_53.tf @@ -190,6 +190,38 @@ resource "aws_acm_certificate_validation" "superset_internal" { certificate_arn = aws_acm_certificate.superset_internal[count.index].arn } +resource "aws_route53_record" "arango" { + provider = "aws.route53" + zone_id = 
data.aws_route53_zone.aws_route53_zone.zone_id + name = "arango" + type = "A" + + alias { + name = aws_lb.arango.dns_name + zone_id = aws_lb.arango.zone_id + evaluate_target_health = false + } + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_acm_certificate" "arango" { + domain_name = aws_route53_record.arango.name + validation_method = "DNS" + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_acm_certificate_validation" "arango" { + certificate_arn = aws_acm_certificate.arango.arn +} + + + # resource "aws_route53_record" "jupyterhub" { # zone_id = "${data.aws_route53_zone.aws_route53_zone.zone_id}" # name = "${var.jupyterhub_domain}." From 600031bc776e061bc6f6d3a13397d97818d180dc Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:48:26 +0000 Subject: [PATCH 13/42] feat: user_data to assign EC2 instance to cluster and install rexray plugin --- infra/arango_user_data.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 infra/arango_user_data.sh diff --git a/infra/arango_user_data.sh b/infra/arango_user_data.sh new file mode 100644 index 0000000..21ec27b --- /dev/null +++ b/infra/arango_user_data.sh @@ -0,0 +1,6 @@ +#!/bin/bash +echo "ECS_CLUSTER=data-workspace-dev-a" >> /etc/ecs/ecs.config +# install the REX-Ray Docker volume plugin +docker plugin install rexray/ebs REXRAY_PREEMPT=true EBS_REGION=${EBS_REGION} --grant-all-permission +# restart the ECS agent. This ensures the plugin is active and recognized once the agent starts. 
+#sudo systemctl restart ecs \ No newline at end of file From 5b2355b3c638d829104e0e4ab0e3498eb1c3938c Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:49:02 +0000 Subject: [PATCH 14/42] feat: add mount points for container volume --- infra/ecs_main_arango_container_definitions.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json index f321c58..f177e9d 100644 --- a/infra/ecs_main_arango_container_definitions.json +++ b/infra/ecs_main_arango_container_definitions.json @@ -17,6 +17,12 @@ "awslogs-stream-prefix": "${container_name}" } }, - "environment": [] + "environment": [], + "mountPoints": [ + { + "sourceVolume": "arango-ebs-volume", + "containerPath": "/var/lib/arangodb3" + } + ] } ] \ No newline at end of file From 15258ed1e5d0801b2984c402de5fc65a5c381f55 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 27 Mar 2024 14:50:27 +0000 Subject: [PATCH 15/42] feat: add security groups and rules to manage connection to and from EC2 and between clusters --- infra/security_groups.tf | 171 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index cb7cd45..dae3c51 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -436,6 +436,18 @@ resource "aws_security_group_rule" "notebooks_ingress_https_from_admin" { protocol = "tcp" } +resource "aws_security_group_rule" "notebooks_ingress_https_from_arango" { + description = "ingress-https-from-arango" + + security_group_id = aws_security_group.notebooks.id + source_security_group_id = aws_security_group.arango_service.id + + type = "ingress" + from_port = local.arango_container_port + to_port = local.arango_container_port + protocol = "tcp" +} + resource "aws_security_group_rule" "notebooks_ingress_http_dev_from_admin" { description = "ingress-http-dev-from-jupytehub" @@ -521,6 
+533,30 @@ resource "aws_security_group_rule" "notebooks_egress_postgres_to_datasets_db" { protocol = "tcp" } +resource "aws_security_group_rule" "notebooks_egress_arango_service" { + description = "egress-to-arango" + + security_group_id = aws_security_group.notebooks.id + source_security_group_id = aws_security_group.arango_service.id + + type = "egress" + from_port = local.arango_container_port + to_port = local.arango_container_port + protocol = "tcp" +} + +resource "aws_security_group_rule" "notebooks_egress_arango_lb" { + description = "egress-to-arango-lb" + + security_group_id = aws_security_group.notebooks.id + source_security_group_id = aws_security_group.arango_lb.id + + type = "egress" + from_port = local.arango_container_port + to_port = local.arango_container_port + protocol = "tcp" +} + resource "aws_security_group" "cloudwatch" { name = "${var.prefix}-cloudwatch" @@ -1938,6 +1974,18 @@ resource "aws_security_group_rule" "arango_lb_ingress_https_from_whitelist" { protocol = "tcp" } +resource "aws_security_group_rule" "arangoo_service_egress_http_to_arango_lb" { + description = "egress-http-to-arango-lb" + + security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.arango_lb.id + + type = "egress" + from_port = "80" + to_port = "80" + protocol = "tcp" +} + resource "aws_security_group_rule" "arango_lb_egress_https_to_arango_service" { description = "egress-https-to-arango-service" @@ -1950,6 +1998,30 @@ resource "aws_security_group_rule" "arango_lb_egress_https_to_arango_service" { protocol = "tcp" } +resource "aws_security_group_rule" "arango_lb_notebooks_ingress" { + description = "inbound peering connection with notebooks vpc" + + security_group_id = aws_security_group.arango_lb.id + cidr_blocks = ["${aws_vpc.notebooks.cidr_block}"] + + type = "ingress" + from_port = "0" + to_port = "0" + protocol = "-1" +} + +resource "aws_security_group_rule" "arango_lb_notebooks_egress" { + description = "allow 
outbound traffic" + + security_group_id = aws_security_group.arango_lb.id + cidr_blocks = ["0.0.0.0/0"] + + type = "egress" + from_port = "0" + to_port = "0" + protocol = "-1" +} + resource "aws_security_group" "arango_service" { name = "${var.prefix}-arango" description = "${var.prefix}-arango" @@ -1990,6 +2062,31 @@ resource "aws_security_group_rule" "arango_egress_https_all" { protocol = "tcp" } +resource "aws_security_group_rule" "arango_service_ec2" { + + security_group_id = aws_security_group.arango_service.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "80" + to_port = "80" + protocol = "tcp" +} + + + +resource "aws_security_group_rule" "arango_service_ingress_http_arango_lb" { + description = "ingress-arango-lb" + + security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.arango_lb.id + + type = "ingress" + from_port = "80" + to_port = "80" + protocol = "tcp" +} + resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" { description = "ingress-arango-lb" @@ -2020,6 +2117,80 @@ resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango" { security_group_id = aws_security_group.arango_service.id source_security_group_id = aws_security_group.ecr_api.id + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango_ecs_ec2" { + description = "ingress_ec2_instance" + + security_group_id = aws_security_group.arango_service.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "22" + to_port = "22" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango-ecs-egress-all" { + + security_group_id = aws_security_group.arango_service.id + cidr_blocks = ["0.0.0.0/0"] + + type = "egress" + from_port = "0" + to_port = "65535" + protocol = "tcp" +} + +resource "aws_security_group" "arango-ec2" { + name = "${var.prefix}-arango-ec2" + description = "${var.prefix}-arango-ec2" + vpc_id = 
aws_vpc.main.id + + tags = { + Name = "${var.prefix}-arango-ec2" + } + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_security_group_rule" "arango-ec2-egress-all" { + description = "egress-everything-to-everywhere" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = ["0.0.0.0/0"] + + type = "egress" + from_port = "0" + to_port = "65535" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango-ec2-egress" { + description = "egress-everything-to-everywhere" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "80" + to_port = "80" + protocol = "tcp" +} + + +resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_ec2" { + description = "ingress-https-from-arango-ec2" + + security_group_id = aws_security_group.ecr_api.id + source_security_group_id = aws_security_group.arango-ec2.id + type = "ingress" from_port = "443" to_port = "443" From 55c6535db48c6a225e8e7233ef7ace72729ea871 Mon Sep 17 00:00:00 2001 From: Sophie Glinton Date: Fri, 5 Apr 2024 14:24:52 +0100 Subject: [PATCH 16/42] Random string root password and Arango container env var --- infra/ecs_main_arango.tf | 10 ++++++++++ infra/ecs_main_arango_container_definitions.json | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index bb64b37..4f6b9d5 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -161,6 +161,7 @@ data "template_file" "arango_service_container_definitions" { log_region = "${data.aws_region.aws_region.name}" cpu = "${local.arango_container_cpu}" memory = "${local.arango_container_memory}" + root_password = "${random_string.aws_arangodb_root_password.result}" } } @@ -354,4 +355,13 @@ resource "aws_lb_target_group" "notebooks" { healthy_threshold = 2 unhealthy_threshold = 2 } +} + +resource "random_string" "aws_arangodb_root_password" { + length = 64 + special = false + 
+ lifecycle { + ignore_changes = all + } } \ No newline at end of file diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json index f177e9d..8a4f299 100644 --- a/infra/ecs_main_arango_container_definitions.json +++ b/infra/ecs_main_arango_container_definitions.json @@ -17,7 +17,12 @@ "awslogs-stream-prefix": "${container_name}" } }, - "environment": [], + "environment": [ + { + "name": "ARANGO_ROOT_PASSWORD", + "value": "${root_password}" + } + ], "mountPoints": [ { "sourceVolume": "arango-ebs-volume", From ccb7b21fb0ddcb9e9bbb69e7c27d481ac5c49e78 Mon Sep 17 00:00:00 2001 From: Sophie Glinton Date: Fri, 5 Apr 2024 15:07:37 +0100 Subject: [PATCH 17/42] Arango creds as env vars in main container --- infra/ecs_main_admin.tf | 4 ++++ infra/ecs_main_admin_container_definitions.json | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/infra/ecs_main_admin.tf b/infra/ecs_main_admin.tf index f16e036..1c6585f 100644 --- a/infra/ecs_main_admin.tf +++ b/infra/ecs_main_admin.tf @@ -26,6 +26,10 @@ locals { authbroker_url = "${var.admin_authbroker_url}" secret_key = "${random_string.admin_secret_key.result}" + arango_db__host = "${aws_ecs_service.arango.address}" + arango_db__password = "${random_string.aws_arangodb_root_password.result}" + arango_db__port = "${aws_ecs_service.arango.port}" + environment = "${var.admin_environment}" uploads_bucket = "${var.uploads_bucket}" diff --git a/infra/ecs_main_admin_container_definitions.json b/infra/ecs_main_admin_container_definitions.json index 4cb0a5e..e5a33b5 100644 --- a/infra/ecs_main_admin_container_definitions.json +++ b/infra/ecs_main_admin_container_definitions.json @@ -55,6 +55,22 @@ "name": "EXPLORER_DEFAULT_CONNECTION", "value": "datasets_1" }, + { + "name": "ARANGO_DB__HOST", + "value": "${arango_db__host}" + }, + { + "name": "ARANGO_DB__PORT", + "value": "${arango_db__port}" + }, + { + "name": "ARANGO_DB__USER", + "value": "root" + }, + { + "name": 
"ARANGO_DB__PASSWORD", + "value": "${arango_db__password}" + }, { "name": "ALLOWED_HOSTS__1", "value": "${root_domain}" From 7cd5a97ad1c67c02b4ef60a7e8c1e3b321e730c0 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:27:28 +0100 Subject: [PATCH 18/42] fix: add vpc endpoints for ecr to facilitate connection with ecs --- infra/ecr.tf | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/infra/ecr.tf b/infra/ecr.tf index 92ca521..0319c98 100644 --- a/infra/ecr.tf +++ b/infra/ecr.tf @@ -126,6 +126,34 @@ resource "aws_vpc_endpoint" "ecr_api" { timeouts {} } +resource "aws_vpc_endpoint" "ecr_dkr_datasets" { + vpc_id = aws_vpc.datasets.id + service_name = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.dkr" + vpc_endpoint_type = "Interface" + private_dns_enabled = true + + security_group_ids = ["${aws_security_group.ecr_dkr_datasets.id}"] + subnet_ids = ["${aws_subnet.datasets.*.id[0]}"] + + policy = data.aws_iam_policy_document.aws_vpc_endpoint_ecr.json + + timeouts {} +} + +resource "aws_vpc_endpoint" "ecr_api_datasets" { + vpc_id = aws_vpc.datasets.id + service_name = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.api" + vpc_endpoint_type = "Interface" + private_dns_enabled = true + + security_group_ids = ["${aws_security_group.ecr_api_datasets.id}"] + subnet_ids = ["${aws_subnet.datasets.*.id[0]}"] + + policy = data.aws_iam_policy_document.aws_vpc_endpoint_ecr.json + + timeouts {} +} + data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" { # Contains policies for both ECR and DKR endpoints, as recommended From 47eb127b0bfa6fcda7edb208482e248786abf9f9 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:28:00 +0100 Subject: [PATCH 19/42] fix: remove EBS mount points --- infra/ecs_main_arango_container_definitions.json | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json index 
f177e9d..f321c58 100644 --- a/infra/ecs_main_arango_container_definitions.json +++ b/infra/ecs_main_arango_container_definitions.json @@ -17,12 +17,6 @@ "awslogs-stream-prefix": "${container_name}" } }, - "environment": [], - "mountPoints": [ - { - "sourceVolume": "arango-ebs-volume", - "containerPath": "/var/lib/arangodb3" - } - ] + "environment": [] } ] \ No newline at end of file From 1588e33db333dd9b958768e905cb8d4ba5bb068f Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:28:47 +0100 Subject: [PATCH 20/42] fix: add commands to install ecs agent to ec2 instance --- infra/ecs_main_arango_user_data.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 infra/ecs_main_arango_user_data.sh diff --git a/infra/ecs_main_arango_user_data.sh b/infra/ecs_main_arango_user_data.sh new file mode 100644 index 0000000..5d1b1c5 --- /dev/null +++ b/infra/ecs_main_arango_user_data.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# install and start ecs agent + +mkdir /etc/ecs/ +echo "ECS_CLUSTER=data-workspace-dev-a" >> /etc/ecs/ecs.config +sudo yum update -y ecs-init +sudo systemctl restart ecs +#echo "ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE=true" >> /etc/ecs/ecs.config +# install the REX-Ray Docker volume plugin +#docker plugin install rexray/ebs "REXRAY_PREEMPT=true" "EBS_REGION=eu-west-2" --grant-all-permission +# restart the ECS agent. This ensures the plugin is active and recognized once the agent starts. 
+#sudo yum update -y ecs-init +#sudo systemctl restart ecs \ No newline at end of file From f62f8bbb0bc556f3ffde72150a986c9b59591bbc Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:31:11 +0100 Subject: [PATCH 21/42] feat: switch arango to run on datasets vpc --- infra/ecs_main_arango.tf | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index bb64b37..32cda20 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -11,7 +11,7 @@ resource "aws_ecs_service" "arango" { } network_configuration { - subnets = ["${aws_subnet.private_with_egress.*.id[0]}"] + subnets = ["${aws_subnet.datasets.*.id[0]}"] security_groups = ["${aws_security_group.arango_service.id}"] } @@ -48,10 +48,10 @@ resource "aws_autoscaling_group" "arango_service" { name_prefix = "${var.prefix}-arango" max_size = 2 min_size = 1 - desired_capacity = 1 + desired_capacity = 2 health_check_grace_period = 120 health_check_type = "EC2" - vpc_zone_identifier = ["${aws_subnet.private_with_egress.*.id[0]}"] + vpc_zone_identifier = ["${aws_subnet.datasets.*.id[0]}"] launch_template { id = aws_launch_template.arango_service.id @@ -71,15 +71,22 @@ resource "aws_autoscaling_group" "arango_service" { resource "aws_launch_template" "arango_service" { name_prefix = "${var.prefix}-arango-service-" - image_id = "ami-0d17f7a2768c41ccd" + image_id = "ami-0c618421e207909d0" instance_type = "t2.xlarge" key_name = "${aws_key_pair.shared.key_name}" - vpc_security_group_ids = ["${aws_security_group.arango-ec2.id}", - "${aws_security_group.arango_service.id}"] + metadata_options { + http_tokens = "required" + } + + network_interfaces { + security_groups = ["${aws_security_group.arango-ec2.id}"] + subnet_id = aws_subnet.datasets.*.id[0] + } + iam_instance_profile { name = "${aws_iam_instance_profile.arango_ec2.name}" - } + } user_data = "${data.template_file.ecs_config_template.rendered}" @@ -305,7 
+312,7 @@ resource "aws_lb" "arango" { timeouts {} subnet_mapping { - subnet_id = "${aws_subnet.public.*.id[0]}" + subnet_id = "${aws_subnet.public_datasets.*.id[0]}" } @@ -328,7 +335,7 @@ resource "aws_lb_listener" "arango" { resource "aws_lb_target_group" "arango" { name = "${var.prefix}-arango" port = "8529" - vpc_id = "${aws_vpc.main.id}" + vpc_id = "${aws_vpc.datasets.id}" target_type = "ip" protocol = "TCP" preserve_client_ip = true From 21cf59c8ff50ba96f1338ccc1f6635bf2e9178a5 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:31:53 +0100 Subject: [PATCH 22/42] feat: add ebs iam policies --- infra/ecs_main_arango.tf | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 32cda20..25eadde 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -299,6 +299,51 @@ resource "aws_iam_role_policy_attachment" "arango_ec2" { policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" } +resource "aws_iam_role_policy_attachment" "arango_ec2_ssm" { + role = aws_iam_role.arango_ec2.name + policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" +} + +data "aws_iam_policy_document" "arango_ebs" { + statement { + effect = "Allow" + actions = [ + "ec2:AttachVolume", + "ec2:CreateVolume", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:DeleteVolume", + "ec2:DeleteSnapshot", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInstances", + "ec2:DescribeVolumes", + "ec2:DescribeVolumeAttribute", + "ec2:DescribeVolumeStatus", + "ec2:DescribeSnapshots", + "ec2:CopySnapshot", + "ec2:DescribeSnapshotAttribute", + "ec2:DetachVolume", + "ec2:ModifySnapshotAttribute", + "ec2:ModifyVolumeAttribute", + "ec2:DescribeTags" + ] + resources = [ + "*" + ] + } +} + +resource "aws_iam_policy" "arango_ebs" { + name = "arango-ebs" + description = "enable-mounting-of-ebs-volume" + policy = 
data.aws_iam_policy_document.arango_ebs.json +} + +resource "aws_iam_role_policy_attachment" "arango_ec2_ebs" { + role = aws_iam_role.arango_ec2.name + policy_arn = aws_iam_policy.arango_ebs.arn +} + resource "aws_iam_instance_profile" "arango_ec2" { name = "${var.prefix}-arango-ec2" role = aws_iam_role.arango_ec2.id From 2534b42bb4cf97c29f0f137f3961a6fa07516805 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:33:09 +0100 Subject: [PATCH 23/42] fix: remove ebs volume config using rexray --- infra/ecs_main_arango.tf | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 25eadde..354f7af 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -96,7 +96,7 @@ resource "aws_launch_template" "arango_service" { } data "template_file" "ecs_config_template" { - template = "${filebase64("${path.module}/arango_user_data.sh")}" + template = "${filebase64("${path.module}/ecs_main_arango_user_data.sh")}" vars = { ECS_CLUSTER = "${aws_ecs_cluster.main_cluster.name}" EBS_REGION = "${data.aws_region.aws_region.name}" @@ -138,19 +138,6 @@ resource "aws_ecs_task_definition" "arango_service" { memory = "${local.arango_container_memory}" requires_compatibilities = ["EC2"] - volume { - name = "arango-ebs-volume" - docker_volume_configuration { - scope = "shared" - autoprovision = true - driver = "rexray/ebs" - driver_opts = { - volumetype = "gp2" - size = 5 - } - } - } - lifecycle { ignore_changes = [ "revision", @@ -385,21 +372,6 @@ resource "aws_lb_target_group" "arango" { protocol = "TCP" preserve_client_ip = true - health_check { - protocol = "TCP" - interval = 10 - healthy_threshold = 2 - unhealthy_threshold = 2 - } -} - -resource "aws_lb_target_group" "notebooks" { - name = "${var.prefix}-notebooks" - port = "8888" - vpc_id = "${aws_vpc.notebooks.id}" - protocol = "TCP" - preserve_client_ip = true - health_check { protocol = "TCP" interval = 10 
From cb58592358d8a1d089612c7820d9c3d3440bb321 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:33:52 +0100 Subject: [PATCH 24/42] fix: renamed arango_user_data to ecs_main_arango_user_data.sh --- infra/arango_user_data.sh | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 infra/arango_user_data.sh diff --git a/infra/arango_user_data.sh b/infra/arango_user_data.sh deleted file mode 100644 index 21ec27b..0000000 --- a/infra/arango_user_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -echo "ECS_CLUSTER=data-workspace-dev-a" >> /etc/ecs/ecs.config -# install the REX-Ray Docker volume plugin -docker plugin install rexray/ebs REXRAY_PREEMPT=true EBS_REGION=${EBS_REGION} --grant-all-permission -# restart the ECS agent. This ensures the plugin is active and recognized once the agent starts. -#sudo systemctl restart ecs \ No newline at end of file From 7d1e9c6086b59278f7b7ca01d5f1fab4f5ace43d Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:35:31 +0100 Subject: [PATCH 25/42] feat: add datasets cidr details for vpc subnets --- infra/environment-template/main.tf | 5 +++++ infra/main.tf | 1 + 2 files changed, 6 insertions(+) diff --git a/infra/environment-template/main.tf b/infra/environment-template/main.tf index 86e58c2..f63df0d 100644 --- a/infra/environment-template/main.tf +++ b/infra/environment-template/main.tf @@ -53,6 +53,7 @@ module "jupyterhub" { vpc_notebooks_cidr = "172.17.0.0/16" vpc_notebooks_subnets_num_bits = "5" vpc_datasets_cidr = "172.18.4.0/22" + vpc_datasets_subnets_num_bits = "8" aws_availability_zones = ["eu-west-2a", "eu-west-2b", "eu-west-2c"] aws_availability_zones_short = ["a", "b", "c"] @@ -149,6 +150,10 @@ module "jupyterhub" { "172.18.4.0/25", "172.18.4.128/25", "172.18.5.0/25", + "172.18.6.0/25", + "172.18.6.128/25", + "172.18.7.0/25", + ] ] dataset_subnets_availability_zones = [ "eu-west-2a", diff --git a/infra/main.tf b/infra/main.tf index a951c4c..8f5d11f 100644 --- a/infra/main.tf +++ 
b/infra/main.tf @@ -35,6 +35,7 @@ variable "subnets_num_bits" {} variable "vpc_notebooks_cidr" {} variable "vpc_notebooks_subnets_num_bits" {} variable "vpc_datasets_cidr" {} +variable "vpc_datasets_subnets_num_bits" {} variable "aws_route53_zone" {} variable "admin_domain" {} From 21c3c599c279620113074d942e1e3e1324e85483 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:37:12 +0100 Subject: [PATCH 26/42] feat: add security rules to enable connection from datasets vpc to ecr --- infra/security_groups.tf | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index dae3c51..30a0a34 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -600,6 +600,34 @@ resource "aws_security_group" "ecr_api" { } } +resource "aws_security_group" "ecr_dkr_datasets" { + name = "${var.prefix}-ecr-dkr-datasets" + description = "${var.prefix}-ecr-dkr-datasets" + vpc_id = aws_vpc.datasets.id + + tags = { + Name = "${var.prefix}-ecr-dkr-datasets" + } + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_security_group" "ecr_api_datasets" { + name = "${var.prefix}-ecr-api-datasets" + description = "${var.prefix}-ecr-api-datasets" + vpc_id = aws_vpc.datasets.id + + tags = { + Name = "${var.prefix}-ecr-api-datasets" + } + + lifecycle { + create_before_destroy = true + } +} + resource "aws_security_group_rule" "ecr_api_ingress_https_from_dns_rewrite_proxy" { description = "ingress-https-from-dns-rewrite-proxy" @@ -725,7 +753,7 @@ resource "aws_security_group_rule" "ecr_api_ingress_https_from_healthcheck" { resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_proxy" { description = "ingress-https-from-arango" - security_group_id = aws_security_group.ecr_api.id + security_group_id = aws_security_group.ecr_api_datasets.id source_security_group_id = aws_security_group.arango_service.id type = "ingress" @@ -758,6 +786,18 @@ 
resource "aws_security_group_rule" "ecr_dkr_ingress_https_from_all" { protocol = "tcp" } +resource "aws_security_group_rule" "ecr_dkr_datasets_ingress_https_from_all" { + description = "ingress-https-from-everywhere" + + security_group_id = aws_security_group.ecr_dkr_datasets.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + resource "aws_security_group" "mirrors_sync" { name = "${var.prefix}-mirrors-sync" description = "${var.prefix}-mirrors-sync" From 886cd5f02ad4923253ade5a0a911a01cd901ab27 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:39:47 +0100 Subject: [PATCH 27/42] fix: adding rule to allow egress to cloudwatch --- infra/security_groups.tf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index 30a0a34..7e0f0b8 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -2062,6 +2062,18 @@ resource "aws_security_group_rule" "arango_lb_notebooks_egress" { protocol = "-1" } +resource "aws_security_group_rule" "arango_lb_egress_https_to_cloudwatch" { + description = "egress-https-to-cloudwatch" + + security_group_id = aws_security_group.arango_lb.id + source_security_group_id = aws_security_group.cloudwatch.id + + type = "egress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + resource "aws_security_group" "arango_service" { name = "${var.prefix}-arango" description = "${var.prefix}-arango" From ba9bf91bc1d2d3942e0ee079a98f6431ac11828a Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:40:46 +0100 Subject: [PATCH 28/42] feat: adjusting arango security groups to use datasets vpc --- infra/security_groups.tf | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index 7e0f0b8..3cc362e 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -1991,7 +1991,7 @@ 
resource "aws_security_group_rule" "notebooks_egress_http_to_mlflow_service" { resource "aws_security_group" "arango_lb" { name = "${var.prefix}-arango_lb" description = "${var.prefix}-arango_lb" - vpc_id = "${aws_vpc.main.id}" + vpc_id = "${aws_vpc.datasets.id}" tags = { Name = "${var.prefix}-arango_lb" @@ -2050,6 +2050,18 @@ resource "aws_security_group_rule" "arango_lb_notebooks_ingress" { protocol = "-1" } +resource "aws_security_group_rule" "arango_lb_ingress_vpc" { + description = "inbound connection from vpc CIDR" + + security_group_id = aws_security_group.arango_lb.id + cidr_blocks = ["${aws_vpc.datasets.cidr_block}"] + + type = "ingress" + from_port = "8529" + to_port = "8529" + protocol = "-1" +} + resource "aws_security_group_rule" "arango_lb_notebooks_egress" { description = "allow outbound traffic" @@ -2077,7 +2089,7 @@ resource "aws_security_group_rule" "arango_lb_egress_https_to_cloudwatch" { resource "aws_security_group" "arango_service" { name = "${var.prefix}-arango" description = "${var.prefix}-arango" - vpc_id = "${aws_vpc.main.id}" + vpc_id = "${aws_vpc.datasets.id}" tags = { Name = "${var.prefix}-arango" @@ -2093,7 +2105,7 @@ resource "aws_security_group" "arango_service" { resource "aws_security_group_rule" "arango_egress_ecr_api" { description = "egress-https-to-ecr-api" - security_group_id = aws_security_group.ecr_api.id + security_group_id = aws_security_group.ecr_api_datasets.id source_security_group_id = aws_security_group.arango_service.id type = "egress" @@ -2167,7 +2179,7 @@ resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango" { description = "ingress-https-from-arango-service" security_group_id = aws_security_group.arango_service.id - source_security_group_id = aws_security_group.ecr_api.id + source_security_group_id = aws_security_group.ecr_api_datasets.id type = "ingress" from_port = "443" @@ -2201,7 +2213,7 @@ resource "aws_security_group_rule" "arango-ecs-egress-all" { resource "aws_security_group" "arango-ec2" 
{ name = "${var.prefix}-arango-ec2" description = "${var.prefix}-arango-ec2" - vpc_id = aws_vpc.main.id + vpc_id = aws_vpc.datasets.id tags = { Name = "${var.prefix}-arango-ec2" From 8cd99c79ff9c7ed8ef9f24c29f5f40f502d375c9 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:41:28 +0100 Subject: [PATCH 29/42] fix: adding security rules to enable connection between ec2 and ecs --- infra/security_groups.tf | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index 3cc362e..dd8cd5c 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -2224,6 +2224,18 @@ resource "aws_security_group" "arango-ec2" { } } +resource "aws_security_group_rule" "arango-ec2-egress-ecs-agent" { + description = "egress-ec2-agent" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = ["0.0.0.0/0"] + + type = "egress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + resource "aws_security_group_rule" "arango-ec2-egress-all" { description = "egress-everything-to-everywhere" @@ -2237,7 +2249,7 @@ resource "aws_security_group_rule" "arango-ec2-egress-all" { } resource "aws_security_group_rule" "arango-ec2-egress" { - description = "egress-everything-to-everywhere" + description = "ingress-everything-to-everywhere" security_group_id = aws_security_group.arango-ec2.id cidr_blocks = ["0.0.0.0/0"] @@ -2248,11 +2260,34 @@ resource "aws_security_group_rule" "arango-ec2-egress" { protocol = "tcp" } +resource "aws_security_group_rule" "arango-ec2-22" { + description = "ingress_ec2_instance" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "22" + to_port = "22" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango-ec2-ingress-ecs-agent" { + description = "ingress-ec2-agent" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = 
["0.0.0.0/0"] + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_ec2" { description = "ingress-https-from-arango-ec2" - security_group_id = aws_security_group.ecr_api.id + security_group_id = aws_security_group.ecr_api_datasets.id source_security_group_id = aws_security_group.arango-ec2.id type = "ingress" From 843ab088b99c049139da071cd10b10d50584cb76 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Mon, 8 Apr 2024 20:42:05 +0100 Subject: [PATCH 30/42] feat: creating NAT gateway for datasets vpc --- infra/vpc.tf | 73 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/infra/vpc.tf b/infra/vpc.tf index f4866da..32dd371 100644 --- a/infra/vpc.tf +++ b/infra/vpc.tf @@ -139,7 +139,6 @@ resource "aws_subnet" "public" { count = length(var.aws_availability_zones) vpc_id = aws_vpc.main.id cidr_block = cidrsubnet(aws_vpc.main.cidr_block, var.subnets_num_bits, count.index) - availability_zone = var.aws_availability_zones[count.index] tags = { @@ -303,7 +302,7 @@ resource "aws_vpc" "datasets" { cidr_block = var.vpc_datasets_cidr enable_dns_support = true - enable_dns_hostnames = false + enable_dns_hostnames = true tags = { Name = "${var.prefix}-datasets" @@ -441,7 +440,7 @@ resource "aws_route_table_association" "datasets" { resource "aws_subnet" "datasets_quicksight" { vpc_id = aws_vpc.datasets.id cidr_block = var.quicksight_cidr_block - + availability_zone = var.quicksight_subnet_availability_zone tags = { @@ -457,3 +456,71 @@ resource "aws_route_table_association" "datasets_quicksight" { subnet_id = aws_subnet.datasets_quicksight.id route_table_id = aws_route_table.datasets.id } + +# public subnet for datasets +resource "aws_subnet" "public_datasets" { + count = length(var.aws_availability_zones) + vpc_id = aws_vpc.datasets.id + cidr_block = var.datasets_subnet_cidr_blocks[count.index+3] + + availability_zone = 
var.aws_availability_zones[count.index] + + tags = { + Name = "${var.prefix}-public-datasets-${var.aws_availability_zones_short[count.index]}" + } + + lifecycle { + create_before_destroy = true + } +} +# internet gateway for public subnet +resource "aws_internet_gateway" "main_datasets" { + vpc_id = aws_vpc.datasets.id + + tags = { + Name = "${var.prefix}" + } +} + +# elastic IP for NAT gateway +resource "aws_eip" "nat_gateway_datasets" { + vpc = true +} + +# NAT gateway in public datasets subnet +resource "aws_nat_gateway" "datasets" { + allocation_id = aws_eip.nat_gateway_datasets.id + subnet_id = aws_subnet.public_datasets.*.id[0] + + tags = { + Name = "${var.prefix}" + } +} + +# Route table for public datasets subnet +resource "aws_route_table" "public_datasets" { + vpc_id = aws_vpc.datasets.id + tags = { + Name = "${var.prefix}-public" + } +} + +resource "aws_route" "public_datasets_internet_gateway_ipv4" { + route_table_id = aws_route_table.public_datasets.id + destination_cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main_datasets.id +} + +# associate public route table with public subnet +resource "aws_route_table_association" "public_datasets" { + count = length(var.aws_availability_zones) + subnet_id = aws_subnet.public_datasets.*.id[count.index] + route_table_id = aws_route_table.public_datasets.id +} + +# associate datasets private subnet with NAT gateway +resource "aws_route" "datasets_nat_gateway" { + route_table_id = aws_route_table.datasets.id + destination_cidr_block = "0.0.0.0/0" + gateway_id = aws_nat_gateway.datasets.id +} \ No newline at end of file From 04ea3a44596c58c4dc19f1a24ab2559f3185e28a Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 08:36:57 +0100 Subject: [PATCH 31/42] fix: adjusting load balancer config --- infra/ecs_main_arango.tf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 354f7af..648d29b 100644 --- 
a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -340,12 +340,11 @@ resource "aws_lb" "arango" { name = "${var.prefix}-arango" load_balancer_type = "network" security_groups = ["${aws_security_group.arango_lb.id}"] - enable_deletion_protection = true + enable_deletion_protection = false timeouts {} subnet_mapping { - subnet_id = "${aws_subnet.public_datasets.*.id[0]}" - + subnet_id = "${aws_subnet.datasets.*.id[0]}" } tags = { From 6f5dc31e67c9f754b516a646cc87b2725882fe77 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 08:37:37 +0100 Subject: [PATCH 32/42] fix: adjusting security groups to fix Theia connection --- infra/security_groups.tf | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index dd8cd5c..770740d 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -448,6 +448,18 @@ resource "aws_security_group_rule" "notebooks_ingress_https_from_arango" { protocol = "tcp" } +resource "aws_security_group_rule" "notebooks_ingress_https_from_arango_lb" { + description = "ingress-https-from-arango-lb" + + security_group_id = aws_security_group.notebooks.id + source_security_group_id = aws_security_group.arango_lb.id + + type = "ingress" + from_port = local.arango_container_port + to_port = local.arango_container_port + protocol = "tcp" +} + resource "aws_security_group_rule" "notebooks_ingress_http_dev_from_admin" { description = "ingress-http-dev-from-jupytehub" @@ -537,7 +549,7 @@ resource "aws_security_group_rule" "notebooks_egress_arango_service" { description = "egress-to-arango" security_group_id = aws_security_group.notebooks.id - source_security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.datasets.id type = "egress" from_port = local.arango_container_port @@ -2163,6 +2175,18 @@ resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" { 
protocol = "tcp" } +resource "aws_security_group_rule" "arango_service_ingress_from_notebooks" { + description = "ingress-notebooks-arango" + + security_group_id = "${aws_security_group.arango_service.id}" + source_security_group_id = "${aws_security_group.notebooks.id}" + + type = "ingress" + from_port = "8529" + to_port = "8529" + protocol = "tcp" +} + resource "aws_security_group_rule" "arango_service_egress_8529_arango_lb" { description = "egress-arango-lb" @@ -2244,15 +2268,15 @@ resource "aws_security_group_rule" "arango-ec2-egress-all" { type = "egress" from_port = "0" - to_port = "65535" - protocol = "tcp" + to_port = "0" + protocol = "-1" } -resource "aws_security_group_rule" "arango-ec2-egress" { - description = "ingress-everything-to-everywhere" +resource "aws_security_group_rule" "arango-ec2-ingress-lb" { + description = "ingress-everything-to-ec2" security_group_id = aws_security_group.arango-ec2.id - cidr_blocks = ["0.0.0.0/0"] + source_security_group_id = aws_security_group.arango_lb.id type = "ingress" from_port = "80" @@ -2260,7 +2284,32 @@ resource "aws_security_group_rule" "arango-ec2-egress" { protocol = "tcp" } -resource "aws_security_group_rule" "arango-ec2-22" { +resource "aws_security_group_rule" "arango8529-ec2-ingress-lb" { + description = "ingress-arango8529-to-ec2" + + security_group_id = aws_security_group.arango-ec2.id + cidr_blocks = ["0.0.0.0/0"] + + type = "ingress" + from_port = "8529" + to_port = "8529" + protocol = "tcp" +} + + +resource "aws_security_group_rule" "arango-ec2-ingress-arango-lb" { + description = "ingress-everything-to-arango-lb" + + security_group_id = aws_security_group.arango-ec2.id + source_security_group_id = aws_security_group.arango_lb.id + + type = "ingress" + from_port = "443" + to_port = "443" + protocol = "tcp" +} + +resource "aws_security_group_rule" "arango-ingress-ec2-22" { description = "ingress_ec2_instance" security_group_id = aws_security_group.arango-ec2.id @@ -2276,7 +2325,7 @@ resource 
"aws_security_group_rule" "arango-ec2-ingress-ecs-agent" { description = "ingress-ec2-agent" security_group_id = aws_security_group.arango-ec2.id - cidr_blocks = ["0.0.0.0/0"] + source_security_group_id = aws_security_group.arango_service.id type = "ingress" from_port = "443" From b625a35dcc0876626e93b44f53c98e48e650ee57 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 08:38:10 +0100 Subject: [PATCH 33/42] fix: enable dns support and hostnames to true to support connection to Theia --- infra/vpc.tf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/infra/vpc.tf b/infra/vpc.tf index 32dd371..93c863a 100644 --- a/infra/vpc.tf +++ b/infra/vpc.tf @@ -19,8 +19,8 @@ resource "aws_vpc_peering_connection" "jupyterhub" { resource "aws_vpc" "notebooks" { cidr_block = var.vpc_notebooks_cidr - enable_dns_support = false - enable_dns_hostnames = false + enable_dns_support = true + enable_dns_hostnames = true tags = { Name = "${var.prefix}-notebooks" @@ -359,11 +359,11 @@ resource "aws_vpc_peering_connection" "datasets_to_notebooks" { auto_accept = true accepter { - allow_remote_vpc_dns_resolution = false + allow_remote_vpc_dns_resolution = true } requester { - allow_remote_vpc_dns_resolution = false + allow_remote_vpc_dns_resolution = true } tags = { @@ -414,7 +414,6 @@ resource "aws_route" "pcx_datasets_to_private_without_egress" { vpc_peering_connection_id = aws_vpc_peering_connection.datasets_to_notebooks.id } - resource "aws_subnet" "datasets" { count = length(var.aws_availability_zones) vpc_id = aws_vpc.datasets.id @@ -440,7 +439,7 @@ resource "aws_route_table_association" "datasets" { resource "aws_subnet" "datasets_quicksight" { vpc_id = aws_vpc.datasets.id cidr_block = var.quicksight_cidr_block - + availability_zone = var.quicksight_subnet_availability_zone tags = { From 5376c21fb66459c4d233180a1f36cc429de3fbd6 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 13:53:18 +0100 Subject: [PATCH 34/42] feat: add 
mount points for volume --- infra/ecs_main_arango_container_definitions.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json index f321c58..df1ae9b 100644 --- a/infra/ecs_main_arango_container_definitions.json +++ b/infra/ecs_main_arango_container_definitions.json @@ -17,6 +17,10 @@ "awslogs-stream-prefix": "${container_name}" } }, - "environment": [] + "environment": [], + "mountPoints" : [{ + "containerPath" : "/data/", + "sourceVolume" : "data-arango" + }] } ] \ No newline at end of file From 4a3b62d1f58ff7ea3f70c23efaee6d29cec6c8f0 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 13:53:57 +0100 Subject: [PATCH 35/42] feat: mount EBS volume to EC2 instance --- infra/ecs_main_arango_user_data.sh | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/infra/ecs_main_arango_user_data.sh b/infra/ecs_main_arango_user_data.sh index 5d1b1c5..c51dad8 100644 --- a/infra/ecs_main_arango_user_data.sh +++ b/infra/ecs_main_arango_user_data.sh @@ -1,13 +1,25 @@ #!/bin/bash # install and start ecs agent +EC2_INSTANCE_ID=$(ec2-metadata --instance-id | sed 's/instance-id: //') +aws ec2 attach-volume --volume-id vol-0496244e683c15b0a --instance-id ${EC2_INSTANCE_ID} --device /dev/xvdf --region eu-west-2 +# Follow symlinks to find the real device +device=$(sudo readlink -f /dev/xvdf) + # Wait for the drive to be attached +while [ ! 
-e $device ] ; do sleep 1 ; done +# Format the attached volume if it does not contain a filesystem yet +if [ "$(sudo file -b -s $device)" == "data" ]; then +sudo mkfs -t ext4 $device +fi +# Mount the drive +sudo mkdir -p /data +sudo mount $device /data + mkdir /etc/ecs/ echo "ECS_CLUSTER=data-workspace-dev-a" >> /etc/ecs/ecs.config -sudo yum update -y ecs-init +sudo yum install -y ecs-init sudo systemctl restart ecs -#echo "ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE=true" >> /etc/ecs/ecs.config -# install the REX-Ray Docker volume plugin -#docker plugin install rexray/ebs "REXRAY_PREEMPT=true" "EBS_REGION=eu-west-2" --grant-all-permission -# restart the ECS agent. This ensures the plugin is active and recognized once the agent starts. -#sudo yum update -y ecs-init -#sudo systemctl restart ecs \ No newline at end of file + + + + From 1e23406f39f826c47bc1528b502cfcff6a229252 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 13:54:50 +0100 Subject: [PATCH 36/42] feat: add EBS volume --- infra/ecs_main_arango.tf | 48 +++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index 648d29b..d80cce8 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -69,6 +69,10 @@ resource "aws_autoscaling_group" "arango_service" { } } +data "aws_autoscaling_groups" "arango_asgs" { + names = ["${aws_autoscaling_group.arango_service.name}"] +} + resource "aws_launch_template" "arango_service" { name_prefix = "${var.prefix}-arango-service-" image_id = "ami-0c618421e207909d0" @@ -100,9 +104,14 @@ data "template_file" "ecs_config_template" { vars = { ECS_CLUSTER = "${aws_ecs_cluster.main_cluster.name}" EBS_REGION = "${data.aws_region.aws_region.name}" + EBS_VOLUME_ID = "${aws_ebs_volume.arango.id}" } } +output "rendered" { + value = "${data.template_file.ecs_config_template.rendered}" +} + resource "aws_ecs_capacity_provider" "arango_capacity_provider" { name =
"${var.prefix}-arango_service" @@ -138,6 +147,12 @@ resource "aws_ecs_task_definition" "arango_service" { memory = "${local.arango_container_memory}" requires_compatibilities = ["EC2"] + volume { + name = "data-arango" + host_path = "/data/" + } + + lifecycle { ignore_changes = [ "revision", @@ -158,6 +173,21 @@ data "template_file" "arango_service_container_definitions" { } } +resource "aws_ebs_volume" "arango" { + availability_zone = var.aws_availability_zones[0] + size = 20 + type = "gp3" + encrypted = true + + lifecycle { + prevent_destroy = false + } + + tags = { + Name = "${var.prefix}-arango" + } +} + resource "aws_cloudwatch_log_group" "arango" { name = "${var.prefix}-arango" retention_in_days = "3653" @@ -338,15 +368,13 @@ resource "aws_iam_instance_profile" "arango_ec2" { resource "aws_lb" "arango" { name = "${var.prefix}-arango" - load_balancer_type = "network" + load_balancer_type = "application" security_groups = ["${aws_security_group.arango_lb.id}"] - enable_deletion_protection = false + enable_deletion_protection = true + internal = true + subnets = aws_subnet.datasets.*.id timeouts {} - subnet_mapping { - subnet_id = "${aws_subnet.datasets.*.id[0]}" - } - tags = { name = "arango-to-notebook-lb" } @@ -355,7 +383,7 @@ resource "aws_lb" "arango" { resource "aws_lb_listener" "arango" { load_balancer_arn = "${aws_lb.arango.arn}" port = "8529" - protocol = "TCP" + protocol = "HTTP" default_action { target_group_arn = "${aws_lb_target_group.arango.id}" @@ -368,13 +396,13 @@ resource "aws_lb_target_group" "arango" { port = "8529" vpc_id = "${aws_vpc.datasets.id}" target_type = "ip" - protocol = "TCP" - preserve_client_ip = true + protocol = "HTTP" health_check { - protocol = "TCP" + protocol = "HTTP" interval = 10 healthy_threshold = 2 unhealthy_threshold = 2 + path = "/_db/_system/_admin/aardvark/index.html" } } \ No newline at end of file From cd05927a79a11b88fac8b6e7967e0ac082bdc517 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Thu, 11 Apr 2024 
14:40:23 +0100 Subject: [PATCH 37/42] fix: address linting error --- infra/environment-template/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/environment-template/main.tf b/infra/environment-template/main.tf index f63df0d..0fb6b93 100644 --- a/infra/environment-template/main.tf +++ b/infra/environment-template/main.tf @@ -154,7 +154,7 @@ module "jupyterhub" { "172.18.6.128/25", "172.18.7.0/25", ] - ] + dataset_subnets_availability_zones = [ "eu-west-2a", "eu-west-2b", From fc3020503fa86da31c9a17939e9f2cff4c444d2e Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 16 Apr 2024 20:50:12 +0100 Subject: [PATCH 38/42] fix: adjust arango CPU requirement --- infra/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/main.tf b/infra/main.tf index 8f5d11f..2a27be4 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -268,7 +268,7 @@ locals { flower_container_cpu = 1024 arango_container_memory = 8192 - arango_container_cpu = 4096 + arango_container_cpu = 2048 arango_container_port = 8529 mlflow_container_memory = 8192 From 4e9fa6ec3f69c2bdb771299dba6dddc7f7945074 Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Tue, 16 Apr 2024 21:11:00 +0100 Subject: [PATCH 39/42] fix: minor amendments to Arango environment variables to tie in with terraform fix: formatting changes to pass linting checks --- infra/ecs_main_admin.tf | 6 +- infra/ecs_main_arango.tf | 150 ++++++++++++++--------------- infra/environment-template/main.tf | 25 ++--- infra/security_groups.tf | 28 +++--- infra/vpc.tf | 8 +- 5 files changed, 98 insertions(+), 119 deletions(-) diff --git a/infra/ecs_main_admin.tf b/infra/ecs_main_admin.tf index 1c6585f..7c12cd3 100644 --- a/infra/ecs_main_admin.tf +++ b/infra/ecs_main_admin.tf @@ -26,9 +26,9 @@ locals { authbroker_url = "${var.admin_authbroker_url}" secret_key = "${random_string.admin_secret_key.result}" - arango_db__host = "${aws_ecs_service.arango.address}" - arango_db__password = 
"${random_string.aws_arangodb_root_password.result}" - arango_db__port = "${aws_ecs_service.arango.port}" + arango_db__host = "http://${aws_lb.arango.dns_name}" + arango_db__password = "${random_string.aws_arangodb_root_password.result}" + arango_db__port = "${local.arango_container_port}" environment = "${var.admin_environment}" diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index faeb038..e010158 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -1,22 +1,22 @@ resource "aws_ecs_service" "arango" { name = "${var.prefix}-arango" - cluster = "${aws_ecs_cluster.main_cluster.id}" - task_definition = "${aws_ecs_task_definition.arango_service.arn}" + cluster = aws_ecs_cluster.main_cluster.id + task_definition = aws_ecs_task_definition.arango_service.arn desired_count = 1 capacity_provider_strategy { - capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name - weight = 100 - base = 1 - } + capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name + weight = 100 + base = 1 + } network_configuration { - subnets = ["${aws_subnet.datasets.*.id[0]}"] - security_groups = ["${aws_security_group.arango_service.id}"] + subnets = [aws_subnet.datasets.*.id[0]] + security_groups = [aws_security_group.arango_service.id] } load_balancer { - target_group_arn = "${aws_lb_target_group.arango.arn}" + target_group_arn = aws_lb_target_group.arango.arn container_port = "8529" container_name = "arango" } @@ -54,8 +54,8 @@ resource "aws_autoscaling_group" "arango_service" { vpc_zone_identifier = ["${aws_subnet.datasets.*.id[0]}"] launch_template { - id = aws_launch_template.arango_service.id - version = "$Latest" + id = aws_launch_template.arango_service.id + version = "$Latest" } tag { @@ -74,25 +74,24 @@ data "aws_autoscaling_groups" "arango_asgs" { } resource "aws_launch_template" "arango_service" { - name_prefix = "${var.prefix}-arango-service-" - image_id = "ami-0c618421e207909d0" - instance_type = "t2.xlarge" - 
key_name = "${aws_key_pair.shared.key_name}" + name_prefix = "${var.prefix}-arango-service-" + image_id = "ami-0c618421e207909d0" + instance_type = "t2.xlarge" + key_name = aws_key_pair.shared.key_name metadata_options { - http_tokens = "required" + http_tokens = "required" } network_interfaces { - security_groups = ["${aws_security_group.arango-ec2.id}"] + security_groups = [aws_security_group.arango-ec2.id] subnet_id = aws_subnet.datasets.*.id[0] - } - + } iam_instance_profile { - name = "${aws_iam_instance_profile.arango_ec2.name}" - } + name = aws_iam_instance_profile.arango_ec2.name + } - user_data = "${data.template_file.ecs_config_template.rendered}" + user_data = data.template_file.ecs_config_template.rendered lifecycle { create_before_destroy = true @@ -100,51 +99,47 @@ resource "aws_launch_template" "arango_service" { } data "template_file" "ecs_config_template" { - template = "${filebase64("${path.module}/ecs_main_arango_user_data.sh")}" - vars = { - ECS_CLUSTER = "${aws_ecs_cluster.main_cluster.name}" - EBS_REGION = "${data.aws_region.aws_region.name}" + template = filebase64("${path.module}/ecs_main_arango_user_data.sh") + vars = { + ECS_CLUSTER = "${aws_ecs_cluster.main_cluster.name}" + EBS_REGION = "${data.aws_region.aws_region.name}" EBS_VOLUME_ID = "${aws_ebs_volume.arango.id}" } - } +} -output "rendered" { - value = "${data.template_file.ecs_config_template.rendered}" +output "rendered" { + value = data.template_file.ecs_config_template.rendered } resource "aws_ecs_capacity_provider" "arango_capacity_provider" { - name = "${var.prefix}-arango_service" - - auto_scaling_group_provider { - auto_scaling_group_arn = aws_autoscaling_group.arango_service.arn - - managed_scaling { - maximum_scaling_step_size = 1000 - minimum_scaling_step_size = 1 - status = "ENABLED" - target_capacity = 3 - } - } + name = "${var.prefix}-arango_service" + auto_scaling_group_provider { + auto_scaling_group_arn = aws_autoscaling_group.arango_service.arn + managed_scaling { + 
maximum_scaling_step_size = 1000 + minimum_scaling_step_size = 1 + status = "ENABLED" + target_capacity = 3 + } + } } resource "aws_ecs_cluster_capacity_providers" "arango" { - cluster_name = aws_ecs_cluster.main_cluster.name - - capacity_providers = [aws_ecs_capacity_provider.arango_capacity_provider.name] - - default_capacity_provider_strategy { - capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name - } + cluster_name = aws_ecs_cluster.main_cluster.name + capacity_providers = [aws_ecs_capacity_provider.arango_capacity_provider.name] + default_capacity_provider_strategy { + capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name + } } resource "aws_ecs_task_definition" "arango_service" { family = "${var.prefix}-arango" - container_definitions = "${data.template_file.arango_service_container_definitions.rendered}" - execution_role_arn = "${aws_iam_role.arango_task_execution.arn}" - task_role_arn = "${aws_iam_role.arango_task.arn}" + container_definitions = data.template_file.arango_service_container_definitions.rendered + execution_role_arn = aws_iam_role.arango_task_execution.arn + task_role_arn = aws_iam_role.arango_task.arn network_mode = "awsvpc" - cpu = "${local.arango_container_cpu}" - memory = "${local.arango_container_memory}" + cpu = local.arango_container_cpu + memory = local.arango_container_memory requires_compatibilities = ["EC2"] volume { @@ -152,7 +147,6 @@ resource "aws_ecs_task_definition" "arango_service" { host_path = "/data/" } - lifecycle { ignore_changes = [ "revision", @@ -161,7 +155,7 @@ resource "aws_ecs_task_definition" "arango_service" { } data "template_file" "arango_service_container_definitions" { - template = "${file("${path.module}/ecs_main_arango_container_definitions.json")}" + template = file("${path.module}/ecs_main_arango_container_definitions.json") vars = { container_image = "339713044404.dkr.ecr.eu-west-2.amazonaws.com/data-workspace-dev-a-arango:latest" @@ -197,7 +191,7 @@ resource 
"aws_cloudwatch_log_group" "arango" { resource "aws_iam_role" "arango_task_execution" { name = "${var.prefix}-arango-task-execution" path = "/" - assume_role_policy = "${data.aws_iam_policy_document.arango_task_execution_ecs_tasks_assume_role.json}" + assume_role_policy = data.aws_iam_policy_document.arango_task_execution_ecs_tasks_assume_role.json } data "aws_iam_policy_document" "arango_task_execution_ecs_tasks_assume_role" { @@ -212,14 +206,14 @@ data "aws_iam_policy_document" "arango_task_execution_ecs_tasks_assume_role" { } resource "aws_iam_role_policy_attachment" "arango_task_execution" { - role = "${aws_iam_role.arango_task_execution.name}" - policy_arn = "${aws_iam_policy.arango_task_execution.arn}" + role = aws_iam_role.arango_task_execution.name + policy_arn = aws_iam_policy.arango_task_execution.arn } resource "aws_iam_policy" "arango_task_execution" { name = "${var.prefix}-arango-task-execution" path = "/" - policy = "${data.aws_iam_policy_document.arango_task_execution.json}" + policy = data.aws_iam_policy_document.arango_task_execution.json } data "aws_iam_policy_document" "arango_task_execution" { @@ -259,7 +253,7 @@ data "aws_iam_policy_document" "arango_task_execution" { resource "aws_iam_role" "arango_task" { name = "${var.prefix}-arango-task" path = "/" - assume_role_policy = "${data.aws_iam_policy_document.arango_task_ecs_tasks_assume_role.json}" + assume_role_policy = data.aws_iam_policy_document.arango_task_ecs_tasks_assume_role.json } data "aws_iam_policy_document" "arango_task_ecs_tasks_assume_role" { @@ -318,8 +312,8 @@ resource "aws_iam_role_policy_attachment" "arango_ec2" { } resource "aws_iam_role_policy_attachment" "arango_ec2_ssm" { - role = aws_iam_role.arango_ec2.name - policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" + role = aws_iam_role.arango_ec2.name + policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" } data "aws_iam_policy_document" "arango_ebs" { @@ -358,53 +352,51 @@ resource 
"aws_iam_policy" "arango_ebs" { } resource "aws_iam_role_policy_attachment" "arango_ec2_ebs" { - role = aws_iam_role.arango_ec2.name - policy_arn = aws_iam_policy.arango_ebs.arn + role = aws_iam_role.arango_ec2.name + policy_arn = aws_iam_policy.arango_ebs.arn } resource "aws_iam_instance_profile" "arango_ec2" { - name = "${var.prefix}-arango-ec2" - role = aws_iam_role.arango_ec2.id + name = "${var.prefix}-arango-ec2" + role = aws_iam_role.arango_ec2.id } resource "aws_lb" "arango" { - name = "${var.prefix}-arango" - load_balancer_type = "application" - security_groups = ["${aws_security_group.arango_lb.id}"] + name = "${var.prefix}-arango" + load_balancer_type = "application" + security_groups = [aws_security_group.arango_lb.id] enable_deletion_protection = true internal = true - subnets = aws_subnet.datasets.*.id - timeouts {} - + subnets = aws_subnet.datasets.*.id tags = { name = "arango-to-notebook-lb" } } resource "aws_lb_listener" "arango" { - load_balancer_arn = "${aws_lb.arango.arn}" + load_balancer_arn = aws_lb.arango.arn port = "8529" protocol = "HTTP" default_action { - target_group_arn = "${aws_lb_target_group.arango.id}" + target_group_arn = aws_lb_target_group.arango.id type = "forward" } } resource "aws_lb_target_group" "arango" { - name = "${var.prefix}-arango" + name = "${var.prefix}-arango" port = "8529" - vpc_id = "${aws_vpc.datasets.id}" + vpc_id = aws_vpc.datasets.id target_type = "ip" protocol = "HTTP" health_check { - protocol = "HTTP" - interval = 10 + protocol = "HTTP" + interval = 10 healthy_threshold = 2 unhealthy_threshold = 2 - path = "/_db/_system/_admin/aardvark/index.html" + path = "/_db/_system/_admin/aardvark/index.html" } } diff --git a/infra/environment-template/main.tf b/infra/environment-template/main.tf index 0fb6b93..6f09811 100644 --- a/infra/environment-template/main.tf +++ b/infra/environment-template/main.tf @@ -142,25 +142,12 @@ module "jupyterhub" { datasets_rds_cluster_instance_performance_insights_enabled = "true" 
datasets_rds_cluster_instance_identifier = "REPLACE_ME" - paas_cidr_block = "10.0.0.0/16" - paas_vpc_id = "REPLACE_ME" - quicksight_cidr_block = "172.18.5.128/25" - quicksight_vpc_arn = "REPLACE_ME" - datasets_subnet_cidr_blocks = [ - "172.18.4.0/25", - "172.18.4.128/25", - "172.18.5.0/25", - "172.18.6.0/25", - "172.18.6.128/25", - "172.18.7.0/25", - ] - - dataset_subnets_availability_zones = [ - "eu-west-2a", - "eu-west-2b", - "eu-west-2b", - ] # The second and third subnet on the live environment are both in the same az - + paas_cidr_block = "10.0.0.0/16" + paas_vpc_id = "REPLACE_ME" + quicksight_cidr_block = "172.18.5.128/25" + quicksight_vpc_arn = "REPLACE_ME" + datasets_subnet_cidr_blocks = ["172.18.4.0/25", "172.18.4.128/25", "172.18.5.0/25", "172.18.6.0/25", "172.18.6.128/25", "172.18.7.0/25"] + dataset_subnets_availability_zones = ["eu-west-2a", "eu-west-2b", "eu-west-2b"] # The second and third subnet on the live environment are both in the same az quicksight_security_group_name = "jupyterhub-quicksight" quicksight_security_group_description = "Allow quicksight to connect to data workspace datasets DB" quicksight_subnet_availability_zone = "eu-west-2b" diff --git a/infra/security_groups.tf b/infra/security_groups.tf index 770740d..a488914 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -2003,7 +2003,7 @@ resource "aws_security_group_rule" "notebooks_egress_http_to_mlflow_service" { resource "aws_security_group" "arango_lb" { name = "${var.prefix}-arango_lb" description = "${var.prefix}-arango_lb" - vpc_id = "${aws_vpc.datasets.id}" + vpc_id = aws_vpc.datasets.id tags = { Name = "${var.prefix}-arango_lb" @@ -2018,7 +2018,7 @@ resource "aws_security_group_rule" "arango_lb_ingress_https_from_whitelist" { description = "ingress-https-from-whitelist" security_group_id = aws_security_group.arango_lb.id - cidr_blocks = "${var.ip_whitelist}" + cidr_blocks = var.ip_whitelist type = "ingress" from_port = "443" @@ -2101,7 +2101,7 @@ resource 
"aws_security_group_rule" "arango_lb_egress_https_to_cloudwatch" { resource "aws_security_group" "arango_service" { name = "${var.prefix}-arango" description = "${var.prefix}-arango" - vpc_id = "${aws_vpc.datasets.id}" + vpc_id = aws_vpc.datasets.id tags = { Name = "${var.prefix}-arango" @@ -2117,7 +2117,7 @@ resource "aws_security_group" "arango_service" { resource "aws_security_group_rule" "arango_egress_ecr_api" { description = "egress-https-to-ecr-api" - security_group_id = aws_security_group.ecr_api_datasets.id + security_group_id = aws_security_group.ecr_api_datasets.id source_security_group_id = aws_security_group.arango_service.id type = "egress" @@ -2166,8 +2166,8 @@ resource "aws_security_group_rule" "arango_service_ingress_http_arango_lb" { resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" { description = "ingress-arango-lb" - security_group_id = "${aws_security_group.arango_service.id}" - source_security_group_id = "${aws_security_group.arango_lb.id}" + security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.arango_lb.id type = "ingress" from_port = "8529" @@ -2178,8 +2178,8 @@ resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" { resource "aws_security_group_rule" "arango_service_ingress_from_notebooks" { description = "ingress-notebooks-arango" - security_group_id = "${aws_security_group.arango_service.id}" - source_security_group_id = "${aws_security_group.notebooks.id}" + security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.notebooks.id type = "ingress" from_port = "8529" @@ -2190,8 +2190,8 @@ resource "aws_security_group_rule" "arango_service_ingress_from_notebooks" { resource "aws_security_group_rule" "arango_service_egress_8529_arango_lb" { description = "egress-arango-lb" - security_group_id = "${aws_security_group.arango_service.id}" - source_security_group_id = "${aws_security_group.arango_lb.id}" 
+ security_group_id = aws_security_group.arango_service.id + source_security_group_id = aws_security_group.arango_lb.id type = "egress" from_port = "8529" @@ -2275,7 +2275,7 @@ resource "aws_security_group_rule" "arango-ec2-egress-all" { resource "aws_security_group_rule" "arango-ec2-ingress-lb" { description = "ingress-everything-to-ec2" - security_group_id = aws_security_group.arango-ec2.id + security_group_id = aws_security_group.arango-ec2.id source_security_group_id = aws_security_group.arango_lb.id type = "ingress" @@ -2288,7 +2288,7 @@ resource "aws_security_group_rule" "arango8529-ec2-ingress-lb" { description = "ingress-arango8529-to-ec2" security_group_id = aws_security_group.arango-ec2.id - cidr_blocks = ["0.0.0.0/0"] + cidr_blocks = ["0.0.0.0/0"] type = "ingress" from_port = "8529" @@ -2300,7 +2300,7 @@ resource "aws_security_group_rule" "arango8529-ec2-ingress-lb" { resource "aws_security_group_rule" "arango-ec2-ingress-arango-lb" { description = "ingress-everything-to-arango-lb" - security_group_id = aws_security_group.arango-ec2.id + security_group_id = aws_security_group.arango-ec2.id source_security_group_id = aws_security_group.arango_lb.id type = "ingress" @@ -2324,7 +2324,7 @@ resource "aws_security_group_rule" "arango-ingress-ec2-22" { resource "aws_security_group_rule" "arango-ec2-ingress-ecs-agent" { description = "ingress-ec2-agent" - security_group_id = aws_security_group.arango-ec2.id + security_group_id = aws_security_group.arango-ec2.id source_security_group_id = aws_security_group.arango_service.id type = "ingress" diff --git a/infra/vpc.tf b/infra/vpc.tf index 93c863a..5ba1f61 100644 --- a/infra/vpc.tf +++ b/infra/vpc.tf @@ -136,9 +136,9 @@ data "aws_iam_policy_document" "vpc_main_flow_log" { } resource "aws_subnet" "public" { - count = length(var.aws_availability_zones) - vpc_id = aws_vpc.main.id - cidr_block = cidrsubnet(aws_vpc.main.cidr_block, var.subnets_num_bits, count.index) + count = length(var.aws_availability_zones) + vpc_id 
= aws_vpc.main.id + cidr_block = cidrsubnet(aws_vpc.main.cidr_block, var.subnets_num_bits, count.index) availability_zone = var.aws_availability_zones[count.index] tags = { @@ -460,7 +460,7 @@ resource "aws_route_table_association" "datasets_quicksight" { resource "aws_subnet" "public_datasets" { count = length(var.aws_availability_zones) vpc_id = aws_vpc.datasets.id - cidr_block = var.datasets_subnet_cidr_blocks[count.index+3] + cidr_block = var.datasets_subnet_cidr_blocks[count.index + 3] availability_zone = var.aws_availability_zones[count.index] From f472e89b4c5b6caac818abf07a1324a81e03899f Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 24 Apr 2024 19:32:50 +0100 Subject: [PATCH 40/42] fix: move hard-coded variables into main.tf --- infra/ecs_main_arango.tf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index e010158..a06d4dc 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -75,8 +75,8 @@ data "aws_autoscaling_groups" "arango_asgs" { resource "aws_launch_template" "arango_service" { name_prefix = "${var.prefix}-arango-service-" - image_id = "ami-0c618421e207909d0" - instance_type = "t2.xlarge" + image_id = var.arango_image_id + instance_type = var.arango_instance_type key_name = aws_key_pair.shared.key_name metadata_options { @@ -158,7 +158,7 @@ data "template_file" "arango_service_container_definitions" { template = file("${path.module}/ecs_main_arango_container_definitions.json") vars = { - container_image = "339713044404.dkr.ecr.eu-west-2.amazonaws.com/data-workspace-dev-a-arango:latest" + container_image = "${aws_ecr_repository.arango.repository_url}:latest" container_name = "arango" log_group = "${aws_cloudwatch_log_group.arango.name}" log_region = "${data.aws_region.aws_region.name}" @@ -170,8 +170,8 @@ data "template_file" "arango_service_container_definitions" { resource "aws_ebs_volume" "arango" { availability_zone = 
var.aws_availability_zones[0] - size = 20 - type = "gp3" + size = var.arango_ebs_volume_size + type = var.arango_ebs_volume_type encrypted = true lifecycle { From bf7a784fb04afd423e8f8de3f340111abe88397b Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 24 Apr 2024 19:40:57 +0100 Subject: [PATCH 41/42] fix: restrict resources to which EBS IAM policy applies --- infra/ecs_main_arango.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf index a06d4dc..5c59a41 100644 --- a/infra/ecs_main_arango.tf +++ b/infra/ecs_main_arango.tf @@ -340,7 +340,7 @@ data "aws_iam_policy_document" "arango_ebs" { "ec2:DescribeTags" ] resources = [ - "*" + "${aws_ebs_volume.arango.arn}" ] } } From 07349403a00cd6111cf4d1038863defbeee32b7c Mon Sep 17 00:00:00 2001 From: Isobel Daley Date: Wed, 24 Apr 2024 19:43:34 +0100 Subject: [PATCH 42/42] fix: remove redundant security groups --- infra/security_groups.tf | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/infra/security_groups.tf b/infra/security_groups.tf index a488914..35f9298 100644 --- a/infra/security_groups.tf +++ b/infra/security_groups.tf @@ -436,18 +436,6 @@ resource "aws_security_group_rule" "notebooks_ingress_https_from_admin" { protocol = "tcp" } -resource "aws_security_group_rule" "notebooks_ingress_https_from_arango" { - description = "ingress-https-from-arango" - - security_group_id = aws_security_group.notebooks.id - source_security_group_id = aws_security_group.arango_service.id - - type = "ingress" - from_port = local.arango_container_port - to_port = local.arango_container_port - protocol = "tcp" -} - resource "aws_security_group_rule" "notebooks_ingress_https_from_arango_lb" { description = "ingress-https-from-arango-lb" @@ -2026,18 +2014,6 @@ resource "aws_security_group_rule" "arango_lb_ingress_https_from_whitelist" { protocol = "tcp" } -resource "aws_security_group_rule" "arangoo_service_egress_http_to_arango_lb" { - 
description = "egress-http-to-arango-lb" - - security_group_id = aws_security_group.arango_service.id - source_security_group_id = aws_security_group.arango_lb.id - - type = "egress" - from_port = "80" - to_port = "80" - protocol = "tcp" -} - resource "aws_security_group_rule" "arango_lb_egress_https_to_arango_service" { description = "egress-https-to-arango-service"