diff --git a/infra/ecr.tf b/infra/ecr.tf
index 8ebfccd..0319c98 100644
--- a/infra/ecr.tf
+++ b/infra/ecr.tf
@@ -94,6 +94,10 @@ resource "aws_ecr_repository" "mlflow" {
   name = "${var.prefix}-mlflow"
 }
 
+resource "aws_ecr_repository" "arango" {
+  name = "${var.prefix}-arango"
+}
+
 resource "aws_vpc_endpoint" "ecr_dkr" {
   vpc_id       = aws_vpc.main.id
   service_name = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.dkr"
@@ -122,6 +126,34 @@ resource "aws_vpc_endpoint" "ecr_api" {
   timeouts {}
 }
 
+resource "aws_vpc_endpoint" "ecr_dkr_datasets" {
+  vpc_id              = aws_vpc.datasets.id
+  service_name        = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.dkr"
+  vpc_endpoint_type   = "Interface"
+  private_dns_enabled = true
+
+  security_group_ids = ["${aws_security_group.ecr_dkr_datasets.id}"]
+  subnet_ids         = ["${aws_subnet.datasets.*.id[0]}"]
+
+  policy = data.aws_iam_policy_document.aws_vpc_endpoint_ecr.json
+
+  timeouts {}
+}
+
+resource "aws_vpc_endpoint" "ecr_api_datasets" {
+  vpc_id              = aws_vpc.datasets.id
+  service_name        = "com.amazonaws.${data.aws_region.aws_region.name}.ecr.api"
+  vpc_endpoint_type   = "Interface"
+  private_dns_enabled = true
+
+  security_group_ids = ["${aws_security_group.ecr_api_datasets.id}"]
+  subnet_ids         = ["${aws_subnet.datasets.*.id[0]}"]
+
+  policy = data.aws_iam_policy_document.aws_vpc_endpoint_ecr.json
+
+  timeouts {}
+}
+
 data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" {
   # Contains policies for both ECR and DKR endpoints, as recommended
 
@@ -271,6 +303,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" {
       "${aws_ecr_repository.superset.arn}",
       "${aws_ecr_repository.flower.arn}",
       "${aws_ecr_repository.mlflow.arn}",
+      "${aws_ecr_repository.arango.arn}"
     ]
   }
 
diff --git a/infra/ecs_main_admin.tf b/infra/ecs_main_admin.tf
index f16e036..7c12cd3 100644
--- a/infra/ecs_main_admin.tf
+++ b/infra/ecs_main_admin.tf
@@ -26,6 +26,10 @@ locals {
     authbroker_url = "${var.admin_authbroker_url}"
     secret_key     = "${random_string.admin_secret_key.result}"
 
+    arango_db__host     = "http://${aws_lb.arango.dns_name}"
+    arango_db__password = "${random_string.aws_arangodb_root_password.result}"
+    arango_db__port     = "${local.arango_container_port}"
+
     environment = "${var.admin_environment}"
 
     uploads_bucket = "${var.uploads_bucket}"
diff --git a/infra/ecs_main_admin_container_definitions.json b/infra/ecs_main_admin_container_definitions.json
index 4cb0a5e..e5a33b5 100644
--- a/infra/ecs_main_admin_container_definitions.json
+++ b/infra/ecs_main_admin_container_definitions.json
@@ -55,6 +55,22 @@
         "name": "EXPLORER_DEFAULT_CONNECTION",
         "value": "datasets_1"
       },
+      {
+        "name": "ARANGO_DB__HOST",
+        "value": "${arango_db__host}"
+      },
+      {
+        "name": "ARANGO_DB__PORT",
+        "value": "${arango_db__port}"
+      },
+      {
+        "name": "ARANGO_DB__USER",
+        "value": "root"
+      },
+      {
+        "name": "ARANGO_DB__PASSWORD",
+        "value": "${arango_db__password}"
+      },
       {
         "name": "ALLOWED_HOSTS__1",
         "value": "${root_domain}"
diff --git a/infra/ecs_main_arango.tf b/infra/ecs_main_arango.tf
new file mode 100644
index 0000000..5c59a41
--- /dev/null
+++ b/infra/ecs_main_arango.tf
@@ -0,0 +1,440 @@
+resource "aws_ecs_service" "arango" {
+  name            = "${var.prefix}-arango"
+  cluster         = aws_ecs_cluster.main_cluster.id
+  task_definition = aws_ecs_task_definition.arango_service.arn
+  desired_count   = 1
+
+  capacity_provider_strategy {
+    capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name
+    weight            = 100
+    base              = 1
+  }
+
+  network_configuration {
+    subnets         = [aws_subnet.datasets.*.id[0]]
+    security_groups = [aws_security_group.arango_service.id]
+  }
+
+  load_balancer {
+    target_group_arn = aws_lb_target_group.arango.arn
+    container_port   = "8529"
+    container_name   = "arango"
+  }
+
+  service_registries {
+    registry_arn = aws_service_discovery_service.arango.arn
+  }
+
+  depends_on = [
+    # The target group must have been associated with the listener first
+    aws_lb_listener.arango,
+    aws_autoscaling_group.arango_service
+  ]
+}
+
+resource "aws_service_discovery_service" "arango" {
+  name = "${var.prefix}-arango"
+
+  dns_config {
+    namespace_id = aws_service_discovery_private_dns_namespace.jupyterhub.id
+    dns_records {
+      ttl  = 10
+      type = "A"
+    }
+  }
+}
+
+resource "aws_autoscaling_group" "arango_service" {
+  name_prefix               = "${var.prefix}-arango"
+  max_size                  = 2
+  min_size                  = 1
+  # NOTE(review): the data volume is attached by instance user data and can be
+  # attached to one instance at a time — confirm two instances are intended.
+  desired_capacity          = 2
+  health_check_grace_period = 120
+  health_check_type         = "EC2"
+  vpc_zone_identifier       = ["${aws_subnet.datasets.*.id[0]}"]
+
+  launch_template {
+    id      = aws_launch_template.arango_service.id
+    version = "$Latest"
+  }
+
+  tag {
+    key                 = "Name"
+    value               = "${var.prefix}-arango-service"
+    propagate_at_launch = true
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+data "aws_autoscaling_groups" "arango_asgs" {
+  names = ["${aws_autoscaling_group.arango_service.name}"]
+}
+
+resource "aws_launch_template" "arango_service" {
+  name_prefix   = "${var.prefix}-arango-service-"
+  image_id      = var.arango_image_id
+  instance_type = var.arango_instance_type
+  key_name      = aws_key_pair.shared.key_name
+
+  metadata_options {
+    http_tokens = "required"
+  }
+
+  network_interfaces {
+    security_groups = [aws_security_group.arango-ec2.id]
+    subnet_id       = aws_subnet.datasets.*.id[0]
+  }
+  iam_instance_profile {
+    name = aws_iam_instance_profile.arango_ec2.name
+  }
+
+  # Launch templates expect base64-encoded user data, so the rendered script
+  # is encoded here rather than before templating.
+  user_data = base64encode(data.template_file.ecs_config_template.rendered)
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+data "template_file" "ecs_config_template" {
+  # Read as plain text: a base64-encoded template contains no placeholders,
+  # so none of the vars below would ever be substituted.
+  template = file("${path.module}/ecs_main_arango_user_data.sh")
+  vars = {
+    ECS_CLUSTER   = "${aws_ecs_cluster.main_cluster.name}"
+    EBS_REGION    = "${data.aws_region.aws_region.name}"
+    EBS_VOLUME_ID = "${aws_ebs_volume.arango.id}"
+  }
+}
+
+output "rendered" {
+  value = data.template_file.ecs_config_template.rendered
+}
+
+resource "aws_ecs_capacity_provider" "arango_capacity_provider" {
+  name = "${var.prefix}-arango_service"
+  auto_scaling_group_provider {
+    auto_scaling_group_arn = aws_autoscaling_group.arango_service.arn
+    managed_scaling {
+      maximum_scaling_step_size = 1000
+      minimum_scaling_step_size = 1
+      status                    = "ENABLED"
+      target_capacity           = 3
+    }
+  }
+}
+
+resource "aws_ecs_cluster_capacity_providers" "arango" {
+  cluster_name       = aws_ecs_cluster.main_cluster.name
+  capacity_providers = [aws_ecs_capacity_provider.arango_capacity_provider.name]
+  default_capacity_provider_strategy {
+    capacity_provider = aws_ecs_capacity_provider.arango_capacity_provider.name
+  }
+}
+
+resource "aws_ecs_task_definition" "arango_service" {
+  family                   = "${var.prefix}-arango"
+  container_definitions    = data.template_file.arango_service_container_definitions.rendered
+  execution_role_arn       = aws_iam_role.arango_task_execution.arn
+  task_role_arn            = aws_iam_role.arango_task.arn
+  network_mode             = "awsvpc"
+  cpu                      = local.arango_container_cpu
+  memory                   = local.arango_container_memory
+  requires_compatibilities = ["EC2"]
+
+  volume {
+    name      = "data-arango"
+    host_path = "/data/"
+  }
+
+  lifecycle {
+    ignore_changes = [
+      revision,
+    ]
+  }
+}
+
+data "template_file" "arango_service_container_definitions" {
+  template = file("${path.module}/ecs_main_arango_container_definitions.json")
+
+  vars = {
+    container_image = "${aws_ecr_repository.arango.repository_url}:latest"
+    container_name  = "arango"
+    log_group       = "${aws_cloudwatch_log_group.arango.name}"
+    log_region      = "${data.aws_region.aws_region.name}"
+    cpu             = "${local.arango_container_cpu}"
+    memory          = "${local.arango_container_memory}"
+    root_password   = "${random_string.aws_arangodb_root_password.result}"
+  }
+}
+
+resource "aws_ebs_volume" "arango" {
+  availability_zone = var.aws_availability_zones[0]
+  size              = var.arango_ebs_volume_size
+  type              = var.arango_ebs_volume_type
+  encrypted         = true
+
+  lifecycle {
+    prevent_destroy = false
+  }
+
+  tags = {
+    Name = "${var.prefix}-arango"
+  }
+}
+
+resource "aws_cloudwatch_log_group" "arango" {
+  name              = "${var.prefix}-arango"
+  retention_in_days = "3653"
+}
+
+resource "aws_iam_role" "arango_task_execution" {
+  name               = "${var.prefix}-arango-task-execution"
+  path               = "/"
+  assume_role_policy = data.aws_iam_policy_document.arango_task_execution_ecs_tasks_assume_role.json
+}
+
+data "aws_iam_policy_document" "arango_task_execution_ecs_tasks_assume_role" {
+  statement {
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "arango_task_execution" {
+  role       = aws_iam_role.arango_task_execution.name
+  policy_arn = aws_iam_policy.arango_task_execution.arn
+}
+
+resource "aws_iam_policy" "arango_task_execution" {
+  name   = "${var.prefix}-arango-task-execution"
+  path   = "/"
+  policy = data.aws_iam_policy_document.arango_task_execution.json
+}
+
+data "aws_iam_policy_document" "arango_task_execution" {
+  statement {
+    actions = [
+      "logs:CreateLogStream",
+      "logs:PutLogEvents",
+    ]
+
+    resources = [
+      "${aws_cloudwatch_log_group.arango.arn}:*",
+    ]
+  }
+
+  statement {
+    actions = [
+      "ecr:BatchGetImage",
+      "ecr:GetDownloadUrlForLayer",
+    ]
+
+    resources = [
+      "${aws_ecr_repository.arango.arn}",
+    ]
+  }
+
+  statement {
+    actions = [
+      "ecr:GetAuthorizationToken",
+    ]
+
+    resources = [
+      "*",
+    ]
+  }
+}
+
+resource "aws_iam_role" "arango_task" {
+  name               = "${var.prefix}-arango-task"
+  path               = "/"
+  assume_role_policy = data.aws_iam_policy_document.arango_task_ecs_tasks_assume_role.json
+}
+
+data "aws_iam_policy_document" "arango_task_ecs_tasks_assume_role" {
+  statement {
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "arango_ecs" {
+  name               = "${var.prefix}-arango-ecs"
+  path               = "/"
+  assume_role_policy = data.aws_iam_policy_document.arango_ecs_assume_role.json
+}
+
+resource "aws_iam_role_policy_attachment" "arango_ecs" {
+  role       = aws_iam_role.arango_ecs.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole"
+}
+
+data "aws_iam_policy_document" "arango_ecs_assume_role" {
+  statement {
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ecs.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "arango_ec2" {
+  name               = "${var.prefix}-arango-ec2"
+  assume_role_policy = data.aws_iam_policy_document.arango_ec2_assume_role.json
+}
+
+data "aws_iam_policy_document" "arango_ec2_assume_role" {
+
+  statement {
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ec2.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "arango_ec2" {
+  role       = aws_iam_role.arango_ec2.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
+}
+
+resource "aws_iam_role_policy_attachment" "arango_ec2_ssm" {
+  role       = aws_iam_role.arango_ec2.name
+  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+}
+
+data "aws_iam_policy_document" "arango_ebs" {
+  # ec2:AttachVolume and ec2:DetachVolume are authorised against both the
+  # volume and the instance, so the instance ARN pattern must be listed as
+  # well or the attach call made from user data is denied.
+  statement {
+    effect = "Allow"
+    actions = [
+      "ec2:AttachVolume",
+      "ec2:DetachVolume",
+    ]
+    resources = [
+      "${aws_ebs_volume.arango.arn}",
+      "arn:aws:ec2:*:*:instance/*",
+    ]
+  }
+
+  # Remaining volume and snapshot actions, scoped to the data volume exactly
+  # as before.
+  statement {
+    effect = "Allow"
+    actions = [
+      "ec2:CreateVolume",
+      "ec2:CreateSnapshot",
+      "ec2:CreateTags",
+      "ec2:DeleteVolume",
+      "ec2:DeleteSnapshot",
+      "ec2:CopySnapshot",
+      "ec2:ModifySnapshotAttribute",
+      "ec2:ModifyVolumeAttribute",
+    ]
+    resources = [
+      "${aws_ebs_volume.arango.arn}"
+    ]
+  }
+
+  # ec2:Describe* actions do not support resource-level permissions; they
+  # only take effect when granted on "*".
+  statement {
+    effect = "Allow"
+    actions = [
+      "ec2:DescribeAvailabilityZones",
+      "ec2:DescribeInstances",
+      "ec2:DescribeVolumes",
+      "ec2:DescribeVolumeAttribute",
+      "ec2:DescribeVolumeStatus",
+      "ec2:DescribeSnapshots",
+      "ec2:DescribeSnapshotAttribute",
+      "ec2:DescribeTags",
+    ]
+    resources = ["*"]
+  }
+}
+
+resource "aws_iam_policy" "arango_ebs" {
+  name        = "${var.prefix}-arango-ebs"
+  description = "enable-mounting-of-ebs-volume"
+  policy      = data.aws_iam_policy_document.arango_ebs.json
+}
+
+resource "aws_iam_role_policy_attachment" "arango_ec2_ebs" {
+  role       = aws_iam_role.arango_ec2.name
+  policy_arn = aws_iam_policy.arango_ebs.arn
+}
+
+resource "aws_iam_instance_profile" "arango_ec2" {
+  name = "${var.prefix}-arango-ec2"
+  role = aws_iam_role.arango_ec2.id
+}
+
+resource "aws_lb" "arango" {
+  name                       = "${var.prefix}-arango"
+  load_balancer_type         = "application"
+  security_groups            = [aws_security_group.arango_lb.id]
+  enable_deletion_protection = true
+  internal                   = true
+  subnets                    = aws_subnet.datasets.*.id
+  tags = {
+    name = "arango-to-notebook-lb"
+  }
+}
+
+resource "aws_lb_listener" "arango" {
+  load_balancer_arn = aws_lb.arango.arn
+  port              = "8529"
+  protocol          = "HTTP"
+
+  default_action {
+    target_group_arn = aws_lb_target_group.arango.id
+    type             = "forward"
+  }
+}
+
+resource "aws_lb_target_group" "arango" {
+  name        = "${var.prefix}-arango"
+  port        = "8529"
+  vpc_id      = aws_vpc.datasets.id
+  target_type = "ip"
+  protocol    = "HTTP"
+
+  health_check {
+    protocol            = "HTTP"
+    interval            = 10
+    healthy_threshold   = 2
+    unhealthy_threshold = 2
+    path                = "/_db/_system/_admin/aardvark/index.html"
+  }
+}
+
+resource "random_string" "aws_arangodb_root_password" {
+  length  = 64
+  special = false
+
+  lifecycle {
+    ignore_changes = all
+  }
+}
\ No newline at end of file
diff --git a/infra/ecs_main_arango_container_definitions.json b/infra/ecs_main_arango_container_definitions.json
new file mode 100644
index 0000000..6b6bc65
--- /dev/null
+++ b/infra/ecs_main_arango_container_definitions.json
@@ -0,0 +1,31 @@
+[
+  {
+    "name": "${container_name}",
+    "image": "${container_image}",
+    "memoryReservation": ${memory},
+    "cpu": ${cpu},
+    "essential": true,
+    "portMappings": [{
+        "containerPort": 8529,
+        "protocol": "tcp"
+    }],
+    "logConfiguration": {
+      "logDriver": "awslogs",
+      "options": {
+        "awslogs-group": "${log_group}",
+        "awslogs-region": "${log_region}",
+        "awslogs-stream-prefix": "${container_name}"
+      }
+    },
+    "environment": [
+      {
+        "name": "ARANGO_ROOT_PASSWORD",
+        "value": "${root_password}"
+      }
+    ],
+    "mountPoints" : [{
+      "containerPath" : "/data/",
+      "sourceVolume" : "data-arango"
+    }]
+  }
+ ]
\ No newline at end of file
diff --git a/infra/ecs_main_arango_user_data.sh b/infra/ecs_main_arango_user_data.sh
new file mode 100644
index 0000000..c51dad8
--- /dev/null
+++ b/infra/ecs_main_arango_user_data.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Attach and mount the ArangoDB data volume, then install and start the ECS agent.
+#
+# Rendered by Terraform's template_file data source, which substitutes
+# ECS_CLUSTER, EBS_REGION and EBS_VOLUME_ID. Shell variables are written
+# without braces so they are not mistaken for template placeholders.
+
+EC2_INSTANCE_ID=$(ec2-metadata --instance-id | sed 's/instance-id: //')
+aws ec2 attach-volume --volume-id "${EBS_VOLUME_ID}" --instance-id "$EC2_INSTANCE_ID" --device /dev/xvdf --region "${EBS_REGION}"
+# Wait for the drive to be attached
+while [ ! -e /dev/xvdf ] ; do sleep 1 ; done
+# Follow symlinks to find the real device
+device=$(sudo readlink -f /dev/xvdf)
+# Format the volume if it does not contain a filesystem yet
+if [ "$(sudo file -b -s "$device")" == "data" ]; then
+  sudo mkfs -t ext4 "$device"
+fi
+# Mount the drive
+sudo mkdir -p /data
+sudo mount "$device" /data
+
+mkdir -p /etc/ecs/
+echo "ECS_CLUSTER=${ECS_CLUSTER}" >> /etc/ecs/ecs.config
+sudo yum install -y ecs-init
+sudo systemctl restart ecs
diff --git a/infra/environment-template/main.tf b/infra/environment-template/main.tf
index 86e58c2..6f09811 100644
--- a/infra/environment-template/main.tf
+++ b/infra/environment-template/main.tf
@@ -53,6 +53,7 @@ module "jupyterhub" {
   vpc_notebooks_cidr             = "172.17.0.0/16"
   vpc_notebooks_subnets_num_bits = "5"
   vpc_datasets_cidr              = "172.18.4.0/22"
+  vpc_datasets_subnets_num_bits  = "8"
 
   aws_availability_zones       = ["eu-west-2a", "eu-west-2b", "eu-west-2c"]
   aws_availability_zones_short = ["a", "b", "c"]
@@ -141,21 +142,12 @@ module "jupyterhub" {
   datasets_rds_cluster_instance_performance_insights_enabled = "true"
   datasets_rds_cluster_instance_identifier                   = "REPLACE_ME"
 
-  paas_cidr_block       = "10.0.0.0/16"
-  paas_vpc_id           = "REPLACE_ME"
-  quicksight_cidr_block = "172.18.5.128/25"
-  quicksight_vpc_arn    = "REPLACE_ME"
-  datasets_subnet_cidr_blocks = [
-    "172.18.4.0/25",
-    "172.18.4.128/25",
-    "172.18.5.0/25",
-  ]
-  dataset_subnets_availability_zones = [
-    "eu-west-2a",
-    "eu-west-2b",
-    "eu-west-2b",
-  ] # The second and third subnet on the live environment are both in the same az
-
+  paas_cidr_block                       = "10.0.0.0/16"
+  paas_vpc_id                           = "REPLACE_ME"
+  quicksight_cidr_block                 = "172.18.5.128/25"
+  quicksight_vpc_arn                    = "REPLACE_ME"
+  datasets_subnet_cidr_blocks           = ["172.18.4.0/25", "172.18.4.128/25", "172.18.5.0/25", "172.18.6.0/25", "172.18.6.128/25", "172.18.7.0/25"]
+  dataset_subnets_availability_zones    = ["eu-west-2a", "eu-west-2b", "eu-west-2b"] # The second and third subnet on the live environment are both in the same az
   quicksight_security_group_name        = "jupyterhub-quicksight"
   quicksight_security_group_description = "Allow quicksight to connect to data workspace datasets DB"
   quicksight_subnet_availability_zone   = "eu-west-2b"
diff --git a/infra/main.tf b/infra/main.tf
index 8283cd7..2a27be4 100644
--- a/infra/main.tf
+++ b/infra/main.tf
@@ -35,6 +35,7 @@ variable "subnets_num_bits" {}
 variable "vpc_notebooks_cidr" {}
 variable "vpc_notebooks_subnets_num_bits" {}
 variable "vpc_datasets_cidr" {}
+variable "vpc_datasets_subnets_num_bits" {}
 
 variable "aws_route53_zone" {}
 variable "admin_domain" {}
@@ -266,6 +267,10 @@ locals {
   flower_container_memory = 8192
   flower_container_cpu    = 1024
 
+  arango_container_memory = 8192
+  arango_container_cpu    = 2048
+  arango_container_port   = 8529
+
   mlflow_container_memory = 8192
   mlflow_container_cpu    = 1024
   mlflow_port             = 8004
diff --git a/infra/route_53.tf b/infra/route_53.tf
index 16e86ce..8ad0d45 100644
--- a/infra/route_53.tf
+++ b/infra/route_53.tf
@@ -190,6 +190,38 @@ resource "aws_acm_certificate_validation" "superset_internal" {
   certificate_arn = aws_acm_certificate.superset_internal[count.index].arn
 }
 
+resource "aws_route53_record" "arango" {
+  provider = "aws.route53"
+  zone_id  = data.aws_route53_zone.aws_route53_zone.zone_id
+  name     = "arango"
+  type     = "A"
+
+  alias {
+    name                   = aws_lb.arango.dns_name
+    zone_id                = aws_lb.arango.zone_id
+    evaluate_target_health = false
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_acm_certificate" "arango" {
+  domain_name       = aws_route53_record.arango.fqdn # the record's name is only the "arango" label; certificates need the full domain
+  validation_method = "DNS"
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_acm_certificate_validation" "arango" {
+  certificate_arn = aws_acm_certificate.arango.arn # NOTE(review): no DNS validation records are created in this change — confirm validation can complete
+}
+
+
+
 # resource "aws_route53_record" "jupyterhub" {
 #   zone_id = "${data.aws_route53_zone.aws_route53_zone.zone_id}"
 #   name    = "${var.jupyterhub_domain}."
diff --git a/infra/security_groups.tf b/infra/security_groups.tf
index 338c6c1..35f9298 100644
--- a/infra/security_groups.tf
+++ b/infra/security_groups.tf
@@ -436,6 +436,18 @@ resource "aws_security_group_rule" "notebooks_ingress_https_from_admin" {
   protocol  = "tcp"
 }
 
+resource "aws_security_group_rule" "notebooks_ingress_https_from_arango_lb" {
+  description = "ingress-https-from-arango-lb"
+
+  security_group_id        = aws_security_group.notebooks.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "ingress"
+  from_port = local.arango_container_port # the ArangoDB port from locals, although the rule is labelled https
+  to_port   = local.arango_container_port
+  protocol  = "tcp"
+}
+
 resource "aws_security_group_rule" "notebooks_ingress_http_dev_from_admin" {
   description = "ingress-http-dev-from-jupytehub"
 
@@ -521,6 +533,30 @@ resource "aws_security_group_rule" "notebooks_egress_postgres_to_datasets_db" {
   protocol  = "tcp"
 }
 
+resource "aws_security_group_rule" "notebooks_egress_arango_service" {
+  description = "egress-to-arango"
+
+  security_group_id        = aws_security_group.notebooks.id
+  source_security_group_id = aws_security_group.datasets.id # NOTE(review): targets the datasets group, not arango_service — confirm this is intended
+
+  type      = "egress"
+  from_port = local.arango_container_port
+  to_port   = local.arango_container_port
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "notebooks_egress_arango_lb" {
+  description = "egress-to-arango-lb"
+
+  security_group_id        = aws_security_group.notebooks.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "egress"
+  from_port = local.arango_container_port
+  to_port   = local.arango_container_port
+  protocol  = "tcp"
+}
+
 resource "aws_security_group" "cloudwatch" {
   name = "${var.prefix}-cloudwatch"
 
@@ -564,6 +600,34 @@ resource "aws_security_group" "ecr_api" {
   }
 }
 
+resource "aws_security_group" "ecr_dkr_datasets" {
+  name        = "${var.prefix}-ecr-dkr-datasets"
+  description = "${var.prefix}-ecr-dkr-datasets"
+  vpc_id      = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}-ecr-dkr-datasets"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_security_group" "ecr_api_datasets" {
+  name        = "${var.prefix}-ecr-api-datasets"
+  description = "${var.prefix}-ecr-api-datasets"
+  vpc_id      = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}-ecr-api-datasets"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
 resource "aws_security_group_rule" "ecr_api_ingress_https_from_dns_rewrite_proxy" {
   description = "ingress-https-from-dns-rewrite-proxy"
 
@@ -686,6 +750,18 @@ resource "aws_security_group_rule" "ecr_api_ingress_https_from_healthcheck" {
   protocol  = "tcp"
 }
 
+resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_proxy" {
+  description = "ingress-https-from-arango"
+
+  security_group_id        = aws_security_group.ecr_api_datasets.id
+  source_security_group_id = aws_security_group.arango_service.id
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
 resource "aws_security_group_rule" "cloudwatch_ingress_https_from_all" {
   description = "ingress-https-from-everywhere"
 
@@ -710,6 +786,18 @@ resource "aws_security_group_rule" "ecr_dkr_ingress_https_from_all" {
   protocol  = "tcp"
 }
 
+resource "aws_security_group_rule" "ecr_dkr_datasets_ingress_https_from_all" {
+  description = "ingress-https-from-everywhere"
+
+  security_group_id = aws_security_group.ecr_dkr_datasets.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
 resource "aws_security_group" "mirrors_sync" {
   name        = "${var.prefix}-mirrors-sync"
   description = "${var.prefix}-mirrors-sync"
@@ -1899,3 +1987,336 @@ resource "aws_security_group_rule" "notebooks_egress_http_to_mlflow_service" {
   to_port   = local.mlflow_port
   protocol  = "tcp"
 }
+
+resource "aws_security_group" "arango_lb" {
+  name        = "${var.prefix}-arango_lb"
+  description = "${var.prefix}-arango_lb"
+  vpc_id      = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}-arango_lb"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_security_group_rule" "arango_lb_ingress_https_from_whitelist" {
+  description = "ingress-https-from-whitelist"
+
+  security_group_id = aws_security_group.arango_lb.id
+  cidr_blocks       = var.ip_whitelist
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_lb_egress_https_to_arango_service" {
+  description = "egress-https-to-arango-service"
+
+  security_group_id        = aws_security_group.arango_lb.id
+  source_security_group_id = aws_security_group.arango_service.id
+
+  type      = "egress"
+  from_port = local.arango_container_port
+  to_port   = local.arango_container_port
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_lb_notebooks_ingress" {
+  description = "inbound peering connection with notebooks vpc"
+
+  security_group_id = aws_security_group.arango_lb.id
+  cidr_blocks       = ["${aws_vpc.notebooks.cidr_block}"]
+
+  type      = "ingress"
+  from_port = "0"
+  to_port   = "0"
+  protocol  = "-1"
+}
+
+resource "aws_security_group_rule" "arango_lb_ingress_vpc" {
+  description = "inbound connection from vpc CIDR"
+
+  security_group_id = aws_security_group.arango_lb.id
+  cidr_blocks       = ["${aws_vpc.datasets.cidr_block}"]
+
+  type      = "ingress"
+  from_port = "8529"
+  to_port   = "8529"
+  protocol  = "tcp" # was "-1", which disregards the port range and admits all traffic
+}
+
+resource "aws_security_group_rule" "arango_lb_notebooks_egress" {
+  description = "allow outbound traffic"
+
+  security_group_id = aws_security_group.arango_lb.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "egress"
+  from_port = "0"
+  to_port   = "0"
+  protocol  = "-1"
+}
+
+resource "aws_security_group_rule" "arango_lb_egress_https_to_cloudwatch" {
+  description = "egress-https-to-cloudwatch"
+
+  security_group_id        = aws_security_group.arango_lb.id
+  source_security_group_id = aws_security_group.cloudwatch.id
+
+  type      = "egress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group" "arango_service" {
+  name        = "${var.prefix}-arango"
+  description = "${var.prefix}-arango"
+  vpc_id      = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}-arango"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+# Connections to ECR and CloudWatch. ECR needs S3, and its VPC endpoint type
+# does not have an IP range or security group to limit access to
+resource "aws_security_group_rule" "arango_egress_ecr_api" {
+  description = "egress-https-to-ecr-api"
+
+  security_group_id        = aws_security_group.arango_service.id # egress belongs on the arango service group, towards the ECR endpoint
+  source_security_group_id = aws_security_group.ecr_api_datasets.id
+
+  type      = "egress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_egress_https_all" {
+  description = "egress-https-to-all"
+
+  security_group_id = aws_security_group.arango_service.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "egress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_service_ec2" {
+
+  security_group_id = aws_security_group.arango_service.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "ingress"
+  from_port = "80"
+  to_port   = "80"
+  protocol  = "tcp"
+}
+
+
+
+resource "aws_security_group_rule" "arango_service_ingress_http_arango_lb" {
+  description = "ingress-arango-lb"
+
+  security_group_id        = aws_security_group.arango_service.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "ingress"
+  from_port = "80"
+  to_port   = "80"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_service_ingress_8529_arango_lb" {
+  description = "ingress-arango-lb"
+
+  security_group_id        = aws_security_group.arango_service.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "ingress"
+  from_port = "8529"
+  to_port   = "8529"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_service_ingress_from_notebooks" {
+  description = "ingress-notebooks-arango"
+
+  security_group_id        = aws_security_group.arango_service.id
+  source_security_group_id = aws_security_group.notebooks.id
+
+  type      = "ingress"
+  from_port = "8529"
+  to_port   = "8529"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_service_egress_8529_arango_lb" {
+  description = "egress-arango-lb"
+
+  security_group_id        = aws_security_group.arango_service.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "egress"
+  from_port = "8529"
+  to_port   = "8529"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango" {
+  description = "ingress-https-from-arango-service"
+
+  security_group_id        = aws_security_group.arango_service.id # NOTE(review): name and description suggest the ECR endpoint group should own this rule — confirm
+  source_security_group_id = aws_security_group.ecr_api_datasets.id
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango_ecs_ec2" {
+  description = "ingress_ec2_instance"
+
+  security_group_id = aws_security_group.arango_service.id
+  cidr_blocks       = ["0.0.0.0/0"] # NOTE(review): SSH is open to every address — consider var.ip_whitelist
+
+  type      = "ingress"
+  from_port = "22"
+  to_port   = "22"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango-ecs-egress-all" {
+
+  security_group_id = aws_security_group.arango_service.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "egress"
+  from_port = "0"
+  to_port   = "65535"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group" "arango-ec2" {
+  name        = "${var.prefix}-arango-ec2"
+  description = "${var.prefix}-arango-ec2"
+  vpc_id      = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}-arango-ec2"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_security_group_rule" "arango-ec2-egress-ecs-agent" {
+  description = "egress-ec2-agent"
+
+  security_group_id = aws_security_group.arango-ec2.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "egress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango-ec2-egress-all" {
+  description = "egress-everything-to-everywhere"
+
+  security_group_id = aws_security_group.arango-ec2.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "egress"
+  from_port = "0"
+  to_port   = "0"
+  protocol  = "-1"
+}
+
+resource "aws_security_group_rule" "arango-ec2-ingress-lb" {
+  description = "ingress-everything-to-ec2"
+
+  security_group_id        = aws_security_group.arango-ec2.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "ingress"
+  from_port = "80"
+  to_port   = "80"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango8529-ec2-ingress-lb" {
+  description = "ingress-arango8529-to-ec2"
+
+  security_group_id = aws_security_group.arango-ec2.id
+  cidr_blocks       = ["0.0.0.0/0"]
+
+  type      = "ingress"
+  from_port = "8529"
+  to_port   = "8529"
+  protocol  = "tcp"
+}
+
+
+resource "aws_security_group_rule" "arango-ec2-ingress-arango-lb" {
+  description = "ingress-everything-to-arango-lb"
+
+  security_group_id        = aws_security_group.arango-ec2.id
+  source_security_group_id = aws_security_group.arango_lb.id
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango-ingress-ec2-22" {
+  description = "ingress_ec2_instance"
+
+  security_group_id = aws_security_group.arango-ec2.id
+  cidr_blocks       = ["0.0.0.0/0"] # NOTE(review): SSH is open to every address — consider var.ip_whitelist
+
+  type      = "ingress"
+  from_port = "22"
+  to_port   = "22"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "arango-ec2-ingress-ecs-agent" {
+  description = "ingress-ec2-agent"
+
+  security_group_id        = aws_security_group.arango-ec2.id
+  source_security_group_id = aws_security_group.arango_service.id
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
+
+resource "aws_security_group_rule" "ecr_api_ingress_https_from_arango_ec2" {
+  description = "ingress-https-from-arango-ec2"
+
+  security_group_id        = aws_security_group.ecr_api_datasets.id
+  source_security_group_id = aws_security_group.arango-ec2.id
+
+  type      = "ingress"
+  from_port = "443"
+  to_port   = "443"
+  protocol  = "tcp"
+}
\ No newline at end of file
diff --git a/infra/vpc.tf b/infra/vpc.tf
index f4866da..5ba1f61 100644
--- a/infra/vpc.tf
+++ b/infra/vpc.tf
@@ -19,8 +19,8 @@ resource "aws_vpc_peering_connection" "jupyterhub" {
 resource "aws_vpc" "notebooks" {
   cidr_block = var.vpc_notebooks_cidr
 
-  enable_dns_support   = false
-  enable_dns_hostnames = false
+  enable_dns_support   = true
+  enable_dns_hostnames = true
 
   tags = {
     Name = "${var.prefix}-notebooks"
@@ -136,10 +136,9 @@ data "aws_iam_policy_document" "vpc_main_flow_log" {
 }
 
 resource "aws_subnet" "public" {
-  count      = length(var.aws_availability_zones)
-  vpc_id     = aws_vpc.main.id
-  cidr_block = cidrsubnet(aws_vpc.main.cidr_block, var.subnets_num_bits, count.index)
-
+  count             = length(var.aws_availability_zones)
+  vpc_id            = aws_vpc.main.id
+  cidr_block        = cidrsubnet(aws_vpc.main.cidr_block, var.subnets_num_bits, count.index)
   availability_zone = var.aws_availability_zones[count.index]
 
   tags = {
@@ -303,7 +302,7 @@ resource "aws_vpc" "datasets" {
   cidr_block = var.vpc_datasets_cidr
 
   enable_dns_support   = true
-  enable_dns_hostnames = false
+  enable_dns_hostnames = true
 
   tags = {
     Name = "${var.prefix}-datasets"
@@ -360,11 +359,11 @@ resource "aws_vpc_peering_connection" "datasets_to_notebooks" {
   auto_accept = true
 
   accepter {
-    allow_remote_vpc_dns_resolution = false
+    allow_remote_vpc_dns_resolution = true
   }
 
   requester {
-    allow_remote_vpc_dns_resolution = false
+    allow_remote_vpc_dns_resolution = true
   }
 
   tags = {
@@ -415,7 +414,6 @@ resource "aws_route" "pcx_datasets_to_private_without_egress" {
   vpc_peering_connection_id = aws_vpc_peering_connection.datasets_to_notebooks.id
 }
 
-
 resource "aws_subnet" "datasets" {
   count  = length(var.aws_availability_zones)
   vpc_id = aws_vpc.datasets.id
@@ -457,3 +455,71 @@ resource "aws_route_table_association" "datasets_quicksight" {
   subnet_id      = aws_subnet.datasets_quicksight.id
   route_table_id = aws_route_table.datasets.id
 }
+
+# public subnet for datasets
+resource "aws_subnet" "public_datasets" {
+  count      = length(var.aws_availability_zones)
+  vpc_id     = aws_vpc.datasets.id
+  cidr_block = var.datasets_subnet_cidr_blocks[count.index + 3]
+
+  availability_zone = var.aws_availability_zones[count.index]
+
+  tags = {
+    Name = "${var.prefix}-public-datasets-${var.aws_availability_zones_short[count.index]}"
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+# internet gateway for public subnet
+resource "aws_internet_gateway" "main_datasets" {
+  vpc_id = aws_vpc.datasets.id
+
+  tags = {
+    Name = "${var.prefix}"
+  }
+}
+
+# elastic IP for NAT gateway
+resource "aws_eip" "nat_gateway_datasets" {
+  vpc = true
+}
+
+# NAT gateway in public datasets subnet
+resource "aws_nat_gateway" "datasets" {
+  allocation_id = aws_eip.nat_gateway_datasets.id
+  subnet_id     = aws_subnet.public_datasets.*.id[0]
+
+  tags = {
+    Name = "${var.prefix}"
+  }
+}
+
+# Route table for public datasets subnet
+resource "aws_route_table" "public_datasets" {
+  vpc_id = aws_vpc.datasets.id
+  tags = {
+    Name = "${var.prefix}-public"
+  }
+}
+
+resource "aws_route" "public_datasets_internet_gateway_ipv4" {
+  route_table_id         = aws_route_table.public_datasets.id
+  destination_cidr_block = "0.0.0.0/0"
+  gateway_id             = aws_internet_gateway.main_datasets.id
+}
+
+# associate public route table with public subnet
+resource "aws_route_table_association" "public_datasets" {
+  count          = length(var.aws_availability_zones)
+  subnet_id      = aws_subnet.public_datasets.*.id[count.index]
+  route_table_id = aws_route_table.public_datasets.id
+}
+
+# associate datasets private subnet with NAT gateway
+resource "aws_route" "datasets_nat_gateway" {
+  route_table_id         = aws_route_table.datasets.id
+  destination_cidr_block = "0.0.0.0/0"
+  nat_gateway_id         = aws_nat_gateway.datasets.id # NAT gateways are routed via nat_gateway_id; gateway_id is for internet/virtual private gateways
+}
\ No newline at end of file