From 10e5bbbcf88f047105d5fd665d0ae36e12c4ef10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 31 Jan 2025 10:12:52 +0100 Subject: [PATCH 01/27] Add prometheus keys to access aws instances --- ansible/roles/prometheus/templates/prometheus.yml | 12 ++++++++++++ ansible/roles/prometheus/vars/main.yml | 3 +++ 2 files changed, 15 insertions(+) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index c1f53654..5106725e 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -214,4 +214,16 @@ scrape_configs: static_configs: - targets: - backend-hel.ooni.org:444 + + # EC2 instances monitoring: + - job_name: 'ooni-api-ec2' + scrape_interval: 5s + scheme: https + metrics_path: "/metrics" + ec2_sd_configs: + - access_key: "{{prometheus_aws_access_key}}" + secret_key: "{{prometheus_aws_secret_key}}" + region: "eu-central-1" + port: 80 # should be the proxy + # relabel_configs: # Change the host to the proxy host with relabeling ... diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index d8774c47..81c321ee 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -150,3 +150,6 @@ blackbox_jobs: - name: icmp module: icmp targets: "{{ dom0_hosts | list }}" + +prometheus_aws_access_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}" +prometheus_aws_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" \ No newline at end of file From 3fa1281c0a0bf944e49066c80d0289435168d343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 31 Jan 2025 10:31:41 +0100 Subject: [PATCH 02/27] Add initial version of proxy relabeling configs --- ansible/roles/prometheus/templates/prometheus.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 5106725e..8d475362 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -225,5 +225,10 @@ scrape_configs: secret_key: "{{prometheus_aws_secret_key}}" region: "eu-central-1" port: 80 # should be the proxy - # relabel_configs: # Change the host to the proxy host with relabeling + relabel_configs: # Change the host to the proxy host with relabeling + - source_labels: [__address__] + regex: "([0-9\\.]+):([0-9]+)" # : + replacement: "proxy:${2}/${1}/metrics" + target_label: "proxy_host" + action: "replace" ... From 15c86e93a64141c9166c4588e6b965a20f6a9200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 31 Jan 2025 15:54:08 +0100 Subject: [PATCH 03/27] Add nginx prometheus monitoring settings --- .../roles/clickhouse_proxy/templates/prometheus-proxy.conf | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf diff --git a/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf new file mode 100644 index 00000000..334ad172 --- /dev/null +++ b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf @@ -0,0 +1,6 @@ +server { + listen 80; + location ~ /([a-zA-Z0-9_\.]+)/(.*) { + proxy_pass http://$1:80/$2$is_args$args; + } +} From 78a7c0853718d065882590118f29b2fcfe2289ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 31 Jan 2025 16:09:40 +0100 Subject: [PATCH 04/27] Add access to ec2 instances from clickhouse proxy --- tf/environments/dev/main.tf | 2 ++ tf/modules/ec2/outputs.tf | 4 ++++ tf/modules/ecs_cluster/main.tf | 8 ++++++++ tf/modules/ecs_cluster/variables.tf | 4 ++++ 4 files changed, 18 insertions(+) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 14e02f71..f5e3d9f5 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -301,6 +301,8 @@ module "ooniapi_cluster" { instance_type = "t3a.micro" + monitoring_sg_ids = [module.ooni_clickhouse_proxy.ec2_sg_id] + tags = merge( local.tags, { Name = "ooni-tier0-api-ecs-cluster" } diff --git a/tf/modules/ec2/outputs.tf b/tf/modules/ec2/outputs.tf index 4a99b3c8..55488632 100644 --- a/tf/modules/ec2/outputs.tf +++ b/tf/modules/ec2/outputs.tf @@ -5,3 +5,7 @@ output "aws_instance_id" { output "aws_instance_public_dns" { value = aws_instance.ooni_ec2.public_dns } + +output "ec2_sg_id" { + value = aws_security_group.ec2_sg.id +} \ No newline at end of file diff --git a/tf/modules/ecs_cluster/main.tf b/tf/modules/ecs_cluster/main.tf index 365f5607..9ac89e3b 100644 --- a/tf/modules/ecs_cluster/main.tf +++ b/tf/modules/ecs_cluster/main.tf @@ -120,6 +120,14 @@ resource "aws_security_group" "container_host" { ] } + ingress { + protocol = "tcp" + from_port = 80 + to_port = 80 + + security_groups = var.monitoring_sg_ids + } + egress { from_port = 0 to_port = 0 diff --git a/tf/modules/ecs_cluster/variables.tf b/tf/modules/ecs_cluster/variables.tf index 2ada3a7a..fdef010a 100644 --- a/tf/modules/ecs_cluster/variables.tf +++ b/tf/modules/ecs_cluster/variables.tf @@ -61,3 +61,7 @@ variable "instance_type" { variable "instance_volume_size" { default = "5" } + +variable "monitoring_sg_ids" { + default = [] +} \ No newline at end of file From c9c46eeb6ce76faae7f0fe878416a0dd40cf893d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Mon, 3 Feb 2025 14:02:09 +0100 Subject: [PATCH 05/27] Add ecs service for service discovery --- tf/environments/dev/main.tf | 6 +- tf/modules/ooni_monitoring/main.tf | 104 ++++++++++++++++++++++++ tf/modules/ooni_monitoring/variables.tf | 24 ++++++ 3 files changed, 133 insertions(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index f5e3d9f5..e0d2a0fe 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -795,5 +795,9 @@ resource "aws_acm_certificate_validation" "ooniapi_frontend" { module "ooni_monitoring" { source = "../../modules/ooni_monitoring" + environment = local.environment + aws_region = var.aws_region + cluster_id = module.ooniapi_cluster.cluster_id + tags = local.tags -} \ No newline at end of file +} diff --git a/tf/modules/ooni_monitoring/main.tf b/tf/modules/ooni_monitoring/main.tf index 22021f0e..c7d0e96d 100644 --- a/tf/modules/ooni_monitoring/main.tf +++ b/tf/modules/ooni_monitoring/main.tf @@ -1,3 +1,11 @@ +locals { + name = "ecs-service-discovery-${var.environment}" + + tags = { + Name = local.name + Environment = var.environment + } +} resource "aws_iam_user" "ooni_monitoring" { name = "oonidevops-monitoring" } @@ -34,4 +42,100 @@ resource "aws_ssm_parameter" "ooni_monitoring_secret_key" { name = "/oonidevops/secrets/ooni_monitoring/secret_key" type = "SecureString" value = aws_iam_access_key.ooni_monitoring.secret +} + +resource "aws_ecs_task_definition" "ooni_service_discovery" { + family = "ecs-sd-td" + network_mode = "bridge" + + container_definitions = jsonencode([ + { + memoryReservation = var.task_memory, + essential = true, + image = "apptality/aws-ecs-cloudmap-prometheus-discovery:latest", + name = local.name, + + portMappings = [ + { + containerPort = 9001 + protocol = "tcp" + } + ], + + environment = [ + { + name = "AWS_REGION" + value = var.aws_region + } + ] + secrets = [ + for k, v in var.task_secrets : { + name = k, + valueFrom = v + } + ], + logConfiguration = { + logDriver = "awslogs", + options = { + awslogs-group = aws_cloudwatch_log_group.ooni_ecs_sd.name, + awslogs-region = var.aws_region + } + } + } + ]) + + # TODO I think this should have its own role + task_role_arn = aws_iam_role.ecs_sd_task.arn + execution_role_arn = aws_iam_role.ecs_sd_task.arn + tags = var.tags + track_latest = true +} + +resource "aws_ecs_service" "service" { + name = local.name + cluster = var.cluster_id + launch_type = "EC2" + task_definition = aws_ecs_task_definition.ooni_service_discovery.id + desired_count = 1 + + # Required to SSH into the container + enable_execute_command = true + + # Below are required to enforce a new deployment to be ready before the old one is stopped + deployment_minimum_healthy_percent = 0 + deployment_maximum_percent = 100 + + # lifecycle { + # ignore_changes = [ + # desired_count + # ] + # } + + tags = var.tags +} + +resource "aws_iam_role" "ecs_sd_task" { + name = "${local.name}-task-role" + + tags = var.tags + + assume_role_policy = < Date: Tue, 4 Feb 2025 11:26:10 +0100 Subject: [PATCH 06/27] Added permissions to service discovery task --- tf/modules/ooni_monitoring/main.tf | 14 ++++- .../templates/profile_policy.json | 57 +++++++++++++++++++ tf/modules/ooni_monitoring/variables.tf | 4 ++ 3 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 tf/modules/ooni_monitoring/templates/profile_policy.json diff --git a/tf/modules/ooni_monitoring/main.tf b/tf/modules/ooni_monitoring/main.tf index c7d0e96d..803095e7 100644 --- a/tf/modules/ooni_monitoring/main.tf +++ b/tf/modules/ooni_monitoring/main.tf @@ -66,6 +66,10 @@ resource "aws_ecs_task_definition" "ooni_service_discovery" { { name = "AWS_REGION" value = var.aws_region + }, + { + name = "DiscoveryOptions__EcsClusters" + value = var.cluster_name } ] secrets = [ @@ -94,7 +98,6 @@ resource "aws_ecs_task_definition" "ooni_service_discovery" { resource "aws_ecs_service" "service" { name = local.name cluster = var.cluster_id - launch_type = "EC2" task_definition = aws_ecs_task_definition.ooni_service_discovery.id desired_count = 1 @@ -115,7 +118,7 @@ resource "aws_ecs_service" "service" { } resource "aws_iam_role" "ecs_sd_task" { - name = "${local.name}-task-role" + name = "${local.name}-task-role-execution" tags = var.tags @@ -136,6 +139,13 @@ resource "aws_iam_role" "ecs_sd_task" { EOF } +resource "aws_iam_role_policy" "ooni_ecs_sd_task" { + name = "${local.name}-task-role-execution" + role = aws_iam_role.ecs_sd_task.name + + policy = templatefile("${path.module}/templates/profile_policy.json", {}) +} + resource "aws_cloudwatch_log_group" "ooni_ecs_sd" { name = "ooni-ecs-group/${local.name}" } \ No newline at end of file diff --git a/tf/modules/ooni_monitoring/templates/profile_policy.json b/tf/modules/ooni_monitoring/templates/profile_policy.json new file mode 100644 index 00000000..c5a893fe --- /dev/null +++ b/tf/modules/ooni_monitoring/templates/profile_policy.json @@ -0,0 +1,57 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "ecsInstanceRole", + "Effect": "Allow", + "Action": [ + "ecs:DeregisterContainerInstance", + "ecs:DiscoverPollEndpoint", + "ecs:Poll", + "ecs:RegisterContainerInstance", + "ecs:Submit*", + "ecs:StartTelemetrySession" + ], + "Resource": ["*"] + }, + { + "Sid": "CloudWatchLogsFullAccess", + "Effect": "Allow", + "Action": ["logs:*", "cloudwatch:GenerateQuery"], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameter", + "ssm:GetParameters", + "ssm:GetParameterHistory", + "ssm:GetParametersByPath" + ], + "Resource": "arn:aws:ssm:*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:Describe*", + "elasticloadbalancing:DeregisterInstancesFromLoadBalancer", + "elasticloadbalancing:DeregisterTargets", + "elasticloadbalancing:Describe*", + "elasticloadbalancing:RegisterInstancesWithLoadBalancer", + "elasticloadbalancing:RegisterTargets" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecs:DescribeClusters", + "ecs:ListServices", + "ecs:DescribeServices", + "ecs:ListTasks", + "ecs:DescribeTasks" + ], + "Resource": "*" + } + ] +} diff --git a/tf/modules/ooni_monitoring/variables.tf b/tf/modules/ooni_monitoring/variables.tf index 835699b6..9a3bc8d8 100644 --- a/tf/modules/ooni_monitoring/variables.tf +++ b/tf/modules/ooni_monitoring/variables.tf @@ -26,4 +26,8 @@ variable "task_secrets" { variable "cluster_id" { type = string +} + +variable "cluster_name" { + type = string } \ No newline at end of file From 504cfbba5c48c5be02560338db965305d0d569be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Tue, 4 Feb 2025 11:26:50 +0100 Subject: [PATCH 07/27] set up ecs sd task --- tf/environments/dev/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index e0d2a0fe..347fddec 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -800,4 +800,5 @@ module "ooni_monitoring" { cluster_id = module.ooniapi_cluster.cluster_id tags = local.tags + cluster_name = module.ooniapi_cluster.cluster_name } From 61612c19aa2cf05593906b88fe5e38d7fe1b07a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 5 Feb 2025 10:01:03 +0100 Subject: [PATCH 08/27] Working on service to monitor ecs nodes and tasks --- tf/environments/dev/main.tf | 2 +- tf/modules/ooni_monitoring/main.tf | 2 +- tf/modules/ooni_monitoring/variables.tf | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 347fddec..2107ebfd 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -800,5 +800,5 @@ module "ooni_monitoring" { cluster_id = module.ooniapi_cluster.cluster_id tags = local.tags - cluster_name = module.ooniapi_cluster.cluster_name + cluster_names = [module.ooniapi_cluster.cluster_name] } diff --git a/tf/modules/ooni_monitoring/main.tf b/tf/modules/ooni_monitoring/main.tf index 803095e7..71fcd3d3 100644 --- a/tf/modules/ooni_monitoring/main.tf +++ b/tf/modules/ooni_monitoring/main.tf @@ -69,7 +69,7 @@ resource "aws_ecs_task_definition" "ooni_service_discovery" { }, { name = "DiscoveryOptions__EcsClusters" - value = var.cluster_name + value = join(";", var.cluster_names) } ] secrets = [ diff --git a/tf/modules/ooni_monitoring/variables.tf b/tf/modules/ooni_monitoring/variables.tf index 9a3bc8d8..137c1df4 100644 --- a/tf/modules/ooni_monitoring/variables.tf +++ b/tf/modules/ooni_monitoring/variables.tf @@ -28,6 +28,6 @@ variable "cluster_id" { type = string } -variable "cluster_name" { - type = string +variable "cluster_names" { + type = list(string) } \ No newline at end of file From 8b7f9dd6461324270b013df0573e72cc9aee2f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 5 Feb 2025 10:02:03 +0100 Subject: [PATCH 09/27] Add nginx proxy to clickhouse server --- ansible/roles/clickhouse_proxy/tasks/main.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ansible/roles/clickhouse_proxy/tasks/main.yml b/ansible/roles/clickhouse_proxy/tasks/main.yml index 194c64e8..b7d792de 100644 --- a/ansible/roles/clickhouse_proxy/tasks/main.yml +++ b/ansible/roles/clickhouse_proxy/tasks/main.yml @@ -28,3 +28,13 @@ notify: - reload nginx - restart nginx +- name: Add prometheus proxy nginx config + tags: webserv + template: + src: templates/prometheus-proxy.conf + dest: /etc/nginx/sites-enabled/prometheus-proxy.conf + mode: 0755 + owner: root + notify: + - reload nginx + - restart nginx From 11c39e25dd1aa187537a4bcf9217ccacf3cda8d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 6 Feb 2025 12:53:33 +0100 Subject: [PATCH 10/27] Remove monitoring service that didn't worked well --- tf/modules/ooni_monitoring/main.tf | 106 ------------------ .../templates/profile_policy.json | 57 ---------- tf/modules/ooni_monitoring/variables.tf | 13 --- 3 files changed, 176 deletions(-) delete mode 100644 tf/modules/ooni_monitoring/templates/profile_policy.json diff --git a/tf/modules/ooni_monitoring/main.tf b/tf/modules/ooni_monitoring/main.tf index 71fcd3d3..a3615f73 100644 --- a/tf/modules/ooni_monitoring/main.tf +++ b/tf/modules/ooni_monitoring/main.tf @@ -43,109 +43,3 @@ resource "aws_ssm_parameter" "ooni_monitoring_secret_key" { type = "SecureString" value = aws_iam_access_key.ooni_monitoring.secret } - -resource "aws_ecs_task_definition" "ooni_service_discovery" { - family = "ecs-sd-td" - network_mode = "bridge" - - container_definitions = jsonencode([ - { - memoryReservation = var.task_memory, - essential = true, - image = "apptality/aws-ecs-cloudmap-prometheus-discovery:latest", - name = local.name, - - portMappings = [ - { - containerPort = 9001 - protocol = "tcp" - } - ], - - environment = [ - { - name = "AWS_REGION" - value = var.aws_region - }, - { - name = "DiscoveryOptions__EcsClusters" - value = join(";", var.cluster_names) - } - ] - secrets = [ - for k, v in var.task_secrets : { - name = k, - valueFrom = v - } - ], - logConfiguration = { - logDriver = "awslogs", - options = { - awslogs-group = aws_cloudwatch_log_group.ooni_ecs_sd.name, - awslogs-region = var.aws_region - } - } - } - ]) - - # TODO I think this should have its own role - task_role_arn = aws_iam_role.ecs_sd_task.arn - execution_role_arn = aws_iam_role.ecs_sd_task.arn - tags = var.tags - track_latest = true -} - -resource "aws_ecs_service" "service" { - name = local.name - cluster = var.cluster_id - task_definition = aws_ecs_task_definition.ooni_service_discovery.id - desired_count = 1 - - # Required to SSH into the container - enable_execute_command = true - - # Below are required to enforce a new deployment to be ready before the old one is stopped - deployment_minimum_healthy_percent = 0 - deployment_maximum_percent = 100 - - # lifecycle { - # ignore_changes = [ - # desired_count - # ] - # } - - tags = var.tags -} - -resource "aws_iam_role" "ecs_sd_task" { - name = "${local.name}-task-role-execution" - - tags = var.tags - - assume_role_policy = < Date: Thu, 6 Feb 2025 12:53:50 +0100 Subject: [PATCH 11/27] Remove monitoring service that didn't worked well --- tf/environments/dev/main.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 2107ebfd..85218bb5 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -301,8 +301,6 @@ module "ooniapi_cluster" { instance_type = "t3a.micro" - monitoring_sg_ids = [module.ooni_clickhouse_proxy.ec2_sg_id] - tags = merge( local.tags, { Name = "ooni-tier0-api-ecs-cluster" } @@ -797,8 +795,6 @@ module "ooni_monitoring" { source = "../../modules/ooni_monitoring" environment = local.environment aws_region = var.aws_region - cluster_id = module.ooniapi_cluster.cluster_id tags = local.tags - cluster_names = [module.ooniapi_cluster.cluster_name] } From aad3d6c9945910d5c6fa9e03268c4c7e4190239d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 6 Feb 2025 13:11:31 +0100 Subject: [PATCH 12/27] Change ports for monitoring hosts in nginx proxy rule --- .../roles/clickhouse_proxy/templates/prometheus-proxy.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf index 334ad172..059599dc 100644 --- a/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf +++ b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf @@ -1,6 +1,6 @@ server { - listen 80; + listen 9200; location ~ /([a-zA-Z0-9_\.]+)/(.*) { - proxy_pass http://$1:80/$2$is_args$args; + proxy_pass http://$1:9100/$2$is_args$args; } } From f6e920d068c78466ffc7f3291430b87194c36d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 14:13:27 +0100 Subject: [PATCH 13/27] Add clickhouse proxy parameter for the prometheus configuration file --- ansible/roles/prometheus/templates/prometheus.yml | 6 ++++-- ansible/roles/prometheus/vars/main.yml | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 8d475362..12d04853 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -220,15 +220,17 @@ scrape_configs: scrape_interval: 5s scheme: https metrics_path: "/metrics" + + # Node level metrics for cluster nodes ec2_sd_configs: - access_key: "{{prometheus_aws_access_key}}" secret_key: "{{prometheus_aws_secret_key}}" region: "eu-central-1" - port: 80 # should be the proxy + port: 9100 # should be the proxy relabel_configs: # Change the host to the proxy host with relabeling - source_labels: [__address__] regex: "([0-9\\.]+):([0-9]+)" # : - replacement: "proxy:${2}/${1}/metrics" + replacement: "{{clickhouse_proxy_host_dev}}:9200/${1}/metrics" target_label: "proxy_host" action: "replace" ... diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index 81c321ee..abf2120f 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -152,4 +152,7 @@ blackbox_jobs: targets: "{{ dom0_hosts | list }}" prometheus_aws_access_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}" -prometheus_aws_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" \ No newline at end of file +prometheus_aws_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" +# TODO lookup this value somewhere else +clickhouse_proxy_host_dev: "clickhouseproxy.dev.ooni.io" +clickhouse_proxy_host_prod: "clickhouseproxy.dev.ooni.io" # TODO Change for prod From 7013ea09cb7f77d895dd465868087c3aff76789b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 14:14:47 +0100 Subject: [PATCH 14/27] Add access from the clickhouse server to ECS nodes; trying to add access from monitoring server to clickhouse proxy server --- tf/environments/dev/main.tf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 85218bb5..10df6fb3 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -301,6 +301,13 @@ module "ooniapi_cluster" { instance_type = "t3a.micro" + monitoring_sg_ids = [ + # The clickhouse proxy has an nginx configuration + # to proxy requests from the monitoring server + # to the cluster instances + module.ooni_clickhouse_proxy.ec2_sg_id + ] + tags = merge( local.tags, { Name = "ooni-tier0-api-ecs-cluster" } @@ -440,6 +447,11 @@ module "ooni_clickhouse_proxy" { to_port = 9000, protocol = "tcp", cidr_blocks = module.network.vpc_subnet_private[*].cidr_block, + }, { + from_port = 9200, + to_port = 9200, + protocol = "tcp" + cidr_blocks = ["5.9.112.244/32"] # TODO set this as parameter }] egress_rules = [{ From d880b4da0502e3d1078c476ffebbac2a08e00f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 14:15:47 +0100 Subject: [PATCH 15/27] Add monitoring SG to ecs cluster ingress rules --- tf/modules/ecs_cluster/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tf/modules/ecs_cluster/main.tf b/tf/modules/ecs_cluster/main.tf index 9ac89e3b..9c8a584f 100644 --- a/tf/modules/ecs_cluster/main.tf +++ b/tf/modules/ecs_cluster/main.tf @@ -122,8 +122,8 @@ resource "aws_security_group" "container_host" { ingress { protocol = "tcp" - from_port = 80 - to_port = 80 + from_port = 9100 + to_port = 9100 security_groups = var.monitoring_sg_ids } From b00b17944ee29ff7875678d664a52d6264d6fb82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 15:28:38 +0100 Subject: [PATCH 16/27] Add relabeling settings to direct traffic through the proxy into the cluster machines --- .../roles/prometheus/templates/prometheus.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 12d04853..212eaa37 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -218,7 +218,7 @@ scrape_configs: # EC2 instances monitoring: - job_name: 'ooni-api-ec2' scrape_interval: 5s - scheme: https + scheme: http # TODO change to https metrics_path: "/metrics" # Node level metrics for cluster nodes @@ -228,9 +228,24 @@ scrape_configs: region: "eu-central-1" port: 9100 # should be the proxy relabel_configs: # Change the host to the proxy host with relabeling + - source_labels: [__address__] + regex: "([0-9\\.]+):([0-9]+)" # :" + replacement: "$1" + target_label: "ec2_host" + action: "replace" - source_labels: [__address__] regex: "([0-9\\.]+):([0-9]+)" # : replacement: "{{clickhouse_proxy_host_dev}}:9200/${1}/metrics" target_label: "proxy_host" action: "replace" + - source_labels: [proxy_host] + regex: "([^/]*)/(.*)" + replacement: "$1" + target_label: "__address__" + action: "replace" + - source_labels: [proxy_host] + regex: "([^/]*)/(.*)" + replacement: "/$2" + target_label: "__metrics_path__" + action: "replace" ... From 6c93fa2e82e1845ea1760f12a2e0cc2c068ee7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 15:29:28 +0100 Subject: [PATCH 17/27] Add rule to allow traffic from monitoring server to clickhouse proxy server --- ansible/roles/clickhouse_proxy/tasks/main.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ansible/roles/clickhouse_proxy/tasks/main.yml b/ansible/roles/clickhouse_proxy/tasks/main.yml index b7d792de..a7c01158 100644 --- a/ansible/roles/clickhouse_proxy/tasks/main.yml +++ b/ansible/roles/clickhouse_proxy/tasks/main.yml @@ -9,6 +9,17 @@ notify: - reload nftables +# For prometheus scrape requests +- name: Allow traffic on port 9200 + tags: prometheus-proxy + blockinfile: + path: /etc/ooni/nftables/tcp/9200.nft + create: yes + block: | + add rule inet filter input tcp dport 9200 counter accept comment "prometheus" + notify: + - reload nftables + - name: Create the modules-enabled directory if not exists tags: webserv ansible.builtin.file: @@ -32,7 +43,7 @@ tags: webserv template: src: templates/prometheus-proxy.conf - dest: /etc/nginx/sites-enabled/prometheus-proxy.conf + dest: /etc/nginx/conf.d/prometheus-proxy.conf mode: 0755 owner: root notify: From 103a5bae741d1db5106df524bb256d17f9e150c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 7 Feb 2025 15:45:45 +0100 Subject: [PATCH 18/27] Add linejump to respect style --- ansible/roles/clickhouse_proxy/tasks/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/clickhouse_proxy/tasks/main.yml b/ansible/roles/clickhouse_proxy/tasks/main.yml index a7c01158..5b5fd094 100644 --- a/ansible/roles/clickhouse_proxy/tasks/main.yml +++ b/ansible/roles/clickhouse_proxy/tasks/main.yml @@ -39,6 +39,7 @@ notify: - reload nginx - restart nginx + - name: Add prometheus proxy nginx config tags: webserv template: From 490d853d7ece2235a9d3f4762d22a50782788f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 12 Feb 2025 08:46:37 +0100 Subject: [PATCH 19/27] Add ip resolution from hostname in ingress rules for clickhouse proxy --- tf/environments/dev/main.tf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 10df6fb3..a877c213 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -418,6 +418,10 @@ module "ooniapi_reverseproxy" { ) } +data "dns_a_record_set" "monitoring_host" { + host = "monitoring.ooni.org" +} + module "ooni_clickhouse_proxy" { source = "../../modules/ec2" @@ -451,7 +455,7 @@ module "ooni_clickhouse_proxy" { from_port = 9200, to_port = 9200, protocol = "tcp" - cidr_blocks = ["5.9.112.244/32"] # TODO set this as parameter + cidr_blocks = [for ip in flatten(data.dns_a_record_set.monitoring_host.*.addrs): "${tostring(ip)}/32"] # TODO set this as parameter }] egress_rules = [{ From 99167c5151802ae31e38d90c777f8ad9da2da91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 12 Feb 2025 08:48:39 +0100 Subject: [PATCH 20/27] formatting and removing TODO comment --- tf/environments/dev/main.tf | 54 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index a877c213..c8635453 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -419,7 +419,7 @@ module "ooniapi_reverseproxy" { } data "dns_a_record_set" "monitoring_host" { - host = "monitoring.ooni.org" + host = "monitoring.ooni.org" } module "ooni_clickhouse_proxy" { @@ -437,36 +437,36 @@ module "ooni_clickhouse_proxy" { name = "oonickprx" ingress_rules = [{ - from_port = 22, - to_port = 22, - protocol = "tcp", + from_port = 22, + to_port = 22, + protocol = "tcp", cidr_blocks = ["0.0.0.0/0"], - }, { - from_port = 80, - to_port = 80, - protocol = "tcp", + }, { + from_port = 80, + to_port = 80, + protocol = "tcp", cidr_blocks = ["0.0.0.0/0"], - }, { - from_port = 9000, - to_port = 9000, - protocol = "tcp", + }, { + from_port = 9000, + to_port = 9000, + protocol = "tcp", cidr_blocks = module.network.vpc_subnet_private[*].cidr_block, - }, { - from_port = 9200, - to_port = 9200, - protocol = "tcp" - cidr_blocks = [for ip in flatten(data.dns_a_record_set.monitoring_host.*.addrs): "${tostring(ip)}/32"] # TODO set this as parameter + }, { + from_port = 9200, + to_port = 9200, + protocol = "tcp" + cidr_blocks = [for ip in flatten(data.dns_a_record_set.monitoring_host.*.addrs) : "${tostring(ip)}/32"] }] egress_rules = [{ - from_port = 0, - to_port = 0, - protocol = "-1", + from_port = 0, + to_port = 0, + protocol = "-1", cidr_blocks = ["0.0.0.0/0"], - }, { - from_port = 0, - to_port = 0, - protocol = "-1", + }, { + from_port = 0, + to_port = 0, + protocol = "-1", ipv6_cidr_blocks = ["::/0"] }] @@ -808,9 +808,9 @@ resource "aws_acm_certificate_validation" "ooniapi_frontend" { ### Ooni monitoring module "ooni_monitoring" { - source = "../../modules/ooni_monitoring" + source = "../../modules/ooni_monitoring" environment = local.environment - aws_region = var.aws_region + aws_region = var.aws_region - tags = local.tags + tags = local.tags } From ab5cd1c4a58751be595db1bc39fb48ce952c563d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 12 Feb 2025 15:52:30 +0100 Subject: [PATCH 21/27] Fix https error by adding ssl configuration --- ansible/deploy-clickhouse-proxy.yml | 5 +++++ ansible/roles/clickhouse_proxy/defaults/main.yml | 1 + .../clickhouse_proxy/templates/prometheus-proxy.conf | 12 +++++++++++- ansible/roles/prometheus/templates/prometheus.yml | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/clickhouse_proxy/defaults/main.yml diff --git a/ansible/deploy-clickhouse-proxy.yml b/ansible/deploy-clickhouse-proxy.yml index 233ace4f..ef150409 100644 --- a/ansible/deploy-clickhouse-proxy.yml +++ b/ansible/deploy-clickhouse-proxy.yml @@ -5,9 +5,14 @@ become: true roles: - role: bootstrap + - role: dehydrated + vars: + ssl_domains: + - clickhouseproxy.dev.ooni.io - role: nginx tags: nginx - role: clickhouse_proxy vars: clickhouse_url: "clickhouse3.prod.ooni.io" clickhouse_port: 9000 + clickhouse_proxy_public_fqdn: "clickhouseproxy.dev.ooni.io" diff --git a/ansible/roles/clickhouse_proxy/defaults/main.yml b/ansible/roles/clickhouse_proxy/defaults/main.yml new file mode 100644 index 00000000..14c3bf27 --- /dev/null +++ b/ansible/roles/clickhouse_proxy/defaults/main.yml @@ -0,0 +1 @@ +tls_cert_dir: /var/lib/dehydrated/certs \ No newline at end of file diff --git a/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf index 059599dc..2ea41815 100644 --- a/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf +++ b/ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf @@ -1,5 +1,15 @@ server { - listen 9200; + listen 9200 ssl; + + server_name {{ clickhouse_proxy_public_fqdn }}; + + include /etc/nginx/ssl_intermediate.conf; + + ssl_certificate {{tls_cert_dir}}/{{inventory_hostname}}/fullchain.pem; + ssl_certificate_key {{tls_cert_dir}}/{{inventory_hostname}}/privkey.pem; + ssl_trusted_certificate {{tls_cert_dir}}/{{inventory_hostname}}/chain.pem; + + proxy_ssl_server_name on; location ~ /([a-zA-Z0-9_\.]+)/(.*) { proxy_pass http://$1:9100/$2$is_args$args; } diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 212eaa37..4dc4be65 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -218,7 +218,7 @@ scrape_configs: # EC2 instances monitoring: - job_name: 'ooni-api-ec2' scrape_interval: 5s - scheme: http # TODO change to https + scheme: https # TODO change to https metrics_path: "/metrics" # Node level metrics for cluster nodes From efc97ef265bc3306a17ed45af71be32e0b6975d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 10:12:54 +0100 Subject: [PATCH 22/27] Removed done TODO comment --- ansible/roles/prometheus/templates/prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 4dc4be65..cf9d0458 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -218,7 +218,7 @@ scrape_configs: # EC2 instances monitoring: - job_name: 'ooni-api-ec2' scrape_interval: 5s - scheme: https # TODO change to https + scheme: https metrics_path: "/metrics" # Node level metrics for cluster nodes From b1236a5d5351e301a5178e625243771eacfcc1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 10:14:16 +0100 Subject: [PATCH 23/27] Add comment in ingress rules for clickhouse proxy --- tf/environments/dev/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index c8635453..f9ed1c58 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -452,6 +452,7 @@ module "ooni_clickhouse_proxy" { protocol = "tcp", cidr_blocks = module.network.vpc_subnet_private[*].cidr_block, }, { + // For the prometheus proxy: from_port = 9200, to_port = 9200, protocol = "tcp" From b49e2f8f39d869fb4d1e80c93a24b4ade33eabde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 10:26:03 +0100 Subject: [PATCH 24/27] Remove done TODO comment --- ansible/roles/prometheus/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index abf2120f..513c9b4d 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -153,6 +153,6 @@ blackbox_jobs: prometheus_aws_access_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}" prometheus_aws_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" -# TODO lookup this value somewhere else + clickhouse_proxy_host_dev: "clickhouseproxy.dev.ooni.io" clickhouse_proxy_host_prod: "clickhouseproxy.dev.ooni.io" # TODO Change for prod From 155af276b716cada0958fb784cdb5403e27917b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 12:33:49 +0100 Subject: [PATCH 25/27] Added missing } --- tf/modules/ecs_cluster/variables.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tf/modules/ecs_cluster/variables.tf b/tf/modules/ecs_cluster/variables.tf index 9a664a18..5604b418 100644 --- a/tf/modules/ecs_cluster/variables.tf +++ b/tf/modules/ecs_cluster/variables.tf @@ -64,6 +64,7 @@ variable "instance_volume_size" { variable "monitoring_sg_ids" { default = [] +} variable "node_exporter_port" { default = "9100" From 2c1fc4f660c35c9ad54eaf48a8efaa031cd311cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 15:23:39 +0100 Subject: [PATCH 26/27] Change job name --- ansible/roles/prometheus/templates/prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index cf9d0458..0eb74970 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -216,7 +216,7 @@ scrape_configs: - backend-hel.ooni.org:444 # EC2 instances monitoring: - - job_name: 'ooni-api-ec2' + - job_name: 'ooni-aws-ec2' scrape_interval: 5s scheme: https metrics_path: "/metrics" From 3d3813f3bf2487b8ac1ab17cb0f765e966060cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 13 Feb 2025 16:22:12 +0100 Subject: [PATCH 27/27] Add dev-prod versions of the prometheus access keys variables --- ansible/roles/prometheus/templates/prometheus.yml | 4 ++-- ansible/roles/prometheus/vars/main.yml | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index 0eb74970..3e8fa796 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -223,8 +223,8 @@ scrape_configs: # Node level metrics for cluster nodes ec2_sd_configs: - - access_key: "{{prometheus_aws_access_key}}" - secret_key: "{{prometheus_aws_secret_key}}" + - access_key: "{{prometheus_aws_access_key_dev}}" + secret_key: "{{prometheus_aws_secret_key_dev}}" region: "eu-central-1" port: 9100 # should be the proxy relabel_configs: # Change the host to the proxy host with relabeling diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index 513c9b4d..3e3fd359 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -151,8 +151,11 @@ blackbox_jobs: module: icmp targets: "{{ dom0_hosts | list }}" -prometheus_aws_access_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}" -prometheus_aws_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" +prometheus_aws_access_key_dev: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}" +prometheus_aws_secret_key_dev: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}" + +prometheus_aws_access_key_prod: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_prod') }}" +prometheus_aws_secret_key_prod: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_prod') }}" clickhouse_proxy_host_dev: "clickhouseproxy.dev.ooni.io" clickhouse_proxy_host_prod: "clickhouseproxy.dev.ooni.io" # TODO Change for prod