Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus ec2 monitoring #182

Merged
merged 31 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
10e5bbb
Add prometheus keys to access aws instances
LDiazN Jan 31, 2025
3fa1281
Add initial version of proxy relabeling configs
LDiazN Jan 31, 2025
15c86e9
Add nginx prometheus monitoring settings
LDiazN Jan 31, 2025
78a7c08
Add access to ec2 instances from clickhouse proxy
LDiazN Jan 31, 2025
9f30039
Merge branch 'main' into prometheus-ec2-monitoring
LDiazN Feb 3, 2025
c9c46ee
Add ecs service for service discovery
LDiazN Feb 3, 2025
3cfc063
Merge branch 'main' into prometheus-ec2-monitoring
LDiazN Feb 4, 2025
c4c8e9c
Added permissions to service discovery task
LDiazN Feb 4, 2025
504cfbb
set up ecs sd task
LDiazN Feb 4, 2025
61612c1
Working on service to monitor ecs nodes and tasks
LDiazN Feb 5, 2025
8b7f9dd
Add nginx proxy to clickhouse server
LDiazN Feb 5, 2025
11c39e2
Remove monitoring service that didn't worked well
LDiazN Feb 6, 2025
4293caf
Remove monitoring service that didn't worked well
LDiazN Feb 6, 2025
aad3d6c
Change ports for monitoring hosts in nginx proxy rule
LDiazN Feb 6, 2025
f6e920d
Add clickhouse proxy parameter for the prometheus configuration file
LDiazN Feb 7, 2025
7013ea0
Add access from the clickhouse server to ECS nodes; trying to add acc…
LDiazN Feb 7, 2025
d880b4d
Add monitoring SG to ecs cluster ingress rules
LDiazN Feb 7, 2025
b00b179
Add relabeling settings to direct traffic through the proxy into the …
LDiazN Feb 7, 2025
6c93fa2
Add rule to allow traffic from monitoring server to clickhouse proxy …
LDiazN Feb 7, 2025
103a5ba
Add linejump to respect style
LDiazN Feb 7, 2025
b9e55b4
Merge branch 'main' into prometheus-ec2-monitoring
LDiazN Feb 11, 2025
490d853
Add ip resolution from hostname in ingress rules for clickhouse proxy
LDiazN Feb 12, 2025
99167c5
formatting and removing TODO comment
LDiazN Feb 12, 2025
ab5cd1c
Fix https error by adding ssl configuration
LDiazN Feb 12, 2025
efc97ef
Removed done TODO comment
LDiazN Feb 13, 2025
b1236a5
Add comment in ingress rules for clickhouse proxy
LDiazN Feb 13, 2025
b49e2f8
Remove done TODO comment
LDiazN Feb 13, 2025
9a7d352
Merge branch 'main' into prometheus-ec2-monitoring
LDiazN Feb 13, 2025
155af27
Added missing }
LDiazN Feb 13, 2025
2c1fc4f
Change job name
LDiazN Feb 13, 2025
3d3813f
Add dev-prod versions of the prometheus access keys variables
LDiazN Feb 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions ansible/deploy-clickhouse-proxy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
become: true
roles:
- role: bootstrap
- role: dehydrated
vars:
ssl_domains:
- clickhouseproxy.dev.ooni.io
- role: nginx
tags: nginx
- role: clickhouse_proxy
vars:
clickhouse_url: "clickhouse3.prod.ooni.io"
clickhouse_port: 9000
clickhouse_proxy_public_fqdn: "clickhouseproxy.dev.ooni.io"
- role: dehydrated
vars:
ssl_domains: "clickhouseproxy.dev.ooni.io"
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/clickhouse_proxy/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tls_cert_dir: /var/lib/dehydrated/certs
22 changes: 22 additions & 0 deletions ansible/roles/clickhouse_proxy/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@
notify:
- reload nftables

# Open port 9200 in the host firewall so Prometheus scrape requests can reach
# the nginx proxy listening on that port.
- name: Allow traffic on port 9200
  tags: prometheus-proxy
  ansible.builtin.blockinfile:
    path: /etc/ooni/nftables/tcp/9200.nft
    create: true
    block: |
      add rule inet filter input tcp dport 9200 counter accept comment "prometheus"
  notify:
    - reload nftables

- name: Create the modules-enabled directory if not exists
tags: webserv
ansible.builtin.file:
Expand All @@ -28,3 +39,14 @@
notify:
- reload nginx
- restart nginx

# Render the nginx server block that relays Prometheus scrapes.
- name: Add prometheus proxy nginx config
  tags: webserv
  ansible.builtin.template:
    src: templates/prometheus-proxy.conf
    dest: /etc/nginx/conf.d/prometheus-proxy.conf
    # Quoted so the value is always read as an octal mode string; a config
    # file does not need the execute bits the previous 0755 granted.
    mode: "0644"
    owner: root
  notify:
    - reload nginx
    - restart nginx
16 changes: 16 additions & 0 deletions ansible/roles/clickhouse_proxy/templates/prometheus-proxy.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# TLS endpoint that relays Prometheus scrapes to port 9100 of another host.
# A request for  /<ipv4>/<path>?<query>  is forwarded to
# http://<ipv4>:9100/<path>?<query>.
server {
    listen 9200 ssl;

    server_name {{ clickhouse_proxy_public_fqdn }};

    include /etc/nginx/ssl_intermediate.conf;

    # NOTE(review): the certificate directory is keyed by inventory_hostname
    # while server_name uses clickhouse_proxy_public_fqdn; confirm both refer
    # to the same name on every host this template is deployed to.
    ssl_certificate {{tls_cert_dir}}/{{inventory_hostname}}/fullchain.pem;
    ssl_certificate_key {{tls_cert_dir}}/{{inventory_hostname}}/privkey.pem;
    ssl_trusted_certificate {{tls_cert_dir}}/{{inventory_hostname}}/chain.pem;

    # The pattern is anchored and only accepts a dotted IPv4 address as the
    # first path segment, so the proxy cannot be pointed at arbitrary host
    # names. The upstream is plain http, so no proxy_ssl_* settings apply.
    location ~ ^/([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/(.*)$ {
        proxy_pass http://$1:9100/$2$is_args$args;
    }
}
34 changes: 34 additions & 0 deletions ansible/roles/prometheus/templates/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -214,4 +214,38 @@ scrape_configs:
static_configs:
- targets:
- backend-hel.ooni.org:444

# EC2 instances monitoring.
# Targets are discovered through the EC2 API, but every scrape is sent to the
# proxy host on port 9200 using the path /<instance ip>/metrics.
- job_name: 'ooni-aws-ec2'
  scrape_interval: 5s
  scheme: https
  metrics_path: '/metrics'

  # Node level metrics for cluster nodes
  ec2_sd_configs:
    - access_key: "{{prometheus_aws_access_key_dev}}"
      secret_key: "{{prometheus_aws_secret_key_dev}}"
      region: 'eu-central-1'
      # Port placed in the discovered <ip>:<port> address. The relabeling
      # below only reuses the ip part of that address.
      port: 9100

  # Rewrite each discovered target so the request goes through the proxy.
  # All three rules match the original <ip>:<port> address, so a target whose
  # address does not have that shape is left untouched. No helper label is
  # stored on the scraped series.
  relabel_configs:
    # Keep the instance ip as a regular label.
    - source_labels: [__address__]
      regex: '([0-9\.]+):([0-9]+)'  # <ip>:<port>
      replacement: '$1'
      target_label: 'ec2_host'
      action: 'replace'
    # Request /<ip>/metrics from the proxy. This must run before __address__
    # is overwritten.
    - source_labels: [__address__]
      regex: '([0-9\.]+):([0-9]+)'  # <ip>:<port>
      replacement: '/${1}/metrics'
      target_label: '__metrics_path__'
      action: 'replace'
    # Connect to the proxy instead of the instance.
    - source_labels: [__address__]
      regex: '([0-9\.]+):([0-9]+)'  # <ip>:<port>
      replacement: "{{clickhouse_proxy_host_dev}}:9200"
      target_label: '__address__'
      action: 'replace'
...
9 changes: 9 additions & 0 deletions ansible/roles/prometheus/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,12 @@ blackbox_jobs:
- name: icmp
module: icmp
targets: "{{ dom0_hosts | list }}"

# AWS key pair read from the SSM parameters under
# /oonidevops/secrets/ooni_monitoring/ using the dev AWS profile.
prometheus_aws_access_key_dev: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_dev') }}"
prometheus_aws_secret_key_dev: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_dev') }}"

# Same parameter paths, read with the prod AWS profile.
prometheus_aws_access_key_prod: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/access_key', profile='oonidevops_user_prod') }}"
prometheus_aws_secret_key_prod: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ooni_monitoring/secret_key', profile='oonidevops_user_prod') }}"

# Host name of the proxy that relays Prometheus scrapes.
# NOTE(review): the prod value still points at the dev proxy (see the TODO).
clickhouse_proxy_host_dev: "clickhouseproxy.dev.ooni.io"
clickhouse_proxy_host_prod: "clickhouseproxy.dev.ooni.io" # TODO Change for prod
62 changes: 41 additions & 21 deletions tf/environments/dev/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,13 @@ module "ooniapi_cluster" {

instance_type = "t3a.micro"

monitoring_sg_ids = [
# The clickhouse proxy has an nginx configuration
# to proxy requests from the monitoring server
# to the cluster instances
module.ooni_clickhouse_proxy.ec2_sg_id
]

tags = merge(
local.tags,
{ Name = "ooni-tier0-api-ecs-cluster" }
Expand Down Expand Up @@ -411,6 +418,10 @@ module "ooniapi_reverseproxy" {
)
}

# Looks up the A records of the monitoring host. The resulting addresses are
# turned into /32 CIDR blocks for the port 9200 ingress rule of the
# clickhouse proxy.
data "dns_a_record_set" "monitoring_host" {
host = "monitoring.ooni.org"
}

module "ooni_clickhouse_proxy" {
source = "../../modules/ec2"

Expand All @@ -426,31 +437,37 @@ module "ooni_clickhouse_proxy" {

name = "oonickprx"
ingress_rules = [{
from_port = 22,
to_port = 22,
protocol = "tcp",
from_port = 22,
to_port = 22,
protocol = "tcp",
cidr_blocks = ["0.0.0.0/0"],
}, {
from_port = 80,
to_port = 80,
protocol = "tcp",
}, {
from_port = 80,
to_port = 80,
protocol = "tcp",
cidr_blocks = ["0.0.0.0/0"],
}, {
from_port = 9000,
to_port = 9000,
protocol = "tcp",
}, {
from_port = 9000,
to_port = 9000,
protocol = "tcp",
cidr_blocks = module.network.vpc_subnet_private[*].cidr_block,
}, {
// For the prometheus proxy:
from_port = 9200,
to_port = 9200,
protocol = "tcp"
cidr_blocks = [for ip in flatten(data.dns_a_record_set.monitoring_host.*.addrs) : "${tostring(ip)}/32"]
LDiazN marked this conversation as resolved.
Show resolved Hide resolved
}]

egress_rules = [{
from_port = 0,
to_port = 0,
protocol = "-1",
from_port = 0,
to_port = 0,
protocol = "-1",
cidr_blocks = ["0.0.0.0/0"],
}, {
from_port = 0,
to_port = 0,
protocol = "-1",
}, {
from_port = 0,
to_port = 0,
protocol = "-1",
ipv6_cidr_blocks = ["::/0"]
}]

Expand Down Expand Up @@ -792,6 +809,9 @@ resource "aws_acm_certificate_validation" "ooniapi_frontend" {
### Ooni monitoring

module "ooni_monitoring" {
source = "../../modules/ooni_monitoring"
tags = local.tags
}
source = "../../modules/ooni_monitoring"
environment = local.environment
aws_region = var.aws_region

tags = local.tags
}
4 changes: 4 additions & 0 deletions tf/modules/ec2/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ output "aws_instance_id" {
output "aws_instance_public_dns" {
value = aws_instance.ooni_ec2.public_dns
}

# ID of the security group created by this module, exported so other modules
# can reference it in their own rules.
output "ec2_sg_id" {
value = aws_security_group.ec2_sg.id
}
8 changes: 8 additions & 0 deletions tf/modules/ecs_cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,14 @@ resource "aws_security_group" "container_host" {
]
}

# Allow the monitoring security groups to reach the node exporter port on
# the container hosts. The port comes from var.node_exporter_port instead of
# a hard-coded 9100 so the rule follows the module's own setting.
ingress {
  protocol  = "tcp"
  from_port = var.node_exporter_port
  to_port   = var.node_exporter_port

  security_groups = var.monitoring_sg_ids
}

egress {
from_port = 0
to_port = 0
Expand Down
4 changes: 4 additions & 0 deletions tf/modules/ecs_cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ variable "instance_volume_size" {
default = "5"
}

variable "monitoring_sg_ids" {
  description = "IDs of the security groups allowed to reach the monitoring port on the container hosts"
  type        = list(string)
  default     = []
}

# Port of the node exporter on the container hosts. The default is declared
# as a string.
variable "node_exporter_port" {
default = "9100"
}
10 changes: 9 additions & 1 deletion tf/modules/ooni_monitoring/main.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# Module-local name and tag map, both derived from var.environment.
# NOTE(review): no use of local.name or local.tags is visible in this part of
# the module; confirm they are still referenced.
locals {
name = "ecs-service-discovery-${var.environment}"

tags = {
Name = local.name
Environment = var.environment
}
}
# IAM user for monitoring. Its access key secret is stored in SSM further
# down in this module.
resource "aws_iam_user" "ooni_monitoring" {
name = "oonidevops-monitoring"
}
Expand Down Expand Up @@ -34,4 +42,4 @@ resource "aws_ssm_parameter" "ooni_monitoring_secret_key" {
name = "/oonidevops/secrets/ooni_monitoring/secret_key"
type = "SecureString"
value = aws_iam_access_key.ooni_monitoring.secret
}
}
17 changes: 16 additions & 1 deletion tf/modules/ooni_monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,19 @@ variable "tags" {
description = "tags to apply to the resources"
default = {}
type = map(string)
}
}

variable "environment" {
  description = "Environment name, used to build the names and tags of this module's resources"
  type        = string
}

# Memory setting for the task; defaults to 64.
# NOTE(review): no consumer of this variable is visible here; confirm it is
# still used.
variable "task_memory" {
  type        = number
  default     = 64
  description = "How much memory to allocate for this task"
}

# Region passed in by the calling environment; required, no default.
variable "aws_region" {
  type        = string
  description = "AWS region"
}
Loading