Skip to content

Commit

Permalink
Maintain an equal distribution of instances across AZs in the same reg…
Browse files Browse the repository at this point in the history
…ion. (#4462)
  • Loading branch information
jeanschmidt authored Aug 10, 2023
1 parent f3f32fc commit 9439562
Show file tree
Hide file tree
Showing 16 changed files with 650 additions and 352 deletions.
48 changes: 24 additions & 24 deletions terraform-aws-github-runner/elastic_cache.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,31 @@ resource "aws_security_group" "allow_es_from_local" {
vpc_id = var.vpc_ids[0].vpc

ingress {
description = "Allow connection on port 6379 (redis)"
from_port = 6379
to_port = 6379
protocol = "tcp"
cidr_blocks = [var.vpc_cidrs[0].cidr]
description = "Allow connection on port 6379 (redis)"
from_port = 6379
to_port = 6379
protocol = "tcp"
cidr_blocks = [var.vpc_cidrs[0].cidr]
}

egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}

tags = var.tags
}

resource "random_password" "es_password" {
length = 21
special = false
length = 21
special = false
}

resource "aws_elasticache_user" "scale_lambda" {
user_id = "${var.environment}-scaleLambda"
user_name = "${var.environment}-scaleLambda"
user_id = "${var.environment}-scalelambda"
user_name = "${var.environment}-scalelambda"
access_string = "on ~* +@all"
engine = "REDIS"
passwords = [random_password.es_password.result]
Expand All @@ -42,17 +42,17 @@ resource "aws_elasticache_subnet_group" "es" {
}

resource "aws_elasticache_replication_group" "es" {
automatic_failover_enabled = false
description = "scale runners and lambdas"
engine = "redis"
node_type = "cache.m4.large"
num_node_groups = 1
port = 6379
replicas_per_node_group = 1
replication_group_id = "${var.environment}-scale-runners-rep-group"
security_group_ids = [aws_security_group.allow_es_from_local.id]
subnet_group_name = aws_elasticache_subnet_group.es.name
tags = var.tags
automatic_failover_enabled = false
description = "scale runners and lambdas"
engine = "redis"
node_type = "cache.m4.large"
num_node_groups = 1
port = 6379
replicas_per_node_group = 1
replication_group_id = "${var.environment}-scale-runners-rep-group"
security_group_ids = [aws_security_group.allow_es_from_local.id]
subnet_group_name = aws_elasticache_subnet_group.es.name
tags = var.tags
}

resource "aws_elasticache_cluster" "es" {
Expand Down
127 changes: 64 additions & 63 deletions terraform-aws-github-runner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,43 +29,43 @@ resource "random_string" "random" {
}

resource "aws_sqs_queue" "queued_builds_dead_letter" {
name = "${var.environment}-queued-builds-dead-letter"
redrive_allow_policy = jsonencode({
name = "${var.environment}-queued-builds-dead-letter"
redrive_allow_policy = jsonencode({
redrivePermission = "allowAll",
})
tags = var.tags
tags = var.tags
}

resource "aws_sqs_queue" "queued_builds" {
name = "${var.environment}-queued-builds"
visibility_timeout_seconds = var.runners_scale_up_sqs_visibility_timeout
max_message_size = 2048
message_retention_seconds = var.runners_scale_up_sqs_message_ret_s
redrive_policy = jsonencode({
name = "${var.environment}-queued-builds"
visibility_timeout_seconds = var.runners_scale_up_sqs_visibility_timeout
max_message_size = 2048
message_retention_seconds = var.runners_scale_up_sqs_message_ret_s
redrive_policy = jsonencode({
deadLetterTargetArn = aws_sqs_queue.queued_builds_dead_letter.arn
maxReceiveCount = var.runners_scale_up_sqs_max_retry
})
tags = var.tags
tags = var.tags
}

resource "aws_sqs_queue" "queued_builds_retry_dead_letter" {
name = "${var.environment}-queued-builds-retry-dead-letter"
redrive_allow_policy = jsonencode({
name = "${var.environment}-queued-builds-retry-dead-letter"
redrive_allow_policy = jsonencode({
redrivePermission = "allowAll",
})
tags = var.tags
tags = var.tags
}

resource "aws_sqs_queue" "queued_builds_retry" {
name = "${var.environment}-queued-builds-retry"
visibility_timeout_seconds = var.runners_scale_up_sqs_visibility_timeout
max_message_size = 2048
message_retention_seconds = var.runners_scale_up_sqs_message_ret_s
redrive_policy = jsonencode({
name = "${var.environment}-queued-builds-retry"
visibility_timeout_seconds = var.runners_scale_up_sqs_visibility_timeout
max_message_size = 2048
message_retention_seconds = var.runners_scale_up_sqs_message_ret_s
redrive_policy = jsonencode({
deadLetterTargetArn = aws_sqs_queue.queued_builds_retry_dead_letter.arn
maxReceiveCount = var.runners_scale_up_sqs_max_retry
})
tags = var.tags
tags = var.tags
}

module "webhook" {
Expand All @@ -81,9 +81,9 @@ module "webhook" {
sqs_build_queue = aws_sqs_queue.queued_builds
github_app_webhook_secret = var.github_app.webhook_secret

lambda_zip = var.webhook_lambda_zip
lambda_timeout = var.webhook_lambda_timeout
logging_retention_in_days = var.logging_retention_in_days
lambda_zip = var.webhook_lambda_zip
lambda_timeout = var.webhook_lambda_timeout
logging_retention_in_days = var.logging_retention_in_days

role_path = var.role_path
role_permissions_boundary = var.role_permissions_boundary
Expand All @@ -99,6 +99,7 @@ module "runners" {
vpc_ids = var.vpc_ids
vpc_sgs = var.vpc_sgs
subnet_vpc_ids = var.subnet_vpc_ids
subnet_azs = var.subnet_azs
environment = var.environment
tags = local.tags

Expand All @@ -113,36 +114,36 @@ module "runners" {
redis_endpoint = aws_elasticache_replication_group.es.primary_endpoint_address
redis_login = "${aws_elasticache_user.scale_lambda.user_name}:${random_password.es_password.result}"

sqs_build_queue = aws_sqs_queue.queued_builds
sqs_build_queue_retry = aws_sqs_queue.queued_builds_retry
github_app = var.github_app
enable_organization_runners = var.enable_organization_runners
scale_down_schedule_expression = var.scale_down_schedule_expression
minimum_running_time_in_minutes = var.minimum_running_time_in_minutes
runner_extra_labels = var.runner_extra_labels
idle_config = var.idle_config
secretsmanager_secrets_id = var.secretsmanager_secrets_id

lambda_zip = var.runners_lambda_zip
lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout
lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout
lambda_subnet_ids = var.lambda_subnet_ids

lambda_security_group_ids = var.lambda_security_group_ids
runners_security_group_ids = module.runners_instances.security_groups_ids_vpcs
github_app_key_base64 = module.runners_instances.github_app_key_base64
github_app_client_secret = module.runners_instances.github_app_client_secret
role_runner_arn = module.runners_instances.role_runner_arn

launch_template_name_linux = module.runners_instances.launch_template_name_linux
launch_template_name_linux_nvidia = module.runners_instances.launch_template_name_linux_nvidia
launch_template_name_windows = module.runners_instances.launch_template_name_windows
launch_template_version_linux = module.runners_instances.launch_template_version_linux
launch_template_version_windows = module.runners_instances.launch_template_version_windows
launch_template_version_linux_nvidia = module.runners_instances.launch_template_version_linux_nvidia

logging_retention_in_days = var.logging_retention_in_days
scale_up_lambda_concurrency = var.scale_up_lambda_concurrency
sqs_build_queue = aws_sqs_queue.queued_builds
sqs_build_queue_retry = aws_sqs_queue.queued_builds_retry
github_app = var.github_app
enable_organization_runners = var.enable_organization_runners
scale_down_schedule_expression = var.scale_down_schedule_expression
minimum_running_time_in_minutes = var.minimum_running_time_in_minutes
runner_extra_labels = var.runner_extra_labels
idle_config = var.idle_config
secretsmanager_secrets_id = var.secretsmanager_secrets_id

lambda_zip = var.runners_lambda_zip
lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout
lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout
lambda_subnet_ids = var.lambda_subnet_ids

lambda_security_group_ids = var.lambda_security_group_ids
runners_security_group_ids = module.runners_instances.security_groups_ids_vpcs
github_app_key_base64 = module.runners_instances.github_app_key_base64
github_app_client_secret = module.runners_instances.github_app_client_secret
role_runner_arn = module.runners_instances.role_runner_arn

launch_template_name_linux = module.runners_instances.launch_template_name_linux
launch_template_name_linux_nvidia = module.runners_instances.launch_template_name_linux_nvidia
launch_template_name_windows = module.runners_instances.launch_template_name_windows
launch_template_version_linux = module.runners_instances.launch_template_version_linux
launch_template_version_windows = module.runners_instances.launch_template_version_windows
launch_template_version_linux_nvidia = module.runners_instances.launch_template_version_linux_nvidia

logging_retention_in_days = var.logging_retention_in_days
scale_up_lambda_concurrency = var.scale_up_lambda_concurrency
scale_up_provisioned_concurrent_executions = var.scale_up_provisioned_concurrent_executions

role_path = var.role_path
Expand All @@ -156,10 +157,10 @@ module "runners" {
module "runners_instances" {
source = "./modules/runners-instances"

aws_region = var.aws_region
vpc_ids = var.vpc_ids
environment = var.environment
tags = local.tags
aws_region = var.aws_region
vpc_ids = var.vpc_ids
environment = var.environment
tags = local.tags

encryption = {
kms_key_id = local.kms_key_id
Expand All @@ -179,12 +180,12 @@ module "runners_instances" {
ami_filter_linux = var.ami_filter_linux
ami_filter_windows = var.ami_filter_windows

github_app = var.github_app
runner_as_root = var.runner_as_root
enable_ssm_on_runners = var.enable_ssm_on_runners
github_app = var.github_app
runner_as_root = var.runner_as_root
enable_ssm_on_runners = var.enable_ssm_on_runners

logging_retention_in_days = var.logging_retention_in_days
enable_cloudwatch_agent = var.enable_cloudwatch_agent
logging_retention_in_days = var.logging_retention_in_days
enable_cloudwatch_agent = var.enable_cloudwatch_agent

instance_profile_path = var.instance_profile_path
role_path = var.role_path
Expand All @@ -210,9 +211,9 @@ module "runner_binaries" {

runner_allow_prerelease_binaries = var.runner_allow_prerelease_binaries

lambda_zip = var.runner_binaries_syncer_lambda_zip
lambda_timeout = var.runner_binaries_syncer_lambda_timeout
logging_retention_in_days = var.logging_retention_in_days
lambda_zip = var.runner_binaries_syncer_lambda_zip
lambda_timeout = var.runner_binaries_syncer_lambda_timeout
logging_retention_in_days = var.logging_retention_in_days

role_path = var.role_path
role_permissions_boundary = var.role_permissions_boundary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ locals {
}

resource "aws_lambda_function" "syncer" {
filename = local.lambda_zip
source_code_hash = filebase64sha256(local.lambda_zip)
function_name = "${var.environment}-syncer"
role = aws_iam_role.syncer_lambda.arn
handler = "index.handler"
runtime = "nodejs14.x"
timeout = var.lambda_timeout
memory_size = 500
filename = local.lambda_zip
source_code_hash = filebase64sha256(local.lambda_zip)
function_name = "${var.environment}-syncer"
role = aws_iam_role.syncer_lambda.arn
handler = "index.handler"
runtime = "nodejs14.x"
timeout = var.lambda_timeout
memory_size = 500

environment {
variables = {
Expand All @@ -21,7 +21,7 @@ resource "aws_lambda_function" "syncer" {
GITHUB_RUNNER_ALLOW_PRERELEASE_BINARIES = var.runner_allow_prerelease_binaries
}
}

tags = var.tags
}

Expand Down
18 changes: 9 additions & 9 deletions terraform-aws-github-runner/modules/runners-instances/logging.tf
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_linux" {
templatefile(
"${path.module}/templates/cloudwatch_config.json",
{
aws_region = var.aws_region
environment = var.environment
logfiles = jsonencode(local.logfiles_linux)
aws_region = var.aws_region
environment = var.environment
logfiles = jsonencode(local.logfiles_linux)
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_linux.json", {})
}
)
Expand All @@ -86,9 +86,9 @@ resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_linux_nvidia" {
templatefile(
"${path.module}/templates/cloudwatch_config.json",
{
aws_region = var.aws_region
environment = var.environment
logfiles = jsonencode(local.logfiles_linux)
aws_region = var.aws_region
environment = var.environment
logfiles = jsonencode(local.logfiles_linux)
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_linux_nvidia.json", {})
}
)
Expand Down Expand Up @@ -135,10 +135,10 @@ resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_windows" {
templatefile(
"${path.module}/templates/cloudwatch_config.json",
{
aws_region = var.aws_region
environment = var.environment
aws_region = var.aws_region
environment = var.environment
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_windows.json", {})
logfiles = jsonencode(local.logfiles_linux)
logfiles = jsonencode(local.logfiles_linux)
}
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
locals {
name_sg = var.overrides["name_sg"] == "" ? local.tags["Name"] : var.overrides["name_sg"]
name_sg = var.overrides["name_sg"] == "" ? local.tags["Name"] : var.overrides["name_sg"]
}

resource "aws_security_group" "runners_sg" {
Expand Down
Loading

0 comments on commit 9439562

Please sign in to comment.