Skip to content

Commit

Permalink
feat: add autoscaler_policy_overrides support to module (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikenorgate authored Jun 11, 2024
1 parent 3a0e76d commit 2e6eeba
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 12 deletions.
148 changes: 139 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,53 @@ module "castai-eks-cluster" {
}
}
}
autoscaler_settings = {
enabled = true
node_templates_partial_matching_enabled = false
unschedulable_pods = {
enabled = true
headroom = {
enabled = true
cpu_percentage = 10
memory_percentage = 10
}
headroom_spot = {
enabled = true
cpu_percentage = 10
memory_percentage = 10
}
}
node_downscaler = {
enabled = true
empty_nodes = {
enabled = true
}
evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = true
node_grace_period_minutes = 10
scoped_mode = false
}
}
cluster_limits = {
enabled = true
cpu = {
max_cores = 20
min_cores = 1
}
}
}
}
```

Expand Down Expand Up @@ -311,6 +358,88 @@ module "castai-eks-cluster" {
}
```

Migrating from 9.x.x to 9.3.x
---------------------------

Version 9.3.x changed:
* Deprecated `autoscaler_policies_json` attribute. Use `autoscaler_settings` instead.

Old configuration:
```hcl
module "castai-eks-cluster" {
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": false,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
},
"nodeTemplatesPartialMatchingEnabled": false,
"clusterLimits": {
"cpu": {
"maxCores": 20,
"minCores": 1
},
"enabled": true
}
}
EOT
}
```

New configuration:
```hcl
module "castai-eks-cluster" {
autoscaler_settings = {
enabled = true
node_templates_partial_matching_enabled = false
unschedulable_pods = {
enabled = true
}
node_downscaler = {
enabled = true
empty_nodes = {
enabled = true
}
evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = true
node_grace_period_minutes = 10
scoped_mode = false
}
}
cluster_limits = {
enabled = true
cpu = {
max_cores = 20
min_cores = 1
}
}
}
}
```

# Examples

Usage examples are located in [terraform provider repo](https://github.com/castai/terraform-provider-castai/tree/master/examples/eks)
Expand All @@ -324,20 +453,20 @@ terraform-docs markdown table . --output-file README.md
<!-- BEGIN_TF_DOCS -->
## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 0.13 |
| <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 2.49 |
| <a name="requirement_castai"></a> [castai](#requirement\_castai) | ~> 7.3.0 |
| Name | Version |
|------|----------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 0.13 |
| <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 2.49 |
| <a name="requirement_castai"></a> [castai](#requirement\_castai) | ~> 7.4.0 |
| <a name="requirement_helm"></a> [helm](#requirement\_helm) | >= 2.0.0 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_castai"></a> [castai](#provider\_castai) | ~> 7.3.0 |
| <a name="provider_helm"></a> [helm](#provider\_helm) | >= 2.0.0 |
| <a name="provider_null"></a> [null](#provider\_null) | n/a |
| <a name="provider_castai"></a> [castai](#provider\_castai) | 7.4.0 |
| <a name="provider_helm"></a> [helm](#provider\_helm) | 2.13.2 |
| <a name="provider_null"></a> [null](#provider\_null) | 3.2.2 |

## Modules

Expand Down Expand Up @@ -380,7 +509,8 @@ No modules.
| <a name="input_agent_version"></a> [agent\_version](#input\_agent\_version) | Version of castai-agent helm chart. Default latest | `string` | `null` | no |
| <a name="input_api_grpc_addr"></a> [api\_grpc\_addr](#input\_api\_grpc\_addr) | CAST AI GRPC API address | `string` | `"api-grpc.cast.ai:443"` | no |
| <a name="input_api_url"></a> [api\_url](#input\_api\_url) | URL of alternative CAST AI API to be used during development or testing | `string` | `"https://api.cast.ai"` | no |
| <a name="input_autoscaler_policies_json"></a> [autoscaler\_policies\_json](#input\_autoscaler\_policies\_json) | Optional json object to override CAST AI cluster autoscaler policies | `string` | `null` | no |
| <a name="input_autoscaler_policies_json"></a> [autoscaler\_policies\_json](#input\_autoscaler\_policies\_json) | Optional json object to override CAST AI cluster autoscaler policies. Deprecated, use `autoscaler_settings` instead. | `string` | `null` | no |
| <a name="input_autoscaler_settings"></a> [autoscaler\_settings](#input\_autoscaler\_settings) | Optional Autoscaler policy definitions to override current autoscaler settings | `any` | `null` | no |
| <a name="input_aws_account_id"></a> [aws\_account\_id](#input\_aws\_account\_id) | ID of AWS account the cluster is located in. | `string` | n/a | yes |
| <a name="input_aws_assume_role_arn"></a> [aws\_assume\_role\_arn](#input\_aws\_assume\_role\_arn) | Arn of the role to be used by CAST AI for IAM access | `string` | `null` | no |
| <a name="input_aws_cluster_name"></a> [aws\_cluster\_name](#input\_aws\_cluster\_name) | Name of the cluster to be connected to CAST AI. | `string` | n/a | yes |
Expand Down
133 changes: 132 additions & 1 deletion main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -811,8 +811,139 @@ resource "helm_release" "castai_kvisor_self_managed" {
}

# Configures CAST AI autoscaler policies for the cluster registered as
# castai_eks_cluster.my_castai_cluster. Policies can be passed either as raw
# JSON (var.autoscaler_policies_json) or as a structured object
# (var.autoscaler_settings), which is expanded into nested blocks below.
#
# NOTE(review): cluster_id is assigned twice in this view; this looks like the
# old and new lines of a diff shown together. Confirm that only one assignment
# remains in the actual file.
resource "castai_autoscaler" "castai_autoscaler_policies" {
cluster_id = castai_eks_cluster.my_castai_cluster.id

autoscaler_policies_json = var.autoscaler_policies_json
cluster_id = castai_eks_cluster.my_castai_cluster.id

# Emit the autoscaler_settings block only when var.autoscaler_settings is non-null.
dynamic "autoscaler_settings" {
for_each = var.autoscaler_settings != null ? [var.autoscaler_settings] : []

content {
# Throughout this block, try(<path>, null) evaluates to null when the key is
# missing from the caller-supplied object, so omitted keys are passed as null.
enabled = try(autoscaler_settings.value.enabled, null)
is_scoped_mode = try(autoscaler_settings.value.is_scoped_mode, null)
node_templates_partial_matching_enabled = try(autoscaler_settings.value.node_templates_partial_matching_enabled, null)

# For every nested dynamic block, try([<path>], []) produces a one-element
# list when the key exists and an empty list (no block generated) when the
# lookup fails.
dynamic "unschedulable_pods" {
for_each = try([autoscaler_settings.value.unschedulable_pods], [])

content {
enabled = try(unschedulable_pods.value.enabled, null)
custom_instances_enabled = try(unschedulable_pods.value.custom_instances_enabled, null)

dynamic "headroom" {
for_each = try([unschedulable_pods.value.headroom], [])

content {
enabled = try(headroom.value.enabled, null)
cpu_percentage = try(headroom.value.cpu_percentage, null)
memory_percentage = try(headroom.value.memory_percentage, null)
}
}

# Same shape as "headroom", read from the headroom_spot key.
dynamic "headroom_spot" {
for_each = try([unschedulable_pods.value.headroom_spot], [])

content {
enabled = try(headroom_spot.value.enabled, null)
cpu_percentage = try(headroom_spot.value.cpu_percentage, null)
memory_percentage = try(headroom_spot.value.memory_percentage, null)
}
}

dynamic "node_constraints" {
for_each = try([unschedulable_pods.value.node_constraints], [])

content {
enabled = try(node_constraints.value.enabled, null)
min_cpu_cores = try(node_constraints.value.min_cpu_cores, null)
max_cpu_cores = try(node_constraints.value.max_cpu_cores, null)
min_ram_mib = try(node_constraints.value.min_ram_mib, null)
max_ram_mib = try(node_constraints.value.max_ram_mib, null)
}
}
}
}

dynamic "cluster_limits" {
for_each = try([autoscaler_settings.value.cluster_limits], [])

content {
enabled = try(cluster_limits.value.enabled, null)


dynamic "cpu" {
for_each = try([cluster_limits.value.cpu], [])

content {
min_cores = try(cpu.value.min_cores, null)
max_cores = try(cpu.value.max_cores, null)
}
}
}
}

dynamic "spot_instances" {
for_each = try([autoscaler_settings.value.spot_instances], [])

content {
enabled = try(spot_instances.value.enabled, null)
max_reclaim_rate = try(spot_instances.value.max_reclaim_rate, null)
spot_diversity_enabled = try(spot_instances.value.spot_diversity_enabled, null)
spot_diversity_price_increase_limit = try(spot_instances.value.spot_diversity_price_increase_limit, null)

dynamic "spot_backups" {
for_each = try([spot_instances.value.spot_backups], [])

content {
enabled = try(spot_backups.value.enabled, null)
spot_backup_restore_rate_seconds = try(spot_backups.value.spot_backup_restore_rate_seconds, null)
}
}

dynamic "spot_interruption_predictions" {
for_each = try([spot_instances.value.spot_interruption_predictions], [])

content {
enabled = try(spot_interruption_predictions.value.enabled, null)
spot_interruption_predictions_type = try(spot_interruption_predictions.value.spot_interruption_predictions_type, null)
}
}
}
}

dynamic "node_downscaler" {
for_each = try([autoscaler_settings.value.node_downscaler], [])

content {
enabled = try(node_downscaler.value.enabled, null)

dynamic "empty_nodes" {
for_each = try([node_downscaler.value.empty_nodes], [])

content {
enabled = try(empty_nodes.value.enabled, null)
delay_seconds = try(empty_nodes.value.delay_seconds, null)
}
}

dynamic "evictor" {
for_each = try([node_downscaler.value.evictor], [])

content {
enabled = try(evictor.value.enabled, null)
dry_run = try(evictor.value.dry_run, null)
aggressive_mode = try(evictor.value.aggressive_mode, null)
scoped_mode = try(evictor.value.scoped_mode, null)
cycle_interval = try(evictor.value.cycle_interval, null)
node_grace_period_minutes = try(evictor.value.node_grace_period_minutes, null)
pod_eviction_failure_back_off_interval = try(evictor.value.pod_eviction_failure_back_off_interval, null)
ignore_pod_disruption_budgets = try(evictor.value.ignore_pod_disruption_budgets, null)
}
}
}
}
}
}

# Explicit ordering: this resource is handled only after the agent and evictor
# Helm releases listed here.
depends_on = [helm_release.castai_agent, helm_release.castai_evictor, helm_release.castai_evictor_ext]
}
8 changes: 7 additions & 1 deletion variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@ variable "api_grpc_addr" {

# Raw JSON form of the autoscaler policies. Defaults to null (not set).
# Per its description this input is deprecated in favour of `autoscaler_settings`.
# NOTE(review): two `description` lines are visible here; this looks like the
# old and new lines of a diff shown together. Confirm only the newer one remains.
variable "autoscaler_policies_json" {
type = string
description = "Optional json object to override CAST AI cluster autoscaler policies"
description = "Optional json object to override CAST AI cluster autoscaler policies. Deprecated, use `autoscaler_settings` instead."
default = null
}

# Structured autoscaler policy definition. Typed as `any` so callers may pass
# an object containing only the keys they want to set; null (the default)
# means no settings object is supplied.
variable "autoscaler_settings" {
  description = "Optional Autoscaler policy definitions to override current autoscaler settings"
  type        = any
  default     = null
}

Expand Down
2 changes: 1 addition & 1 deletion versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ terraform {
}
castai = {
source = "castai/castai"
version = "~> 7.3.0"
version = "~> 7.4.0"
}
helm = {
source = "hashicorp/helm"
Expand Down

0 comments on commit 2e6eeba

Please sign in to comment.