diff --git a/infra/modules/sagemaker_deployment/cloudwatch.tf b/infra/modules/sagemaker_deployment/cloudwatch.tf
index 5235fd1..fdafd55 100644
--- a/infra/modules/sagemaker_deployment/cloudwatch.tf
+++ b/infra/modules/sagemaker_deployment/cloudwatch.tf
@@ -1,44 +1,267 @@
-resource "aws_cloudwatch_metric_alarm" "cloudwatch_alarm" {
-  count = length(var.alarms)
+resource "aws_cloudwatch_composite_alarm" "scale_up_from_n_to_np1" {
+  # Prefix with the endpoint name so alarm names stay unique when this module is instantiated more than once
+  alarm_name        = "${aws_sagemaker_endpoint.main.name}-scale-up-from-n-to-np1"
+  alarm_description = "Where there exists a high backlog and a high state of any of CPU, GPU, RAM, HardDisk (i.e. live instances are insufficient for the tasks being performed)"
 
-  alarm_name          = "${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.main.name}"
-  alarm_description   = var.alarms[count.index].alarm_description
-  metric_name         = var.alarms[count.index].metric_name
-  namespace           = var.alarms[count.index].namespace
-  comparison_operator = var.alarms[count.index].comparison_operator
-  threshold           = var.alarms[count.index].threshold
-  evaluation_periods  = var.alarms[count.index].evaluation_periods
-  datapoints_to_alarm = var.alarms[count.index].datapoints_to_alarm
-  period              = var.alarms[count.index].period
-  statistic           = var.alarms[count.index].statistic
-  alarm_actions       = concat(var.alarms[count.index].alarm_actions, [aws_sns_topic.alarmstate[count.index].arn])
-  ok_actions          = concat(var.alarms[count.index].ok_actions, [aws_sns_topic.okstate[count.index].arn])
-  dimensions = (count.index == 0 || count.index == 1 || count.index == 2) ? { # TODO: this logic is brittle as it assumes "backlog" has index [0,1,2]; it would be better to have a logic that rests on the specific name of that metric
-    EndpointName = aws_sagemaker_endpoint.main.name # Only EndpointName is used in this case
-    } : {
-    EndpointName = aws_sagemaker_endpoint.main.name,                                             # Both EndpointName and VariantName are used in all other cases
-    VariantName  = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name # Note this logic would not work if there were ever more than one production variant deployed for an LLM
-  }
+  alarm_actions = [aws_appautoscaling_policy.scale_up_from_n_to_np1.arn]
+  ok_actions    = []
+
+  alarm_rule = "ALARM(${aws_cloudwatch_metric_alarm.backlog_high.alarm_name}) AND (ALARM(${aws_cloudwatch_metric_alarm.cpu_high.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.gpu_high.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.ram_high.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.harddisk_high.alarm_name}))"
+}
+
+resource "aws_cloudwatch_composite_alarm" "scale_up_from_0_to_1" {
+  alarm_name        = "${aws_sagemaker_endpoint.main.name}-scale-up-from-0-to-1"
+  alarm_description = "Where there exists a high backlog and there exists a state of insufficient data for any of CPU, GPU, RAM, HardDisk (i.e. there are tasks to do but no instance is live to perform them)"
+
+  alarm_actions = [aws_appautoscaling_policy.scale_up_from_0_to_1.arn]
+  ok_actions    = []
+
+  alarm_rule = "ALARM(${aws_cloudwatch_metric_alarm.backlog_high.alarm_name}) AND (INSUFFICIENT_DATA(${aws_cloudwatch_metric_alarm.cpu_high.alarm_name}) OR INSUFFICIENT_DATA(${aws_cloudwatch_metric_alarm.gpu_high.alarm_name}) OR INSUFFICIENT_DATA(${aws_cloudwatch_metric_alarm.ram_high.alarm_name}) OR INSUFFICIENT_DATA(${aws_cloudwatch_metric_alarm.harddisk_high.alarm_name}))"
+}
+
+
+resource "aws_cloudwatch_composite_alarm" "scale_down_from_n_to_nm1" {
+  alarm_name        = "${aws_sagemaker_endpoint.main.name}-scale-down-from-n-to-nm1"
+  alarm_description = "Where there exists a high backlog and a low state of any of CPU, GPU, RAM, HardDisk (i.e. live instances are excessive for the current tasks)"
+
+  alarm_actions = [aws_appautoscaling_policy.scale_down_from_n_to_nm1.arn]
+  ok_actions    = []
+
+  alarm_rule = "ALARM(${aws_cloudwatch_metric_alarm.backlog_high.alarm_name}) AND (ALARM(${aws_cloudwatch_metric_alarm.cpu_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.gpu_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.ram_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.harddisk_low.alarm_name}))"
+}
+
+
+resource "aws_cloudwatch_composite_alarm" "scale_down_from_n_to_0" {
+  alarm_name        = "${aws_sagemaker_endpoint.main.name}-scale-down-from-n-to-0"
+  alarm_description = "Where there exists a low backlog and a low state of any of CPU, GPU, RAM, HardDisk (i.e. no further tasks are queued and live instances are excessive for any tasks currently in process)"
+
+  alarm_actions = [aws_appautoscaling_policy.scale_down_from_n_to_0.arn]
+  ok_actions    = []
+
+  alarm_rule = "ALARM(${aws_cloudwatch_metric_alarm.backlog_low.alarm_name}) AND (ALARM(${aws_cloudwatch_metric_alarm.cpu_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.gpu_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.ram_low.alarm_name}) OR ALARM(${aws_cloudwatch_metric_alarm.harddisk_low.alarm_name}))"
+}
+
+
+resource "aws_cloudwatch_metric_alarm" "backlog_high" {
+
+  alarm_name          = "${aws_sagemaker_endpoint.main.name}-backlog-high"
+  alarm_description   = "Alarm when the backlog is high"
+  metric_name         = "ApproximateBacklogSize"
+  namespace           = "AWS/SageMaker"
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  threshold           = var.backlog_threshold_high
+  evaluation_periods  = var.evaluation_periods_high
+  datapoints_to_alarm = var.datapoints_to_alarm_high
+  period              = 60
+  statistic           = "Maximum"
+  dimensions          = { EndpointName = aws_sagemaker_endpoint.main.name }
+
+  depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate]
+}
+
+
+resource "aws_cloudwatch_metric_alarm" "backlog_low" {
+
+  alarm_name          = "${aws_sagemaker_endpoint.main.name}-backlog-low"
+  alarm_description   = "Alarm when the backlog is low"
+  metric_name         = "ApproximateBacklogSize"
+  namespace           = "AWS/SageMaker"
+  comparison_operator = "LessThanThreshold"
+  threshold           = var.backlog_threshold_low
+  evaluation_periods  = var.evaluation_periods_low
+  datapoints_to_alarm = var.datapoints_to_alarm_low
+  period              = 60
+  statistic           = "Maximum"
+  dimensions          = { EndpointName = aws_sagemaker_endpoint.main.name }
+
+  depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate]
+}
+
+
+resource "aws_cloudwatch_metric_alarm" "cpu_high" {
+
+  alarm_name          = "${aws_sagemaker_endpoint.main.name}-cpu-high"
+  alarm_description   = "Alarm when in high vCPU Usage"
+  metric_name         = "CPUUtilization"
+  namespace           = "/aws/sagemaker/Endpoints"
+  
comparison_operator = "GreaterThanOrEqualToThreshold" + threshold = var.cpu_threshold_high + evaluation_periods = var.evaluation_periods_high + datapoints_to_alarm = var.datapoints_to_alarm_high + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "cpu_low" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-cpu-low" + alarm_description = "Alarm when in low vCPU Usage" + metric_name = "CPUUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "LessThanOrEqualToThreshold" + threshold = var.cpu_threshold_low + evaluation_periods = var.evaluation_periods_low + datapoints_to_alarm = var.datapoints_to_alarm_low + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "gpu_high" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-gpu-high" + alarm_description = "Alarm when in high GPU Usage" + metric_name = "GPUUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "GreaterThanOrEqualToThreshold" + threshold = var.gpu_threshold_high + evaluation_periods = var.evaluation_periods_high + datapoints_to_alarm = var.datapoints_to_alarm_high + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "gpu_low" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-gpu-low" + alarm_description = "Alarm when in low GPU Usage" + metric_name = "GPUUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "LessThanOrEqualToThreshold" + threshold = var.gpu_threshold_low + evaluation_periods = var.evaluation_periods_low + datapoints_to_alarm = var.datapoints_to_alarm_low + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "ram_high" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-ram-high" + alarm_description = "Alarm when in high RAM Usage" + metric_name = "MemoryUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "GreaterThanOrEqualToThreshold" + threshold = var.ram_threshold_high + evaluation_periods = var.evaluation_periods_high + datapoints_to_alarm = var.datapoints_to_alarm_high + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "ram_low" { + + alarm_name = 
"${aws_sagemaker_endpoint.main.name}-ram-low" + alarm_description = "Alarm when in low RAM Usage" + metric_name = "MemoryUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "LessThanOrEqualToThreshold" + threshold = var.ram_threshold_low + evaluation_periods = var.evaluation_periods_low + datapoints_to_alarm = var.datapoints_to_alarm_low + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] } -resource "null_resource" "wait_for_metric_alarms" { - # Aggregating metric alarms dependencies so we wait for them to be deleted/created before composite alarms are created or deleted. This prevents cyclic dependency issues. - depends_on = [aws_cloudwatch_metric_alarm.cloudwatch_alarm] +resource "aws_cloudwatch_metric_alarm" "harddisk_high" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-harddisk-high" + alarm_description = "Alarm when in high HardDisk Usage" + metric_name = "DiskUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "GreaterThanOrEqualToThreshold" + threshold = var.harddisk_threshold_high + evaluation_periods = var.evaluation_periods_high + datapoints_to_alarm = var.datapoints_to_alarm_high + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] } -resource "aws_cloudwatch_composite_alarm" "composite_alarm" { - count = length(var.alarm_composites) +resource "aws_cloudwatch_metric_alarm" "harddisk_low" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-harddisk-low" + alarm_description = "Alarm when in low RAM Usage" + metric_name = "MemoryUtilization" + namespace = "/aws/sagemaker/Endpoints" + comparison_operator = "LessThanOrEqualToThreshold" + threshold = var.ram_threshold_low + evaluation_periods = var.evaluation_periods_low + datapoints_to_alarm = var.datapoints_to_alarm_low + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } - alarm_name = "${var.alarm_composites[count.index].alarm_name}-${aws_sagemaker_endpoint.main.name}" - alarm_description = var.alarm_composites[count.index].alarm_description - alarm_rule = var.alarm_composites[count.index].alarm_rule - alarm_actions = concat(var.alarm_composites[count.index].alarm_actions, [aws_sns_topic.alarm_composite_notifications[count.index].arn], [aws_sns_topic.composite_alarmstate[count.index].arn]) - ok_actions = var.alarm_composites[count.index].ok_actions + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} - depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarm_composite_notifications, aws_sns_topic.composite_alarmstate, null_resource.wait_for_metric_alarms] +resource "aws_cloudwatch_metric_alarm" "unauthorized_operations" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-unauthorized-operations" + alarm_description = "Alarm when unauthorized operations are detected in the CloudTrail Logs" + metric_name = "UnauthorizedOperationsCount" + namespace = "CloudTrailMetrics" + comparison_operator = 
"GreaterThanOrEqualToThreshold" + threshold = 1 + evaluation_periods = 1 + datapoints_to_alarm = 1 + period = 60 + statistic = "Maximum" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] +} + + +resource "aws_cloudwatch_metric_alarm" "errors_4xx" { + + alarm_name = "${aws_sagemaker_endpoint.main.name}-errors-4XX" + alarm_description = "4XX errors are detected in the CloudTrail Logs" + metric_name = "Invocation4XXErrors" + namespace = "AWS/SageMaker" + comparison_operator = "GreaterThanOrEqualToThreshold" + threshold = 1 + evaluation_periods = 1 + datapoints_to_alarm = 1 + period = 60 + statistic = "Average" + dimensions = { EndpointName = aws_sagemaker_endpoint.main.name, + VariantName = aws_sagemaker_endpoint_configuration.main.production_variants[0].variant_name } + + depends_on = [aws_sagemaker_endpoint.main, aws_sns_topic.alarmstate, aws_sns_topic.okstate] } diff --git a/infra/modules/sagemaker_deployment/lambda.tf b/infra/modules/sagemaker_deployment/lambda.tf index a63ee0c..9618128 100644 --- a/infra/modules/sagemaker_deployment/lambda.tf +++ b/infra/modules/sagemaker_deployment/lambda.tf @@ -14,45 +14,26 @@ resource "aws_lambda_function" "slack_alert_function" { runtime = "python3.12" timeout = 30 - environment { - variables = { - SNS_TO_WEBHOOK_JSON = jsonencode(local.sns_to_webhook_mapping), - ADDRESS = "arn:aws:sns:eu-west-2:${var.aws_account_id}:" - } - } } -resource "aws_lambda_permission" "allow_sns_composite" { - count = length(var.alarm_composites) +resource "aws_lambda_permission" "allow_sns_okstate" { - statement_id = "AllowSNS-composite-${count.index}" + statement_id = "AllowSNS-ok" action = "lambda:InvokeFunction" function_name = aws_lambda_function.slack_alert_function.function_name principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.composite_alarmstate[count.index].arn + source_arn = aws_sns_topic.okstate.arn } resource "aws_lambda_permission" "allow_sns_alarmstate" { - count = length(var.alarms) - - statement_id = "AllowSNS-alarm-${count.index}" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.slack_alert_function.function_name - principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.alarmstate[count.index].arn -} - - -resource "aws_lambda_permission" "allow_sns_okstate" { - count = length(var.alarms) - statement_id = "AllowSNS-ok-${count.index}" + statement_id = "AllowSNS-alarm" action = "lambda:InvokeFunction" function_name = aws_lambda_function.slack_alert_function.function_name principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.okstate[count.index].arn + source_arn = aws_sns_topic.alarmstate.arn } diff --git a/infra/modules/sagemaker_deployment/main.tf b/infra/modules/sagemaker_deployment/main.tf index fcfceef..d88aafa 100644 --- a/infra/modules/sagemaker_deployment/main.tf +++ b/infra/modules/sagemaker_deployment/main.tf @@ -65,7 +65,7 @@ resource "aws_appautoscaling_target" "main" { } -resource "aws_appautoscaling_policy" "scale_up_to_n_policy" { +resource "aws_appautoscaling_policy" "scale_up_from_n_to_np1" { name = "scale-up-to-n-policy-${var.model_name}" policy_type = "StepScaling" @@ -87,7 +87,7 @@ resource "aws_appautoscaling_policy" "scale_up_to_n_policy" { } -resource "aws_appautoscaling_policy" "scale_down_to_n_policy" { +resource "aws_appautoscaling_policy" "scale_down_from_n_to_nm1" { name = 
"scale-down-to-n-policy-${var.model_name}" policy_type = "StepScaling" @@ -109,7 +109,7 @@ resource "aws_appautoscaling_policy" "scale_down_to_n_policy" { } -resource "aws_appautoscaling_policy" "scale_up_to_one_policy" { +resource "aws_appautoscaling_policy" "scale_up_from_0_to_1" { name = "scale-up-to-one-policy-${var.model_name}" policy_type = "StepScaling" @@ -131,7 +131,7 @@ resource "aws_appautoscaling_policy" "scale_up_to_one_policy" { } -resource "aws_appautoscaling_policy" "scale_down_to_zero_policy" { +resource "aws_appautoscaling_policy" "scale_down_from_n_to_0" { name = "scale-down-to-zero-policy-${var.model_name}" policy_type = "StepScaling" @@ -153,22 +153,6 @@ resource "aws_appautoscaling_policy" "scale_down_to_zero_policy" { } -# Mapping SNS topic ARNs to Slack webhook URLs -locals { - sns_to_webhook_mapping = merge({ - for idx, alarm in var.alarms : - replace(aws_sns_topic.alarmstate[idx].arn, "arn:aws:sns:eu-west-2:${var.aws_account_id}:", "") => alarm.slack_webhook_url - }, { - for idx, alarm in var.alarms : - replace(aws_sns_topic.okstate[idx].arn, "arn:aws:sns:eu-west-2:${var.aws_account_id}:", "") => alarm.slack_webhook_url - }, { - for idx, alarm_composite in var.alarm_composites : - replace(aws_sns_topic.composite_alarmstate[idx].arn, "arn:aws:sns:eu-west-2:${var.aws_account_id}:", "") => alarm_composite.slack_webhook_url - } - ) -} - - resource "aws_cloudwatch_log_metric_filter" "unauthorized_operations" { name = "unauthorized-operations-filter" log_group_name = "/aws/sagemaker/Endpoints/${aws_sagemaker_endpoint.main.name}" diff --git a/infra/modules/sagemaker_deployment/outputs.tf b/infra/modules/sagemaker_deployment/outputs.tf index 0fda7d4..a96f9a7 100644 --- a/infra/modules/sagemaker_deployment/outputs.tf +++ b/infra/modules/sagemaker_deployment/outputs.tf @@ -6,23 +6,3 @@ output "model_name" { output "endpoint_name" { value = aws_sagemaker_endpoint.main.name } - - -output "scale_up_to_one_policy_arn" { - value = aws_appautoscaling_policy.scale_up_to_one_policy.arn -} - - -output "scale_down_to_zero_policy_arn" { - value = aws_appautoscaling_policy.scale_down_to_zero_policy.arn -} - - -output "scale_up_to_n_policy_arn" { - value = aws_appautoscaling_policy.scale_up_to_n_policy.arn -} - - -output "scale_down_to_n_policy_arn" { - value = aws_appautoscaling_policy.scale_down_to_n_policy.arn -} diff --git a/infra/modules/sagemaker_deployment/sns.tf b/infra/modules/sagemaker_deployment/sns.tf index 61248ff..12fcb40 100644 --- a/infra/modules/sagemaker_deployment/sns.tf +++ b/infra/modules/sagemaker_deployment/sns.tf @@ -1,7 +1,6 @@ resource "aws_sns_topic" "alarmstate" { - count = length(var.alarms) - name = "alarm-alarmstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.main.name}" + name = "alarm-alarmstate-${aws_sagemaker_endpoint.main.name}" policy = jsonencode({ Version = "2012-10-17", Statement = [ @@ -19,9 +18,8 @@ resource "aws_sns_topic" "alarmstate" { resource "aws_sns_topic" "okstate" { - count = length(var.alarms) - name = "alarm-okstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.main.name}" + name = "alarm-okstate-${aws_sagemaker_endpoint.main.name}" policy = jsonencode({ Version = "2012-10-17", @@ -39,99 +37,17 @@ resource "aws_sns_topic" "okstate" { } -resource "aws_sns_topic" "composite_alarmstate" { - count = length(var.alarm_composites) - - name = "alarm-alarm-composite-lambda-${var.alarm_composites[count.index].alarm_name}-${aws_sagemaker_endpoint.main.name}-topic" - - policy = jsonencode({ - 
Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Principal = { - Service = "cloudwatch.amazonaws.com" - }, - Action = "sns:Publish", - Resource = "*" - } - ] - }) -} - - -resource "aws_sns_topic" "alarm_composite_notifications" { - count = length(var.alarm_composites) - name = "alarm-composite-${var.alarm_composites[count.index].alarm_name}-${aws_sagemaker_endpoint.main.name}-sns-topic" -} - - -resource "aws_sns_topic_policy" "composite_sns_topic_policy" { - count = length(var.alarm_composites) - - arn = aws_sns_topic.alarm_composite_notifications[count.index].arn - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Sid = "AllowPublishFromCloudWatch" - Effect = "Allow", - Principal = { - Service = "cloudwatch.amazonaws.com" - }, - Action = "SNS:Publish", - Resource = aws_sns_topic.alarm_composite_notifications[count.index].arn - }, - { - Sid = "AllowSubscriptionActions" - Effect = "Allow", - Principal = "*", - Action = [ - "sns:Subscribe", - "sns:Receive" - ], - Resource = aws_sns_topic.alarm_composite_notifications[count.index].arn - } - ] - }) -} - - resource "aws_sns_topic_subscription" "sns_lambda_subscription_okstate" { - count = length(var.alarms) - topic_arn = aws_sns_topic.okstate[count.index].arn + topic_arn = aws_sns_topic.okstate.arn protocol = "lambda" endpoint = aws_lambda_function.slack_alert_function.arn } resource "aws_sns_topic_subscription" "sns_lambda_subscription_alarmstate" { - count = length(var.alarms) - topic_arn = aws_sns_topic.alarmstate[count.index].arn + topic_arn = aws_sns_topic.alarmstate.arn protocol = "lambda" endpoint = aws_lambda_function.slack_alert_function.arn } - -resource "aws_sns_topic_subscription" "sns_lambda_subscription_composite" { - count = length(var.alarm_composites) - - topic_arn = aws_sns_topic.composite_alarmstate[count.index].arn - protocol = "lambda" - endpoint = aws_lambda_function.slack_alert_function.arn -} - - -resource "aws_sns_topic_subscription" "email_subscription" { - count = length(var.alarm_composites) - topic_arn = aws_sns_topic.alarm_composite_notifications[count.index].arn - protocol = "email" - endpoint = flatten([ - for variables in var.alarm_composites : - [ - for email in variables.emails : - email - ] - ])[count.index] -} diff --git a/infra/modules/sagemaker_deployment/variables.tf b/infra/modules/sagemaker_deployment/variables.tf index 14be800..14794e4 100644 --- a/infra/modules/sagemaker_deployment/variables.tf +++ b/infra/modules/sagemaker_deployment/variables.tf @@ -87,38 +87,87 @@ variable "scale_down_cooldown" { description = "Cooldown period for scale down" } +variable "backlog_threshold_high" { + type = number + description = "Threshold for high backlog alarm" +} + + +variable "backlog_threshold_low" { + type = number + description = "Threshold for low backlog alarm" +} + + +variable "cpu_threshold_high" { + type = number + description = "Threshold for high CPU alarm (NOTE this varies based on number of vCPU)" +} + + +variable "cpu_threshold_low" { + type = number + description = "Threshold for low CPU alarm (NOTE this varies based on number of vCPU)" +} + + +variable "gpu_threshold_high" { + type = number + description = "Threshold for high GPU alarm (NOTE this varies based on number of GPU)" +} + + +variable "gpu_threshold_low" { + type = number + description = "Threshold for low GPU alarm (NOTE this varies based on number of GPU)" +} + + +variable "ram_threshold_high" { + type = number + description = "Threshold for high RAM alarm" +} -variable "alarms" { - type = 
list(object({
-    alarm_name_prefix   = string
-    alarm_description   = string
-    metric_name         = string
-    namespace           = string
-    comparison_operator = string
-    threshold           = number
-    evaluation_periods  = number
-    datapoints_to_alarm = number
-    period              = number
-    statistic           = string
-    slack_webhook_url   = string
-    alarm_actions       = list(string)
-    ok_actions          = list(string)
-  }))
-  description = "List of CloudWatch alarms to be created"
-}
-
-
-variable "alarm_composites" {
-  type = list(object({
-    alarm_name        = string
-    alarm_description = string
-    alarm_rule        = string
-    alarm_actions     = list(string)
-    ok_actions        = list(string)
-    slack_webhook_url = string
-    emails            = list(string)
-  }))
-  description = "List of CloudWatch composite alarms to be created utilizing pre-existing alarms"
+
+variable "ram_threshold_low" {
+  type        = number
+  description = "Threshold for low RAM alarm"
+}
+
+
+variable "harddisk_threshold_high" {
+  type        = number
+  description = "Threshold for high HardDisk alarm"
+}
+
+
+variable "harddisk_threshold_low" {
+  type        = number
+  description = "Threshold for low HardDisk alarm"
+}
+
+
+variable "evaluation_periods_high" {
+  type        = number
+  description = "Number of evaluation periods to consider for high alarm states"
+}
+
+
+variable "datapoints_to_alarm_high" {
+  type        = number
+  description = "Number of datapoints within an evaluation period to require for high alarm states"
+}
+
+
+variable "evaluation_periods_low" {
+  type        = number
+  description = "Number of evaluation periods to consider for low alarm states"
+}
+
+
+variable "datapoints_to_alarm_low" {
+  type        = number
+  description = "Number of datapoints within an evaluation period to require for low alarm states"
 }
diff --git a/infra/sagemaker_llm_resources.tf b/infra/sagemaker_llm_resources.tf
index a4023df..8aa2424 100644
--- a/infra/sagemaker_llm_resources.tf
+++ b/infra/sagemaker_llm_resources.tf
@@ -33,256 +33,20 @@ module "gpt_neo_125m_deployment" {
     "SAGEMAKER_PROGRAM" : "inference.py",
     "SM_NUM_GPUS" : "1"
   }
-
-  alarms = [
-    {
-      alarm_name_prefix   = "nonzero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle
-      alarm_description   = "Scale up based on existence of backlog"
-      metric_name         = "ApproximateBacklogSize"
-      namespace           = "AWS/SageMaker"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold           = 1
-      evaluation_periods  = 1
-      datapoints_to_alarm = 1
-      period              = 60
-      statistic           = "Maximum"
-      slack_webhook_url   = var.slack_webhook_backlog_alerts
-      alarm_actions       = [module.gpt_neo_125m_deployment.scale_up_to_one_policy_arn]
-      ok_actions          = []
-    },
-    {
-      alarm_name_prefix   = "zero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle
-      alarm_description   = "Scale down based on non-existence of backlog"
-      metric_name         = "ApproximateBacklogSize"
-      namespace           = "AWS/SageMaker"
-      comparison_operator = "LessThanThreshold"
-      threshold           = 1
-      evaluation_periods  = 15
-      datapoints_to_alarm = 15
-      period              = 60
-      statistic           = "Maximum"
-      slack_webhook_url   = var.slack_webhook_backlog_alerts
-      alarm_actions       = [module.gpt_neo_125m_deployment.scale_down_to_zero_policy_arn]
-      ok_actions          = []
-    },
-    {
-      alarm_name_prefix   = "backlog-composite-alarm" # TODO: backlog is currently required to have index 0, which is brittle
-      alarm_description   = "Detect if queries in backlog for extended time period"
-      metric_name         = "ApproximateBacklogSize"
-      namespace           = "AWS/SageMaker"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold           = 0
-      evaluation_periods  = 3
-      datapoints_to_alarm = 3
-      period              = 
3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "high-cpu" - alarm_description = "Scale up when CPU usage is heavy" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-cpu" - alarm_description = "Scale down when CPU usage is light" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-gpu" - alarm_description = "Scale up when GPU usage is heavy" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-ram" - alarm_description = "Scale up when RAM usage is heavy" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-ram" - alarm_description = "Scale down when RAM usage is light" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-hard-disk" - alarm_description = "Scale up when Hard Disk usage is heavy" - metric_name = "DiskUtilization" - namespace = 
"/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-hard-disk" - alarm_description = "Scale down when Hard Disk usage is light" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.gpt_neo_125m_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "unauthorized-operations" - alarm_description = "Unauthorized operations are detected in the CloudTrail Logs" - metric_name = "UnauthorizedOperationsCount" - namespace = "CloudTrailMetrics" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "errors-4XX" - alarm_description = "4XX errors are detected in the CloudTrail Logs" - metric_name = "Invocation4XXErrors" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "elevated-cpu-composite" - alarm_description = "Detect CPU activity above idle for extended time period" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 8 # TODO: we must manually multiply by CPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu-composite" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [] - ok_actions = [] - } - ] - - # Note that we have cyclic dependency issues now and you have to destroy to get this to work - alarm_composites = [ - { - alarm_name = "ElevatedCPUUtilizationNoBackLog" - alarm_description = "Triggered when CPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(elevated-cpu-composite-${module.gpt_neo_125m_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.gpt_neo_125m_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - }, - { - alarm_name = 
"ElevatedGPUUtilizationNoBackLog" - alarm_description = "Triggered when GPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(low-gpu-composite-${module.gpt_neo_125m_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.gpt_neo_125m_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - } - ] + backlog_threshold_high = 1 + backlog_threshold_low = 1 + cpu_threshold_high = 80 * 8 # 8 vCPUs + cpu_threshold_low = 20 * 8 # 8 vCPUs + gpu_threshold_high = 80 * 1 # 1 GPU + gpu_threshold_low = 20 * 1 # 1 GPU + ram_threshold_high = 80 + ram_threshold_low = 20 + harddisk_threshold_high = 80 + harddisk_threshold_low = 20 + evaluation_periods_high = 1 + datapoints_to_alarm_high = 1 + evaluation_periods_low = 20 + datapoints_to_alarm_low = 15 # These variables do not change between LLMs source = "./modules/sagemaker_deployment" @@ -317,256 +81,20 @@ module "phi_2_3b_deployment" { "SAGEMAKER_MODEL_SERVER_WORKERS" : "1", "SAGEMAKER_PROGRAM" : "inference.py" } - - alarms = [ - { - alarm_name_prefix = "nonzero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale up based on existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_up_to_one_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "zero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale down based on non-existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanThreshold" - threshold = 1 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_down_to_zero_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "backlog-composite-alarm" - alarm_description = "Detect if queries in backlog for extended time period" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 0 - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "high-cpu" - alarm_description = "Scale up when CPU usage is heavy" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-cpu" - alarm_description = "Scale down when CPU usage is light" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 4 # TODO: we 
must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-gpu" - alarm_description = "Scale up when GPU usage is heavy" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-ram" - alarm_description = "Scale up when RAM usage is heavy" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-ram" - alarm_description = "Scale down when RAM usage is light" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-hard-disk" - alarm_description = "Scale up when Hard Disk usage is heavy" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-hard-disk" - alarm_description = "Scale down when Hard Disk usage is light" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.phi_2_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "unauthorized-operations" - alarm_description = "Unauthorized operations are detected in the CloudTrail Logs" - metric_name = 
"UnauthorizedOperationsCount" - namespace = "CloudTrailMetrics" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "errors-4XX" - alarm_description = "4XX errors are detected in the CloudTrail Logs" - metric_name = "Invocation4XXErrors" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "elevated-cpu-composite" - alarm_description = "Detect CPU activity above idle for extended time period" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 4 # TODO: we must manually multiply by CPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu-composite" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [] - ok_actions = [] - } - ] - - alarm_composites = [ - { - alarm_name = "ElevatedCPUUtilizationNoBackLog" - alarm_description = "Triggered when CPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(elevated-cpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - }, - { - alarm_name = "ElevatedGPUUtilizationNoBackLog" - alarm_description = "Triggered when GPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(low-gpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - } - - ] + backlog_threshold_high = 1 + backlog_threshold_low = 1 + cpu_threshold_high = 80 * 4 # 4 vCPUs + cpu_threshold_low = 20 * 4 # 4 vCPUs + gpu_threshold_high = 80 * 1 # 1 GPU + gpu_threshold_low = 20 * 1 # 1 GPU + ram_threshold_high = 80 + ram_threshold_low = 20 + harddisk_threshold_high = 80 + harddisk_threshold_low = 20 + evaluation_periods_high = 1 + datapoints_to_alarm_high = 1 + evaluation_periods_low = 20 + datapoints_to_alarm_low = 15 # These variables do not change between LLMs source = "./modules/sagemaker_deployment" @@ -606,256 +134,20 @@ module "llama_3_3b_deployment" { "SAGEMAKER_MODEL_SERVER_WORKERS" : "1", "SAGEMAKER_PROGRAM" : 
"inference.py" } - - alarms = [ - { - alarm_name_prefix = "nonzero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale up based on existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_up_to_one_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "zero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale down based on non-existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanThreshold" - threshold = 1 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_down_to_zero_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "backlog-composite-alarm" - alarm_description = "Detect if queries in backlog for extended time period" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 0 - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "high-cpu" - alarm_description = "Scale up when CPU usage is heavy" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-cpu" - alarm_description = "Scale down when CPU usage is light" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-gpu" - alarm_description = "Scale up when GPU usage is heavy" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold 
= 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-ram" - alarm_description = "Scale up when RAM usage is heavy" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-ram" - alarm_description = "Scale down when RAM usage is light" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-hard-disk" - alarm_description = "Scale up when Hard Disk usage is heavy" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-hard-disk" - alarm_description = "Scale down when Hard Disk usage is light" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.llama_3_3b_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "unauthorized-operations" - alarm_description = "Unauthorized operations are detected in the CloudTrail Logs" - metric_name = "UnauthorizedOperationsCount" - namespace = "CloudTrailMetrics" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "errors-4XX" - alarm_description = "4XX errors are detected in the CloudTrail Logs" - metric_name = "Invocation4XXErrors" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "elevated-cpu-composite" - alarm_description = "Detect CPU activity above idle for extended time period" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 
* 4 # TODO: we must manually multiply by CPU count as Normalized metric not available
-      evaluation_periods = 3
-      datapoints_to_alarm = 3
-      period = 3600
-      statistic = "Average"
-      slack_webhook_url = var.slack_webhook_cpu_alerts
-      alarm_actions = []
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-gpu-composite"
-      alarm_description = "Scale down when GPU usage is light"
-      metric_name = "GPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
-      evaluation_periods = 3
-      datapoints_to_alarm = 3
-      period = 3600
-      statistic = "Average"
-      slack_webhook_url = var.slack_webhook_gpu_alerts
-      alarm_actions = []
-      ok_actions = []
-    }
-  ]
-
-  alarm_composites = [
-    {
-      alarm_name = "ElevatedCPUUtilizationNoBackLog"
-      alarm_description = "Triggered when CPU util is above idle and no backlog query exists for an extended time"
-      alarm_rule = "ALARM(elevated-cpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)"
-      alarm_actions = []
-      ok_actions = []
-      slack_webhook_url = var.slack_webhook_backlog_alerts
-      emails = var.sagemaker_budget_emails
-    },
-    {
-      alarm_name = "ElevatedGPUUtilizationNoBackLog"
-      alarm_description = "Triggered when GPU util is above idle and no backlog query exists for an extended time"
-      alarm_rule = "ALARM(low-gpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)"
-      alarm_actions = []
-      ok_actions = []
-      slack_webhook_url = var.slack_webhook_backlog_alerts
-      emails = var.sagemaker_budget_emails
-    }
-
-  ]
+  backlog_threshold_high = 1
+  backlog_threshold_low = 1
+  cpu_threshold_high = 80 * 4 # 4 vCPUs
+  cpu_threshold_low = 20 * 4 # 4 vCPUs
+  gpu_threshold_high = 80 * 1 # 1 GPU
+  gpu_threshold_low = 20 * 1 # 1 GPU
+  ram_threshold_high = 80
+  ram_threshold_low = 20
+  harddisk_threshold_high = 80
+  harddisk_threshold_low = 20
+  evaluation_periods_high = 1
+  datapoints_to_alarm_high = 1
+  evaluation_periods_low = 20
+  datapoints_to_alarm_low = 15 # These variables do not change between LLMs

   source = "./modules/sagemaker_deployment"

@@ -894,256 +186,20 @@ module "llama_3_3b_instruct_deployment" {
     "SAGEMAKER_MODEL_SERVER_WORKERS" : "1",
     "SAGEMAKER_PROGRAM" : "inference.py"
   }
-
-  alarms = [
-    {
-      alarm_name_prefix = "nonzero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle
-      alarm_description = "Scale up based on existence of backlog"
-      metric_name = "ApproximateBacklogSize"
-      namespace = "AWS/SageMaker"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 1
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_backlog_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_up_to_one_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "zero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle
-      alarm_description = "Scale down based on non-existence of backlog"
-      metric_name = "ApproximateBacklogSize"
-      namespace = "AWS/SageMaker"
-      comparison_operator = "LessThanThreshold"
-      threshold = 1
-      evaluation_periods = 15
-      datapoints_to_alarm = 15
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_backlog_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_down_to_zero_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "backlog-composite-alarm"
-      alarm_description = "Detect if queries in backlog for extended time period"
-      metric_name = "ApproximateBacklogSize"
-      namespace = "AWS/SageMaker"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold = 0
-      evaluation_periods = 3
-      datapoints_to_alarm = 3
-      period = 3600
-      statistic = "Average"
-      slack_webhook_url = var.slack_webhook_backlog_alerts
-      alarm_actions = []
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "high-cpu"
-      alarm_description = "Scale up when CPU usage is heavy"
-      metric_name = "CPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 80 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_cpu_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_up_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-cpu"
-      alarm_description = "Scale down when CPU usage is light"
-      metric_name = "CPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold = 20 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
-      evaluation_periods = 15
-      datapoints_to_alarm = 15
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_cpu_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_down_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "high-gpu"
-      alarm_description = "Scale up when GPU usage is heavy"
-      metric_name = "GPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_gpu_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_up_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-gpu"
-      alarm_description = "Scale down when GPU usage is light"
-      metric_name = "GPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
-      evaluation_periods = 15
-      datapoints_to_alarm = 15
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_gpu_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_down_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "high-ram"
-      alarm_description = "Scale up when RAM usage is heavy"
-      metric_name = "MemoryUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 80
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_resource_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_up_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-ram"
-      alarm_description = "Scale down when RAM usage is light"
-      metric_name = "MemoryUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold = 20
-      evaluation_periods = 15
-      datapoints_to_alarm = 15
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_resource_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_down_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "high-hard-disk"
-      alarm_description = "Scale up when Hard Disk usage is heavy"
-      metric_name = "DiskUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 80
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_resource_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_up_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-hard-disk"
-      alarm_description = "Scale down when Hard Disk usage is light"
-      metric_name = "DiskUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "LessThanOrEqualToThreshold"
-      threshold = 20
-      evaluation_periods = 15
-      datapoints_to_alarm = 15
-      period = 60
-      statistic = "Maximum"
-      slack_webhook_url = var.slack_webhook_resource_alerts
-      alarm_actions = [module.llama_3_3b_instruct_deployment.scale_down_to_n_policy_arn]
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "unauthorized-operations"
-      alarm_description = "Unauthorized operations are detected in the CloudTrail Logs"
-      metric_name = "UnauthorizedOperationsCount"
-      namespace = "CloudTrailMetrics"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 1
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Sum"
-      slack_webhook_url = var.slack_webhook_security_alerts
-      alarm_actions = [] # SNS to give alert to developers
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "errors-4XX"
-      alarm_description = "4XX errors are detected in the CloudTrail Logs"
-      metric_name = "Invocation4XXErrors"
-      namespace = "AWS/SageMaker"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 1
-      evaluation_periods = 1
-      datapoints_to_alarm = 1
-      period = 60
-      statistic = "Sum"
-      slack_webhook_url = var.slack_webhook_security_alerts
-      alarm_actions = [] # SNS to give alert to developers
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "elevated-cpu-composite"
-      alarm_description = "Detect CPU activity above idle for extended time period"
-      metric_name = "CPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 20 * 4 # TODO: we must manually multiply by CPU count as Normalized metric not available
-      evaluation_periods = 3
-      datapoints_to_alarm = 3
-      period = 3600
-      statistic = "Average"
-      slack_webhook_url = var.slack_webhook_cpu_alerts
-      alarm_actions = []
-      ok_actions = []
-    },
-    {
-      alarm_name_prefix = "low-gpu-composite"
-      alarm_description = "Scale down when GPU usage is light"
-      metric_name = "GPUUtilization"
-      namespace = "/aws/sagemaker/Endpoints"
-      comparison_operator = "GreaterThanOrEqualToThreshold"
-      threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
-      evaluation_periods = 3
-      datapoints_to_alarm = 3
-      period = 3600
-      statistic = "Average"
-      slack_webhook_url = var.slack_webhook_gpu_alerts
-      alarm_actions = []
-      ok_actions = []
-    }
-  ]
-
-  alarm_composites = [
-    {
-      alarm_name = "ElevatedCPUUtilizationNoBackLog"
-      alarm_description = "Triggered when CPU util is above idle and no backlog query exists for an extended time"
- alarm_rule = "ALARM(elevated-cpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - }, - { - alarm_name = "ElevatedGPUUtilizationNoBackLog" - alarm_description = "Triggered when GPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(low-gpu-composite-${module.phi_2_3b_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.phi_2_3b_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - } - - ] + backlog_threshold_high = 1 + backlog_threshold_low = 1 + cpu_threshold_high = 80 * 4 # 4 vCPUs + cpu_threshold_low = 20 * 4 # 4 vCPUs + gpu_threshold_high = 80 * 1 # 1 GPU + gpu_threshold_low = 20 * 1 # 1 GPU + ram_threshold_high = 80 + ram_threshold_low = 20 + harddisk_threshold_high = 80 + harddisk_threshold_low = 20 + evaluation_periods_high = 1 + datapoints_to_alarm_high = 1 + evaluation_periods_low = 20 + datapoints_to_alarm_low = 15 # These variables do not change between LLMs source = "./modules/sagemaker_deployment" @@ -1180,242 +236,20 @@ module "mistral_7b_instruct_deployment" { "SAGEMAKER_MODEL_SERVER_WORKERS" : "1", "SAGEMAKER_PROGRAM" : "inference.py", } - - alarms = [ - { - alarm_name_prefix = "nonzero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale up based on existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_up_to_one_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "zero-backlog" # TODO: backlog is currently required to have index [0,1] which is brittle - alarm_description = "Scale down based on non-existence of backlog" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanThreshold" - threshold = 1 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_down_to_zero_policy_arn] - ok_actions = [] - }, - - { - alarm_name_prefix = "backlog-composite-alarm" # TODO: backlog is currently required to have index 0, which is brittle - alarm_description = "Detect if queries in backlog for extended time period" - metric_name = "ApproximateBacklogSize" - namespace = "AWS/SageMaker" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 0 - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_backlog_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "low-cpu" - alarm_description = "Scale down when CPU usage is light" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 48 # TODO: we must manually multiply by vCPU count as Normalized metric not available - evaluation_periods = 15 - 
datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-gpu" - alarm_description = "Scale up when GPU usage is heavy" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-ram" - alarm_description = "Scale up when RAM usage is heavy" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-ram" - alarm_description = "Scale down when RAM usage is light" - metric_name = "MemoryUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "high-hard-disk" - alarm_description = "Scale up when Hard Disk usage is heavy" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 80 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_up_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "low-hard-disk" - alarm_description = "Scale down when Hard Disk usage is light" - metric_name = "DiskUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "LessThanOrEqualToThreshold" - threshold = 20 - evaluation_periods = 15 - datapoints_to_alarm = 15 - period = 60 - statistic = "Maximum" - slack_webhook_url = var.slack_webhook_resource_alerts - alarm_actions = [module.mistral_7b_instruct_deployment.scale_down_to_n_policy_arn] - ok_actions = [] - }, - { - alarm_name_prefix = "unauthorized-operations" - alarm_description = "Unauthorized operations are detected in the CloudTrail Logs" - metric_name = 
"UnauthorizedOperationsCount" - namespace = "CloudTrailMetrics" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "errors-4XX" - alarm_description = "4XX errors are detected in the CloudTrail Logs" - metric_name = "Invocation4XXErrors" - namespace = "AWS/SageMaker" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 1 - evaluation_periods = 1 - datapoints_to_alarm = 1 - period = 60 - statistic = "Sum" - slack_webhook_url = var.slack_webhook_security_alerts - alarm_actions = [] # SNS to give alert to developers - ok_actions = [] - }, - { - alarm_name_prefix = "elevated-cpu-composite" - alarm_description = "Detect CPU activity above idle for extended time period" - metric_name = "CPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 48 # TODO: we must manually multiply by CPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_cpu_alerts - alarm_actions = [] - ok_actions = [] - }, - { - alarm_name_prefix = "low-gpu-composite" - alarm_description = "Scale down when GPU usage is light" - metric_name = "GPUUtilization" - namespace = "/aws/sagemaker/Endpoints" - comparison_operator = "GreaterThanOrEqualToThreshold" - threshold = 20 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available - evaluation_periods = 3 - datapoints_to_alarm = 3 - period = 3600 - statistic = "Average" - slack_webhook_url = var.slack_webhook_gpu_alerts - alarm_actions = [] - ok_actions = [] - } - ] - - alarm_composites = [ - { - alarm_name = "ElevatedCPUUtilizationNoBackLog" - alarm_description = "Triggered when CPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(elevated-cpu-composite-${module.mistral_7b_instruct_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.mistral_7b_instruct_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - }, - { - alarm_name = "ElevatedGPUUtilizationNoBackLog" - alarm_description = "Triggered when GPU util is above idle and no backlog query exists for an extended time" - alarm_rule = "ALARM(low-gpu-composite-${module.mistral_7b_instruct_deployment.model_name}-endpoint) AND ALARM(backlog-composite-alarm-${module.mistral_7b_instruct_deployment.model_name}-endpoint)" - alarm_actions = [] - ok_actions = [] - slack_webhook_url = var.slack_webhook_backlog_alerts - emails = var.sagemaker_budget_emails - } - - ] + backlog_threshold_high = 1 + backlog_threshold_low = 1 + cpu_threshold_high = 80 * 48 # 48 vCPUs + cpu_threshold_low = 20 * 48 # 48 vCPUs + gpu_threshold_high = 80 * 4 # 4 GPUs + gpu_threshold_low = 20 * 4 # 4 GPUs + ram_threshold_high = 80 + ram_threshold_low = 20 + harddisk_threshold_high = 80 + harddisk_threshold_low = 20 + evaluation_periods_high = 1 + datapoints_to_alarm_high = 1 + evaluation_periods_low = 15 + datapoints_to_alarm_low = 15 # These variables do not change between LLMs source = "./modules/sagemaker_deployment"