From d6e36e517aa030ca64042709eabf111d97f8872f Mon Sep 17 00:00:00 2001 From: rvasahu-amazon <106207732+rvasahu-amazon@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:40:21 -0800 Subject: [PATCH] Add new OTel pipeline for scraping Kueue metrics and publishing to CloudWatch (#1426) Co-authored-by: Rohan Gujarathi Co-authored-by: Steven Tran <86030690+transtv@users.noreply.github.com> --- .github/workflows/PR-build.yml | 1 - go.mod | 48 +- go.sum | 94 +-- internal/containerinsightscommon/const.go | 13 + internal/containerinsightscommon/util.go | 2 +- plugins/processors/kueueattributes/config.go | 19 + .../processors/kueueattributes/config_test.go | 18 + plugins/processors/kueueattributes/factory.go | 56 ++ .../kueueattributes/factory_test.go | 45 ++ .../processors/kueueattributes/processor.go | 106 +++ .../kueueattributes/processor_test.go | 123 ++++ service/defaultcomponents/components.go | 4 + service/defaultcomponents/components_test.go | 2 + .../appsignals_and_eks_config.json | 3 +- .../appsignals_and_k8s_config.json | 3 +- .../appsignals_fallback_and_eks_config.json | 3 +- .../appsignals_over_fallback_config.json | 3 +- .../base_container_insights_config.json | 3 +- .../emf_and_kubernetes_config.json | 3 +- .../emf_and_kubernetes_config.yaml | 2 +- .../emf_and_kubernetes_with_gpu_config.json | 3 +- .../emf_and_kubernetes_with_kueue_config.conf | 27 + .../emf_and_kubernetes_with_kueue_config.json | 21 + .../emf_and_kubernetes_with_kueue_config.yaml | 617 ++++++++++++++++++ .../kubernetes_on_prem_config.json | 3 +- .../kueue_container_insights_config.conf | 27 + .../kueue_container_insights_config.json | 21 + .../kueue_container_insights_config.yaml | 350 ++++++++++ .../logs_and_kubernetes_config.json | 3 +- translator/tocwconfig/tocwconfig_test.go | 26 + translator/translate/otel/common/common.go | 5 + .../awsemf_default_kubernetes_kueue.yaml | 10 + .../translate/otel/exporter/awsemf/kueue.go | 69 ++ .../otel/exporter/awsemf/translator.go | 41 +- .../otel/exporter/awsemf/translator_test.go | 100 +++ .../pipeline/containerinsights/translator.go | 66 +- .../containerinsights/translator_test.go | 53 +- .../pipeline/containerinsights/translators.go | 30 + .../containerinsights/translators_test.go | 106 +++ .../otel/processor/kueue/translator.go | 33 + .../awscontainerinsightskueue/translator.go | 74 +++ .../translator_test.go | 79 +++ translator/translate/otel/translate_otel.go | 3 +- 43 files changed, 2202 insertions(+), 116 deletions(-) create mode 100644 plugins/processors/kueueattributes/config.go create mode 100644 plugins/processors/kueueattributes/config_test.go create mode 100644 plugins/processors/kueueattributes/factory.go create mode 100644 plugins/processors/kueueattributes/factory_test.go create mode 100644 plugins/processors/kueueattributes/processor.go create mode 100644 plugins/processors/kueueattributes/processor_test.go create mode 100644 translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.conf create mode 100644 translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.json create mode 100644 translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml create mode 100644 translator/tocwconfig/sampleConfig/kueue_container_insights_config.conf create mode 100644 translator/tocwconfig/sampleConfig/kueue_container_insights_config.json create mode 100644 translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml create mode 100644 translator/translate/otel/exporter/awsemf/awsemf_default_kubernetes_kueue.yaml create mode 100644 translator/translate/otel/exporter/awsemf/kueue.go create mode 100644 translator/translate/otel/pipeline/containerinsights/translators.go create mode 100644 translator/translate/otel/pipeline/containerinsights/translators_test.go create mode 100644 translator/translate/otel/processor/kueue/translator.go create mode 100644 translator/translate/otel/receiver/awscontainerinsightskueue/translator.go create mode 100644 translator/translate/otel/receiver/awscontainerinsightskueue/translator_test.go diff --git a/.github/workflows/PR-build.yml b/.github/workflows/PR-build.yml index ec4b07249c..32b1315def 100644 --- a/.github/workflows/PR-build.yml +++ b/.github/workflows/PR-build.yml @@ -136,4 +136,3 @@ jobs: - name: Build if: steps.cached_binaries.outputs.cache-hit != 'true' && needs.changes.outputs.build == 'true' run: make amazon-cloudwatch-agent-${{ matrix.family }} - \ No newline at end of file diff --git a/go.mod b/go.mod index 39ba997845..62a0d39b19 100644 --- a/go.mod +++ b/go.mod @@ -7,40 +7,41 @@ replace github.com/influxdata/telegraf => github.com/aws/telegraf v0.10.2-0.2024 // Replace with https://github.com/amazon-contributing/opentelemetry-collector-contrib, there are no requirements for all receivers/processors/exporters // to be all replaced since there are some changes that will always be from upstream replace ( - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awsemfexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241024185216-f0b80d5c67a5 - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awsxrayexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241104203805-20919412150d + github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awsemfexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awsxrayexporter => github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241114192544-cb251fe99cbb ) -replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/awsproxy => github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241104203805-20919412150d +replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/awsproxy => github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241114192544-cb251fe99cbb replace ( - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/awsutil => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/cwlogs => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/k8s => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/proxy => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/xray => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/internal/kubelet => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241104203805-20919412150d + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/awsutil => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/cwlogs => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/k8s => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/proxy => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/xray => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/internal/kubelet => github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241114192544-cb251fe99cbb ) replace ( // For clear resource attributes after copy functionality https://github.com/amazon-contributing/opentelemetry-collector-contrib/pull/148 - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241104203805-20919412150d + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241114192544-cb251fe99cbb // Replace with contrib to revert upstream change https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/20519 - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241104203805-20919412150d + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus => github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241114192544-cb251fe99cbb ) -replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor => github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241104203805-20919412150d +replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor => github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241114192544-cb251fe99cbb replace ( - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsxrayreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/jmxreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241104203805-20919412150d - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241104203805-20919412150d + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsxrayreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/jmxreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241114192544-cb251fe99cbb + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver => github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241114192544-cb251fe99cbb ) // Temporary fix, pending PR https://github.com/shirou/gopsutil/pull/957 @@ -91,7 +92,7 @@ replace github.com/aws/aws-sdk-go => github.com/aws/aws-sdk-go v1.48.6 require ( github.com/BurntSushi/toml v1.3.2 github.com/Jeffail/gabs v1.4.0 - github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241104203805-20919412150d + github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241114192544-cb251fe99cbb github.com/aws/aws-sdk-go v1.53.11 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.30.2 github.com/bigkevmcd/go-configparser v0.0.0-20200217161103-d137835d2579 @@ -143,6 +144,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor v0.103.0 github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.103.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.103.0 + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver v0.0.0-20241114192544-cb251fe99cbb github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.103.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.103.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.103.0 @@ -230,7 +232,7 @@ require ( github.com/alecthomas/participle v0.4.1 // indirect github.com/alecthomas/participle/v2 v2.1.1 // indirect github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect - github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241104203805-20919412150d // indirect + github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241114192544-cb251fe99cbb // indirect github.com/antchfx/jsonquery v1.1.5 // indirect github.com/antchfx/xmlquery v1.3.9 // indirect github.com/antchfx/xpath v1.2.0 // indirect diff --git a/go.sum b/go.sum index a13e40e7fc..0434a49d74 100644 --- a/go.sum +++ b/go.sum @@ -180,52 +180,54 @@ github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4 github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/aliyun/alibaba-cloud-sdk-go v1.61.1483 h1:J8HaD+Zpfi1gcel3HCKpoHHEsrcuRrZlSnx7R9SCf5I= github.com/aliyun/alibaba-cloud-sdk-go v1.61.1483/go.mod h1:RcDobYh8k5VP6TNybz9m++gL3ijVI5wueVr0EM10VsU= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241104203805-20919412150d h1:1nlENK4aucOEP+jSMFTgMfJaL8Rl/h6q2LXKiS8kFkM= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241104203805-20919412150d/go.mod h1:f9JwM/LQdKH8ZbYoH9TO35rmjM6WoTNlhWtYJ3YZucc= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241024185216-f0b80d5c67a5 h1:JOp9f4a1pP1zb5U+JPTezTdTZ1LCkcJSTcCd/AAqknM= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241024185216-f0b80d5c67a5/go.mod h1:LPWBVdTSNbZkk80v6aCUthS59cnR4VauVRdAIE3ifaY= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241104203805-20919412150d h1:OaeHJsqrLqlgsIG24uYZvDkZ41nFr9LRhKPtX8f4O+Q= -github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241104203805-20919412150d/go.mod h1:GNeNylfr5KMt55XowzSdgbP7z8CkIAfIHtWSd+xxtws= -github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241104203805-20919412150d h1:eXEVMyh0ESwB4HGuPmDH4DDTlEq8kSJ4o+Hr0tdy+RI= -github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241104203805-20919412150d/go.mod h1:/RaNSxxO06niapGT00snMdgFfjjjW/kV3TZGX8kHuwM= -github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241104203805-20919412150d h1:f13PqXJBa78lokYPhoL1b96csNXfroC8nlK7R7zpN7U= -github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241104203805-20919412150d/go.mod h1:hRZt1DsvoLDIYBwjFvjwg/9IkaBXeCPG0QI57wbj98Q= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241104203805-20919412150d h1:Oz6LLJwcpPD5IYHBzsb0mWZUt0OOgUpDth35qCAF+1Q= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241104203805-20919412150d/go.mod h1:YL1Y62PxJ7dem1ZBUqCfvbnePaGr5p7DTSyOXSCi6O4= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241104203805-20919412150d h1:SDkwRzHzXO7HfmhCCvamdQmPH2fKK7PYMXZKhbrpHCk= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241104203805-20919412150d/go.mod h1:LT+qAyMutoADv2qezO+vkm/BkxR88qEfXdF2d13mV+E= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241104203805-20919412150d h1:1iitRjrfd7JjmfJbe1iM7XENgzjJ6L2THTKtv6V/qmU= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241104203805-20919412150d/go.mod h1:SkscNdWANcuDJ7PkjS5wurSTAuY69nqP0I+cEVY9Ryw= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241104203805-20919412150d h1:JEGyD9HBLCRaw7wWq1H+1oarZ0Aui3a/27V1t0kcnXg= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241104203805-20919412150d/go.mod h1:/TOECDME2jYRPY21CrpTX2eMADJdkmBFBXc1lV/nRZA= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241104203805-20919412150d h1:/ogt2A7O9rOZRCgb0kK4ps0T1b9gCnIiF6XfgaOLuVo= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241104203805-20919412150d/go.mod h1:J08A2gx8VFQfuoBiEfZ6uHIkMtVLd0OuRe5pP88b3I0= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241104203805-20919412150d h1:kSwmEzswg2jdB/WpHhj54u4XL2gLRlwC3dBxO4u9WPY= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241104203805-20919412150d/go.mod h1:+w647+1nLYvZWdk24gZWvdl/kFowbe2iDyISXLDYdmQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241104203805-20919412150d h1:1thAqvybJZ8Uvelq1rwUKbvdHG3+tmHXYru2j3PkGaQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241104203805-20919412150d/go.mod h1:Ai4BsM7C05bEQYO2O272S1LmsyIhO5r0iLmXF5NN5so= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241104203805-20919412150d h1:Gdw6doVzDuXBheQS2pdA2b8nEEyUCBIadcHRwPceDB4= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241104203805-20919412150d/go.mod h1:VS66oUydCMwiWl1BFmLs7iNy4lGsfVYsriXr/d1fpAk= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241104203805-20919412150d h1:IeExpZfRsSLKsRW7ee7IriYHW2k+to26bQ7NxTvYygw= -github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241104203805-20919412150d/go.mod h1:4qvmHiXPOkOXJdpmmxMqprb2BXxOGPgOG45BwLdipUM= -github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241104203805-20919412150d h1:JM0ZVUKL4vNXQcGOpB3PALHrpe9Uq7YrD3zyOdDXleQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241104203805-20919412150d/go.mod h1:t/hYoRTnlPuRjh8y0BwVGgNvNIXpU2QJME5YVppUUHQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241104203805-20919412150d h1:IyQzj0JRHEU00FfiwE/mMX6CepK4I8Dhz3XYYetRaaM= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241104203805-20919412150d/go.mod h1:Rr5b3hr6Jy9w/zTjsOl3vcyDDusc90P+iGdOd0UCYOo= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241104203805-20919412150d h1:l+U6eNiVjhUjsCXD9fFtTssXjRgO2oboalpcCtz2FdM= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241104203805-20919412150d/go.mod h1:2NSghK+mafMGxM8c4Gff8qcprdMD3YQebZtD9UAdB3E= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241104203805-20919412150d h1:5310F/G6U5YcjKots1ovIJzf+4+NMJ/2UAHfVN2JAVE= -github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241104203805-20919412150d/go.mod h1:21nuEQl7YYeLkVrGGvxPXkljqjR40teBCG5trGZ5LxM= -github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241104203805-20919412150d h1:2LmndCpP8KMMn7l9zyI5zeiu6OD18vIBMcrGuFRHdfs= -github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241104203805-20919412150d/go.mod h1:uzpU7Y6+oL6RdOv8IWi6fjT8LNV6FYX6CN6NATLJOiQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241104203805-20919412150d h1:LvHBMj+2Kh+SAESWTVoLbJ8bD4Xq3toB4SU66hYjg0M= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241104203805-20919412150d/go.mod h1:StgsMi0cNUydO2N/7WbLYPUBXzvp9wIMcWp9P8x/Vck= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241104203805-20919412150d h1:eQsOVRzXk32sVDkwJALfN0A4kZEMC2NNG8jBjZAZ7UM= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241104203805-20919412150d/go.mod h1:igQaQJt7eA/y3dZ2VLXVql+6k/ZXBgrAa2y9FrMMIKQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241104203805-20919412150d h1:Cld9lc7zzU/EV70Lv8EXylsx4ATjz9jHfsObEYfjyzQ= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241104203805-20919412150d/go.mod h1:hRUrYatVP/GFNxHn2yW1gJcnPyGtdlTXyebpzzzjZeU= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241104203805-20919412150d h1:awQLc9eG7SbW5vZCt1lxyRc+ZwTpjXwsH/AhtPF51uE= -github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241104203805-20919412150d/go.mod h1:TTwtk1IMiqmyGm66w9aIq/TInicOm2Y6DwcJzRIpv1U= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241114192544-cb251fe99cbb h1:UePw4/QUtalit5jG7K301PPxtq/tRgYeXiPLaBgA71I= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awscloudwatchlogsexporter v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:f9JwM/LQdKH8ZbYoH9TO35rmjM6WoTNlhWtYJ3YZucc= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241114192544-cb251fe99cbb h1:JUMGOjiVxW7uKAqqCYYP/S6Jv2TRDlFno3KHeMDva2c= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsemfexporter v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:LPWBVdTSNbZkk80v6aCUthS59cnR4VauVRdAIE3ifaY= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241114192544-cb251fe99cbb h1:ViZMK5wtqDwNkKIMW/HxtogaKouIn5ZMjuobz+5ui9g= +github.com/amazon-contributing/opentelemetry-collector-contrib/exporter/awsxrayexporter v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:GNeNylfr5KMt55XowzSdgbP7z8CkIAfIHtWSd+xxtws= +github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241114192544-cb251fe99cbb h1:NVIpIv4JrYgZ/rowSOqHaNpwGhs9cAbU1T7MNqF6QPw= +github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:/RaNSxxO06niapGT00snMdgFfjjjW/kV3TZGX8kHuwM= +github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241114192544-cb251fe99cbb h1:Gk9EIxKayGfo1WV20/1elx8vRlO4YgXmM2JRgnB497I= +github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsproxy v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:hRZt1DsvoLDIYBwjFvjwg/9IkaBXeCPG0QI57wbj98Q= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241114192544-cb251fe99cbb h1:nyR+UQJeHa4qxaLujo7bEBLb5aeTYbgVXf3AYuZegcI= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/awsutil v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:YL1Y62PxJ7dem1ZBUqCfvbnePaGr5p7DTSyOXSCi6O4= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241114192544-cb251fe99cbb h1:xbEI9BrUR5CPVAmjG5+XoAisV+FQQQzs28lgXHli8dY= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/containerinsight v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:LT+qAyMutoADv2qezO+vkm/BkxR88qEfXdF2d13mV+E= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241114192544-cb251fe99cbb h1:LtkELbLo6ini93/ww8QOFcpSbjeI3QloupqSYYl7M/I= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/cwlogs v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:SkscNdWANcuDJ7PkjS5wurSTAuY69nqP0I+cEVY9Ryw= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241114192544-cb251fe99cbb h1:61ZIUSIemzpU9fSPfcF2x9MX6z/bRnML2iiOo2TKj64= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/k8s v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:/TOECDME2jYRPY21CrpTX2eMADJdkmBFBXc1lV/nRZA= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241114192544-cb251fe99cbb h1:c5BAAF/nC9A9+7VcXoujR/5MPiPPMu85hJRTq1W1Ckg= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/proxy v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:J08A2gx8VFQfuoBiEfZ6uHIkMtVLd0OuRe5pP88b3I0= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241114192544-cb251fe99cbb h1:FhnG36f47Xr6Qt4/zCd4yT/rwAswizH7dQ5tVHeFVP8= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/aws/xray v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:+w647+1nLYvZWdk24gZWvdl/kFowbe2iDyISXLDYdmQ= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241114192544-cb251fe99cbb h1:FhAGXTeK6GBDrdwYZ8xlgMVC7yWdnf3wU2wEu3LAgsM= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/coreinternal v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:Ai4BsM7C05bEQYO2O272S1LmsyIhO5r0iLmXF5NN5so= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241114192544-cb251fe99cbb h1:s7JP9zgrIzwuN0X7dD5qbmQYkl+hLwnDb6spXnc9H+8= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/k8sconfig v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:VS66oUydCMwiWl1BFmLs7iNy4lGsfVYsriXr/d1fpAk= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241114192544-cb251fe99cbb h1:YUf64mMnt76wV2zAfR97/h4trbrecyoXabN69ioGQZY= +github.com/amazon-contributing/opentelemetry-collector-contrib/internal/kubelet v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:4qvmHiXPOkOXJdpmmxMqprb2BXxOGPgOG45BwLdipUM= +github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241114192544-cb251fe99cbb h1:D0Pj3U4Tu8aOuVSYVS+uwKMfQ+V1tiMEgSYJa+3VklM= +github.com/amazon-contributing/opentelemetry-collector-contrib/override/aws v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:t/hYoRTnlPuRjh8y0BwVGgNvNIXpU2QJME5YVppUUHQ= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241114192544-cb251fe99cbb h1:yP5shDdkxuoYPX06Ae1W9WibvLc0cUXtsYD4504Dk7U= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:Rr5b3hr6Jy9w/zTjsOl3vcyDDusc90P+iGdOd0UCYOo= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241114192544-cb251fe99cbb h1:2MuvnSSZM/oO1jaxe100FvI5lYjARtKLII6NVkkb1Rw= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/stanza v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:2NSghK+mafMGxM8c4Gff8qcprdMD3YQebZtD9UAdB3E= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241114192544-cb251fe99cbb h1:U7GKcASLD5rwnjxhU/eAsHJziB2ay9ILcG3fcoiRtkQ= +github.com/amazon-contributing/opentelemetry-collector-contrib/pkg/translator/prometheus v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:21nuEQl7YYeLkVrGGvxPXkljqjR40teBCG5trGZ5LxM= +github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241114192544-cb251fe99cbb h1:yKACQrKRT2ug6LEKuJWG471UgGPQat5OFONjruCr+JI= +github.com/amazon-contributing/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:uzpU7Y6+oL6RdOv8IWi6fjT8LNV6FYX6CN6NATLJOiQ= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241114192544-cb251fe99cbb h1:Tm0tYYa7SHmOGlyjUOSAGaR8/MnIZUlWiFmPup4N7t0= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:StgsMi0cNUydO2N/7WbLYPUBXzvp9wIMcWp9P8x/Vck= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver v0.0.0-20241114192544-cb251fe99cbb h1:im6grEA5ApFxNWSDiOLk5GZvFq3/xc6awZME10zP3Jc= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver v0.0.0-20241114192544-cb251fe99cbb/go.mod h1://9Xy+KG8K9KvujBh6sZXIYPDvbu8xsiU1l8StFHjMA= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241114192544-cb251fe99cbb h1:Nlebx6xeywLO195gSd2E96jiPTydTfJbkr0+jvYjwn0= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/awsxrayreceiver v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:igQaQJt7eA/y3dZ2VLXVql+6k/ZXBgrAa2y9FrMMIKQ= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241114192544-cb251fe99cbb h1:rgCu3nHj3jh26HjIMUu03+VQxAU+4FIdhPXn0KEhqzM= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/jmxreceiver v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:hRUrYatVP/GFNxHn2yW1gJcnPyGtdlTXyebpzzzjZeU= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241114192544-cb251fe99cbb h1:pnXdRMiah9knvHazdQKUvyCbUmPxixfRpPtKarnKX6U= +github.com/amazon-contributing/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.0.0-20241114192544-cb251fe99cbb/go.mod h1:TTwtk1IMiqmyGm66w9aIq/TInicOm2Y6DwcJzRIpv1U= github.com/amir/raidman v0.0.0-20170415203553-1ccc43bfb9c9 h1:FXrPTd8Rdlc94dKccl7KPmdmIbVh/OjelJ8/vgMRzcQ= github.com/amir/raidman v0.0.0-20170415203553-1ccc43bfb9c9/go.mod h1:eliMa/PW+RDr2QLWRmLH1R1ZA4RInpmvOzDDXtaIZkc= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= diff --git a/internal/containerinsightscommon/const.go b/internal/containerinsightscommon/const.go index 6a0a44706f..d957f3152f 100644 --- a/internal/containerinsightscommon/const.go +++ b/internal/containerinsightscommon/const.go @@ -18,6 +18,12 @@ const ( SourcesKey = "Sources" GpuDeviceKey = "GpuDevice" + ClusterQueueNameKey = "ClusterQueue" + ClusterQueueStatusKey = "Status" + ClusterQueueReasonKey = "Reason" + ClusterQueueResourceKey = "Resource" + Flavor = "Flavor" + // metric collected CpuTotal = "cpu_usage_total" CpuUser = "cpu_usage_user" @@ -102,10 +108,17 @@ const ( NeuronHardware = "neuron_hardware" NeuronExecutionLatency = "neuron_execution_latency" + KueuePendingWorkloads = "kueue_pending_workloads" + KueueEvictedWorkloadsTotal = "kueue_evicted_workloads_total" + KueueAdmittedActiveWorkloads = "kueue_admitted_active_workloads" + KueueClusterQueueResourceUsage = "kueue_cluster_queue_resource_usage" + KueueClusterQueueNominalUsage = "kueue_cluster_queue_nominal_quota" + TypeCluster = "Cluster" TypeClusterService = "ClusterService" TypeClusterNamespace = "ClusterNamespace" TypeService = "Service" + TypeClusterQueue = "ClusterQueue" // Both TypeInstance and TypeNode mean EC2 Instance, they are used in ECS and EKS separately TypeInstance = "Instance" diff --git a/internal/containerinsightscommon/util.go b/internal/containerinsightscommon/util.go index 352bbcb0a8..7e7d65b159 100644 --- a/internal/containerinsightscommon/util.go +++ b/internal/containerinsightscommon/util.go @@ -50,7 +50,7 @@ func MetricName(mType string, name string) string { prefix = containerPrefix case TypeService: prefix = service - case TypeCluster, TypeGpuCluster: + case TypeCluster, TypeGpuCluster, TypeClusterQueue: prefix = cluster case K8sNamespace: prefix = namespace diff --git a/plugins/processors/kueueattributes/config.go b/plugins/processors/kueueattributes/config.go new file mode 100644 index 0000000000..b2c04331b7 --- /dev/null +++ b/plugins/processors/kueueattributes/config.go @@ -0,0 +1,19 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "go.opentelemetry.io/collector/component" +) + +type Config struct{} + +// Verify Config implements Processor interface. +var _ component.Config = (*Config)(nil) + +// Validate does not check for unsupported dimension key-value pairs, because those +// get silently dropped and ignored during translation. +func (cfg *Config) Validate() error { + return nil +} diff --git a/plugins/processors/kueueattributes/config_test.go b/plugins/processors/kueueattributes/config_test.go new file mode 100644 index 0000000000..5be7a7c8f2 --- /dev/null +++ b/plugins/processors/kueueattributes/config_test.go @@ -0,0 +1,18 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/confmap" +) + +func TestUnmarshalDefaultConfig(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + assert.NoError(t, confmap.New().Unmarshal(cfg)) + assert.Equal(t, factory.CreateDefaultConfig(), cfg) +} diff --git a/plugins/processors/kueueattributes/factory.go b/plugins/processors/kueueattributes/factory.go new file mode 100644 index 0000000000..511b525e28 --- /dev/null +++ b/plugins/processors/kueueattributes/factory.go @@ -0,0 +1,56 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "context" + "fmt" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/collector/processor/processorhelper" +) + +const ( + stability = component.StabilityLevelBeta +) + +var ( + TypeStr, _ = component.NewType("kueueattributes") + processorCapabilities = consumer.Capabilities{MutatesData: true} +) + +func NewFactory() processor.Factory { + return processor.NewFactory( + TypeStr, + createDefaultConfig, + processor.WithMetrics(createMetricsProcessor, stability)) +} + +func createDefaultConfig() component.Config { + return &Config{} +} + +func createMetricsProcessor( + ctx context.Context, + set processor.CreateSettings, + cfg component.Config, + nextConsumer consumer.Metrics, +) (processor.Metrics, error) { + processorConfig, ok := cfg.(*Config) + if !ok { + return nil, fmt.Errorf("configuration parsing error") + } + + metricsProcessor := newKueueAttributesProcessor(processorConfig, set.Logger) + + return processorhelper.NewMetricsProcessor( + ctx, + set, + cfg, + nextConsumer, + metricsProcessor.processMetrics, + processorhelper.WithCapabilities(processorCapabilities)) +} diff --git a/plugins/processors/kueueattributes/factory_test.go b/plugins/processors/kueueattributes/factory_test.go new file mode 100644 index 0000000000..bc636e35f1 --- /dev/null +++ b/plugins/processors/kueueattributes/factory_test.go @@ -0,0 +1,45 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/processor/processortest" +) + +func TestCreateDefaultConfig(t *testing.T) { + factory := NewFactory() + require.NotNil(t, factory) + + cfg := factory.CreateDefaultConfig() + assert.NotNil(t, cfg, "failed to create default config") + assert.NoError(t, componenttest.CheckConfigStruct(cfg)) +} + +func TestCreateProcessor(t *testing.T) { + factory := NewFactory() + require.NotNil(t, factory) + + cfg := factory.CreateDefaultConfig() + setting := processortest.NewNopCreateSettings() + + tProcessor, err := factory.CreateTracesProcessor(context.Background(), setting, cfg, consumertest.NewNop()) + assert.Equal(t, err, component.ErrDataTypeIsNotSupported) + assert.Nil(t, tProcessor) + + mProcessor, err := factory.CreateMetricsProcessor(context.Background(), setting, cfg, consumertest.NewNop()) + assert.NoError(t, err) + assert.NotNil(t, mProcessor) + + lProcessor, err := factory.CreateLogsProcessor(context.Background(), setting, cfg, consumertest.NewNop()) + assert.Equal(t, err, component.ErrDataTypeIsNotSupported) + assert.Nil(t, lProcessor) +} diff --git a/plugins/processors/kueueattributes/processor.go b/plugins/processors/kueueattributes/processor.go new file mode 100644 index 0000000000..8769f797ff --- /dev/null +++ b/plugins/processors/kueueattributes/processor.go @@ -0,0 +1,106 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "context" + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" + + "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" +) + +const ( + kueueMetricsIdentifier = "kueue" +) + +var kueueLabelFilter = map[string]interface{}{ + containerinsightscommon.ClusterNameKey: nil, + containerinsightscommon.ClusterQueueNameKey: nil, + containerinsightscommon.ClusterQueueStatusKey: nil, + containerinsightscommon.ClusterQueueReasonKey: nil, + containerinsightscommon.ClusterQueueResourceKey: nil, + containerinsightscommon.Flavor: nil, + containerinsightscommon.NodeNameKey: nil, +} + +type kueueAttributesProcessor struct { + *Config + logger *zap.Logger + labelFilter map[string]interface{} +} + +func newKueueAttributesProcessor(config *Config, logger *zap.Logger) *kueueAttributesProcessor { + d := &kueueAttributesProcessor{ + Config: config, + logger: logger, + labelFilter: kueueLabelFilter, + } + return d +} + +func (d *kueueAttributesProcessor) processMetrics(_ context.Context, md pmetric.Metrics) (pmetric.Metrics, error) { + rms := md.ResourceMetrics() + for i := 0; i < rms.Len(); i++ { + rm := rms.At(i) + sms := rm.ScopeMetrics() + for j := 0; j < sms.Len(); j++ { + metrics := sms.At(j).Metrics() + for k := 0; k < metrics.Len(); k++ { + m := metrics.At(k) + d.processMetricAttributes(m) + } + } + d.dropResourceMetricAttributes(rm) + } + return md, nil +} + +func (d *kueueAttributesProcessor) processMetricAttributes(m pmetric.Metric) { + // only decorate kueue metrics + if !strings.HasPrefix(m.Name(), kueueMetricsIdentifier) { + return + } + + var dps pmetric.NumberDataPointSlice + switch m.Type() { + case pmetric.MetricTypeGauge: + dps = m.Gauge().DataPoints() + case pmetric.MetricTypeSum: + dps = m.Sum().DataPoints() + default: + d.logger.Debug("Ignore unknown metric type", zap.String(containerinsightscommon.MetricType, m.Type().String())) + } + + for i := 0; i < dps.Len(); i++ { + d.filterAttributes(dps.At(i).Attributes()) + } +} + +func (d *kueueAttributesProcessor) filterAttributes(attributes pcommon.Map) { + labels := d.labelFilter + if len(labels) == 0 { + return + } + // remove labels that are not in the keep list + attributes.RemoveIf(func(k string, _ pcommon.Value) bool { + if _, ok := labels[k]; ok { + return false + } + return true + }) +} + +func (d *kueueAttributesProcessor) dropResourceMetricAttributes(resourceMetric pmetric.ResourceMetrics) { + serviceNameKey := "service.name" + attributes := resourceMetric.Resource().Attributes() + serviceName, exists := attributes.Get(serviceNameKey) + + if exists && (serviceName.Str() == "containerInsightsKueueMetricsScraper") { + resourceMetric.Resource().Attributes().Clear() + } +} diff --git a/plugins/processors/kueueattributes/processor_test.go b/plugins/processors/kueueattributes/processor_test.go new file mode 100644 index 0000000000..4d0676664f --- /dev/null +++ b/plugins/processors/kueueattributes/processor_test.go @@ -0,0 +1,123 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueueattributes + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" +) + +func TestProcessMetricsForKueueMetrics(t *testing.T) { + logger, _ := zap.NewDevelopment() + kp := newKueueAttributesProcessor(createDefaultConfig().(*Config), logger) + ctx := context.Background() + + testcases := map[string]struct { + resource string + metrics pmetric.Metrics + wantMetriccnt int + want []map[string]string + }{ + "nonKueue": { + metrics: generateKueueMetrics("someOthermetric", []map[string]string{ + { + "ClusterName": "cluster", + }, + }), + wantMetriccnt: 1, + want: []map[string]string{ + { + "ClusterName": "cluster", + }, + }, + }, + "KeepAll": { + metrics: generateKueueMetrics("kueue_pending_workloads", []map[string]string{ + { + "ClusterName": "cluster", + "ClusterQueue": "production", + "Status": "active", + }, + { + "ClusterName": "cluster", + "ClusterQueue": "development", + "Status": "inadmissible", + "NodeName": "kubernetes-kueue", + }, + }), + wantMetriccnt: 1, + want: []map[string]string{ + { + "ClusterName": "cluster", + "ClusterQueue": "production", + "Status": "active", + }, + { + "ClusterName": "cluster", + "ClusterQueue": "development", + "Status": "inadmissible", + "NodeName": "kubernetes-kueue", + }, + }, + }, + "dropLabel": { + metrics: generateKueueMetrics("kueue_pending_workloads", []map[string]string{ + { + "ClusterName": "cluster", + "ClusterQueue": "production", + "Status": "active", + "Pod": "somepod", + }, + }), + wantMetriccnt: 1, + want: []map[string]string{ + { + "ClusterName": "cluster", + "ClusterQueue": "production", + "Status": "active", + }, + }, + }, + } + + for tname, tc := range testcases { + fmt.Printf("running %s\n", tname) + ms, _ := kp.processMetrics(ctx, tc.metrics) + assert.Equal(t, tc.wantMetriccnt, ms.MetricCount()) + if tc.wantMetriccnt > 0 { + dps := ms.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0).Gauge().DataPoints() + assert.Equal(t, len(tc.want), dps.Len()) + for i, dim := range tc.want { + attrs := dps.At(i).Attributes() + assert.Equal(t, len(dim), attrs.Len()) + for k, v := range dim { + got, ok := attrs.Get(k) + assert.True(t, ok) + assert.Equal(t, v, got.Str()) + } + } + } + } + +} + +func generateKueueMetrics(metricName string, dimensions []map[string]string) pmetric.Metrics { + md := pmetric.NewMetrics() + ms := md.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics().AppendEmpty() + ms.SetName(metricName) + dps := ms.SetEmptyGauge().DataPoints() + for _, dim := range dimensions { + dp := dps.AppendEmpty() + dp.SetIntValue(10) + for k, v := range dim { + dp.Attributes().PutStr(k, v) + } + } + return md +} diff --git a/service/defaultcomponents/components.go b/service/defaultcomponents/components.go index 9fc9661522..64725b01ed 100644 --- a/service/defaultcomponents/components.go +++ b/service/defaultcomponents/components.go @@ -29,6 +29,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsxrayreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver" @@ -62,6 +63,7 @@ import ( "github.com/aws/amazon-cloudwatch-agent/plugins/processors/awsentity" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/ec2tagger" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/gpuattributes" + "github.com/aws/amazon-cloudwatch-agent/plugins/processors/kueueattributes" "github.com/aws/amazon-cloudwatch-agent/processor/rollupprocessor" ) @@ -71,6 +73,7 @@ func Factories() (otelcol.Factories, error) { if factories.Receivers, err = receiver.MakeFactoryMap( awscontainerinsightreceiver.NewFactory(), + awscontainerinsightskueuereceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), awsxrayreceiver.NewFactory(), filelogreceiver.NewFactory(), @@ -98,6 +101,7 @@ func Factories() (otelcol.Factories, error) { ec2tagger.NewFactory(), filterprocessor.NewFactory(), gpuattributes.NewFactory(), + kueueattributes.NewFactory(), groupbytraceprocessor.NewFactory(), k8sattributesprocessor.NewFactory(), memorylimiterprocessor.NewFactory(), diff --git a/service/defaultcomponents/components_test.go b/service/defaultcomponents/components_test.go index e912749dfa..9ef627aff3 100644 --- a/service/defaultcomponents/components_test.go +++ b/service/defaultcomponents/components_test.go @@ -18,6 +18,7 @@ func TestComponents(t *testing.T) { assert.NoError(t, err) wantReceivers := []string{ "awscontainerinsightreceiver", + "awscontainerinsightskueuereceiver", "awsecscontainermetrics", "awsxray", "filelog", @@ -49,6 +50,7 @@ func TestComponents(t *testing.T) { "experimental_metricsgeneration", "filter", "gpuattributes", + "kueueattributes", "groupbytrace", "k8sattributes", "memory_limiter", diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.json b/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.json index 0c44e570b4..c6d415b3c1 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.json +++ b/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.json @@ -20,7 +20,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": false, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.json b/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.json index 4ca852e3d4..7f823a760d 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.json +++ b/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.json @@ -16,7 +16,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": false, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.json b/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.json index 140b2d83ab..a51cb35bc3 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.json +++ b/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.json @@ -20,7 +20,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": false, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.json b/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.json index 371a6c9957..83e9d88019 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.json +++ b/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.json @@ -31,7 +31,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": false, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/base_container_insights_config.json b/translator/tocwconfig/sampleConfig/base_container_insights_config.json index 510cb41463..45787f069a 100644 --- a/translator/tocwconfig/sampleConfig/base_container_insights_config.json +++ b/translator/tocwconfig/sampleConfig/base_container_insights_config.json @@ -11,7 +11,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "prefer_full_pod_name": true, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.json b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.json index 5581444869..68aa347ee7 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.json +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.json @@ -11,7 +11,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": true, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml index 18e4401d58..f2e17099c6 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml @@ -134,9 +134,9 @@ exporters: - pod_memory_request - pod_memory_limit - pod_cpu_limit + - pod_cpu_request - pod_cpu_usage_total - pod_memory_working_set - - pod_cpu_request - pod_container_status_running - pod_container_status_terminated - pod_container_status_waiting diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.json b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.json index 8e1ffdbbf9..b73013d933 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.json +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.json @@ -10,7 +10,8 @@ "cluster_name": "TestCluster", "metrics_collection_interval": 30, "disable_metric_extraction": true, - "enhanced_container_insights": true + "enhanced_container_insights": true, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.conf b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.conf new file mode 100644 index 0000000000..007bb60efb --- /dev/null +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.conf @@ -0,0 +1,27 @@ +[agent] + collection_jitter = "0s" + debug = false + flush_interval = "1s" + flush_jitter = "0s" + hostname = "host_name_from_env" + interval = "60s" + logfile = "" + logtarget = "lumberjack" + metric_batch_size = 1000 + metric_buffer_limit = 10000 + omit_hostname = false + precision = "" + quiet = false + round_interval = false + +[inputs] + +[outputs] + + [[outputs.cloudwatchlogs]] + endpoint_override = "https://fake_endpoint" + force_flush_interval = "5s" + log_stream_name = "host_name_from_env" + region = "us-east-1" + +[processors] diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.json b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.json new file mode 100644 index 0000000000..7a9db60ddd --- /dev/null +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.json @@ -0,0 +1,21 @@ +{ + "agent": { + "region": "us-east-1" + }, + "logs": { + "metrics_collected": { + "emf": { + }, + "kubernetes": { + "cluster_name": "TestCluster", + "metrics_collection_interval": 30, + "disable_metric_extraction": true, + "enhanced_container_insights": true, + "accelerated_compute_metrics": false, + "kueue_container_insights": true + } + }, + "force_flush_interval": 5, + "endpoint_override":"https://fake_endpoint" + } +} diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml new file mode 100644 index 0000000000..36d9202ca7 --- /dev/null +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml @@ -0,0 +1,617 @@ +exporters: + awscloudwatchlogs/emf_logs: + certificate_file_path: "" + emf_only: true + endpoint: https://fake_endpoint + imds_retries: 2 + local_mode: true + log_group_name: emf/logs/default + log_retention: 0 + log_stream_name: host_name_from_env + max_retries: 2 + middleware: agenthealth/logs + no_verify_ssl: false + num_workers: 8 + profile: default + proxy_address: "" + raw_log: true + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + retry_on_failure: + enabled: true + initial_interval: 5s + max_elapsed_time: 5m0s + max_interval: 30s + multiplier: 1.5 + randomization_factor: 0.5 + role_arn: "" + sending_queue: + enabled: true + num_consumers: 1 + queue_size: 1000 + shared_credentials_file: + - /root/.aws/credentials + awsemf/containerinsights: + certificate_file_path: "" + detailed_metrics: false + dimension_rollup_option: NoDimensionRollup + disable_metric_extraction: true + eks_fargate_container_insights_enabled: false + endpoint: https://fake_endpoint + enhanced_container_insights: true + imds_retries: 2 + local_mode: true + log_group_name: /aws/containerinsights/{ClusterName}/performance + log_retention: 0 + log_stream_name: '{NodeName}' + max_retries: 2 + metric_declarations: + - dimensions: + - - ClusterName + - - ClusterName + - ContainerName + - FullPodName + - Namespace + - PodName + - - ClusterName + - ContainerName + - Namespace + - PodName + metric_name_selectors: + - container_cpu_utilization + - container_cpu_utilization_over_container_limit + - container_cpu_limit + - container_cpu_request + - container_memory_utilization + - container_memory_utilization_over_container_limit + - container_memory_failures_total + - container_memory_limit + - container_memory_request + - container_filesystem_usage + - container_filesystem_available + - container_filesystem_utilization + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + - - ClusterName + - Namespace + - Service + - - ClusterName + - Namespace + - - ClusterName + - FullPodName + - Namespace + - PodName + metric_name_selectors: + - pod_cpu_utilization + - pod_memory_utilization + - pod_network_rx_bytes + - pod_network_tx_bytes + - pod_cpu_utilization_over_pod_limit + - pod_memory_utilization_over_pod_limit + - dimensions: + - - ClusterName + - FullPodName + - Namespace + - PodName + - - ClusterName + - Namespace + - PodName + - - ClusterName + - Namespace + - - ClusterName + metric_name_selectors: + - pod_interface_network_rx_dropped + - pod_interface_network_tx_dropped + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + - - ClusterName + - FullPodName + - Namespace + - PodName + - - ClusterName + - Namespace + - Service + metric_name_selectors: + - pod_cpu_reserved_capacity + - pod_memory_reserved_capacity + - pod_number_of_container_restarts + - pod_number_of_containers + - pod_number_of_running_containers + - pod_status_ready + - pod_status_scheduled + - pod_status_running + - pod_status_pending + - pod_status_failed + - pod_status_unknown + - pod_status_succeeded + - pod_memory_request + - pod_memory_limit + - pod_cpu_limit + - pod_cpu_request + - pod_cpu_usage_total + - pod_memory_working_set + - pod_container_status_running + - pod_container_status_terminated + - pod_container_status_waiting + - pod_container_status_waiting_reason_crash_loop_back_off + - pod_container_status_waiting_reason_image_pull_error + - pod_container_status_waiting_reason_start_error + - pod_container_status_waiting_reason_create_container_error + - pod_container_status_waiting_reason_create_container_config_error + - pod_container_status_terminated_reason_oom_killed + - dimensions: + - - ClusterName + - InstanceId + - NodeName + - - ClusterName + metric_name_selectors: + - node_cpu_utilization + - node_memory_utilization + - node_network_total_bytes + - node_cpu_reserved_capacity + - node_memory_reserved_capacity + - node_number_of_running_pods + - node_number_of_running_containers + - node_cpu_usage_total + - node_cpu_limit + - node_memory_working_set + - node_memory_limit + - node_status_condition_ready + - node_status_condition_disk_pressure + - node_status_condition_memory_pressure + - node_status_condition_pid_pressure + - node_status_condition_network_unavailable + - node_status_condition_unknown + - node_status_capacity_pods + - node_status_allocatable_pods + - dimensions: + - - ClusterName + - InstanceId + - NodeName + - - ClusterName + metric_name_selectors: + - node_interface_network_rx_dropped + - node_interface_network_tx_dropped + - node_diskio_io_service_bytes_total + - node_diskio_io_serviced_total + - hyperpod_node_health_status_schedulable + - hyperpod_node_health_status_unschedulable_pending_replacement + - hyperpod_node_health_status_unschedulable_pending_reboot + - hyperpod_node_health_status_unschedulable + - dimensions: + - - ClusterName + - InstanceId + - NodeName + - - ClusterName + metric_name_selectors: + - node_filesystem_utilization + - node_filesystem_inodes + - node_filesystem_inodes_free + - dimensions: + - - ClusterName + - Namespace + - Service + - - ClusterName + metric_name_selectors: + - service_number_of_running_pods + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + metric_name_selectors: + - replicas_desired + - replicas_ready + - status_replicas_available + - status_replicas_unavailable + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + metric_name_selectors: + - daemonset_status_number_available + - daemonset_status_number_unavailable + - dimensions: + - - ClusterName + - Namespace + - - ClusterName + metric_name_selectors: + - namespace_number_of_running_pods + - dimensions: + - - ClusterName + metric_name_selectors: + - cluster_node_count + - cluster_failed_node_count + - cluster_number_of_running_pods + - dimensions: + - - ClusterName + - endpoint + - - ClusterName + metric_name_selectors: + - apiserver_storage_size_bytes + - apiserver_storage_db_total_size_in_bytes + - etcd_db_total_size_in_bytes + - dimensions: + - - ClusterName + - resource + - - ClusterName + metric_name_selectors: + - apiserver_storage_list_duration_seconds + - apiserver_longrunning_requests + - apiserver_storage_objects + - dimensions: + - - ClusterName + - verb + - - ClusterName + metric_name_selectors: + - apiserver_request_duration_seconds + - rest_client_request_duration_seconds + - dimensions: + - - ClusterName + - code + - verb + - - ClusterName + metric_name_selectors: + - apiserver_request_total + - apiserver_request_total_5xx + - dimensions: + - - ClusterName + - operation + - - ClusterName + metric_name_selectors: + - apiserver_admission_controller_admission_duration_seconds + - apiserver_admission_step_admission_duration_seconds + - etcd_request_duration_seconds + - dimensions: + - - ClusterName + - code + - method + - - ClusterName + metric_name_selectors: + - rest_client_requests_total + - dimensions: + - - ClusterName + - request_kind + - - ClusterName + metric_name_selectors: + - apiserver_current_inflight_requests + - apiserver_current_inqueue_requests + - dimensions: + - - ClusterName + - name + - - ClusterName + metric_name_selectors: + - apiserver_admission_webhook_admission_duration_seconds + - dimensions: + - - ClusterName + - group + - - ClusterName + metric_name_selectors: + - apiserver_requested_deprecated_apis + - dimensions: + - - ClusterName + - reason + - - ClusterName + metric_name_selectors: + - apiserver_flowcontrol_rejected_requests_total + - dimensions: + - - ClusterName + - priority_level + - - ClusterName + metric_name_selectors: + - apiserver_flowcontrol_request_concurrency_limit + metric_descriptors: + - metric_name: apiserver_admission_controller_admission_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_admission_step_admission_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_admission_webhook_admission_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_current_inflight_requests + overwrite: true + unit: Count + - metric_name: apiserver_current_inqueue_requests + overwrite: true + unit: Count + - metric_name: apiserver_flowcontrol_rejected_requests_total + overwrite: true + unit: Count + - metric_name: apiserver_flowcontrol_request_concurrency_limit + overwrite: true + unit: Count + - metric_name: apiserver_longrunning_requests + overwrite: true + unit: Count + - metric_name: apiserver_request_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_request_total + overwrite: true + unit: Count + - metric_name: apiserver_request_total_5xx + overwrite: true + unit: Count + - metric_name: apiserver_requested_deprecated_apis + overwrite: true + unit: Count + - metric_name: apiserver_storage_objects + overwrite: true + unit: Count + - metric_name: etcd_request_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_storage_list_duration_seconds + overwrite: true + unit: Seconds + - metric_name: apiserver_storage_db_total_size_in_bytes + overwrite: true + unit: Bytes + - metric_name: apiserver_storage_size_bytes + overwrite: true + unit: Bytes + - metric_name: etcd_db_total_size_in_bytes + overwrite: true + unit: Bytes + - metric_name: rest_client_request_duration_seconds + overwrite: true + unit: Seconds + - metric_name: rest_client_requests_total + overwrite: true + unit: Count + middleware: agenthealth/logs + namespace: ContainerInsights + no_verify_ssl: false + num_workers: 8 + output_destination: cloudwatch + parse_json_encoded_attr_values: + - Sources + - kubernetes + profile: default + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + resource_to_telemetry_conversion: + enabled: true + retain_initial_value_of_delta_metric: false + role_arn: "" + shared_credentials_file: + - /root/.aws/credentials + version: "0" + awsemf/kueueContainerInsights: + certificate_file_path: "" + detailed_metrics: false + dimension_rollup_option: NoDimensionRollup + disable_metric_extraction: false + eks_fargate_container_insights_enabled: false + endpoint: https://fake_endpoint + enhanced_container_insights: false + imds_retries: 2 + local_mode: true + log_group_name: /aws/containerinsights/{ClusterName}/performance + log_retention: 0 + log_stream_name: kubernetes-kueue + max_retries: 2 + metric_declarations: + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Status + - - ClusterName + - Status + metric_name_selectors: + - kueue_pending_workloads + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Reason + - - ClusterName + - Reason + metric_name_selectors: + - kueue_evicted_workloads_total + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + metric_name_selectors: + - kueue_admitted_active_workloads + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Resource + - - ClusterName + - ClusterQueue + - Flavor + - Resource + - - ClusterName + - ClusterQueue + - Flavor + metric_name_selectors: + - kueue_cluster_queue_resource_usage + - kueue_cluster_queue_nominal_quota + middleware: agenthealth/logs + namespace: ContainerInsights/Prometheus + no_verify_ssl: false + num_workers: 8 + output_destination: cloudwatch + parse_json_encoded_attr_values: + - Sources + - kubernetes + profile: default + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + resource_to_telemetry_conversion: + enabled: true + retain_initial_value_of_delta_metric: false + role_arn: "" + shared_credentials_file: + - /root/.aws/credentials + version: "0" +extensions: + agenthealth/logs: + is_usage_data_enabled: true + stats: + operations: + - PutLogEvents + usage_flags: + mode: OP + region_type: ACJ + entitystore: + mode: onPremise + profile: default + region: us-east-1 + shared_credential_file: /root/.aws/credentials +processors: + batch/containerinsights: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + batch/emf_logs: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + batch/kueueContainerInsights: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + kueueattributes/kueueContainerInsights: {} + metricstransform/containerinsights: + transforms: + - action: insert + aggregation_type: "" + experimental_match_labels: + code: ^5.* + include: apiserver_request_total + match_type: regexp + new_name: apiserver_request_total_5xx + submatch_case: "" +receivers: + awscontainerinsightreceiver: + accelerated_compute_metrics: false + add_container_name_metric_label: true + add_full_pod_name_metric_label: true + add_service_as_attribute: true + certificate_file_path: "" + cluster_name: TestCluster + collection_interval: 30s + container_orchestrator: eks + enable_control_plane_metrics: true + endpoint: "" + host_ip: "" + host_name: "" + imds_retries: 2 + kube_config_path: "" + leader_lock_name: cwagent-clusterleader + leader_lock_using_config_map_only: true + local_mode: true + max_retries: 0 + no_verify_ssl: false + num_workers: 0 + prefer_full_pod_name: true + profile: default + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 0 + resource_arn: "" + role_arn: "" + shared_credentials_file: + - /root/.aws/credentials + awscontainerinsightskueuereceiver: + cluster_name: TestCluster + collection_interval: 1m0s + tcplog/emf_logs: + encoding: utf-8 + id: tcp_input + listen_address: 0.0.0.0:25888 + operators: [] + retry_on_failure: + enabled: false + initial_interval: 0s + max_elapsed_time: 0s + max_interval: 0s + type: tcp_input + udplog/emf_logs: + encoding: utf-8 + id: udp_input + listen_address: 0.0.0.0:25888 + multiline: + line_end_pattern: .^ + line_start_pattern: "" + omit_pattern: false + operators: [] + retry_on_failure: + enabled: false + initial_interval: 0s + max_elapsed_time: 0s + max_interval: 0s + type: udp_input +service: + extensions: + - agenthealth/logs + - entitystore + pipelines: + logs/emf_logs: + exporters: + - awscloudwatchlogs/emf_logs + processors: + - batch/emf_logs + receivers: + - tcplog/emf_logs + - udplog/emf_logs + metrics/containerinsights: + exporters: + - awsemf/containerinsights + processors: + - batch/containerinsights + - metricstransform/containerinsights + receivers: + - awscontainerinsightreceiver + metrics/kueueContainerInsights: + exporters: + - awsemf/kueueContainerInsights + processors: + - batch/kueueContainerInsights + - kueueattributes/kueueContainerInsights + receivers: + - awscontainerinsightskueuereceiver + telemetry: + logs: + development: false + disable_caller: false + disable_stacktrace: false + encoding: console + level: info + sampling: + enabled: true + initial: 2 + thereafter: 500 + tick: 10s + metrics: + address: "" + level: None + traces: {} diff --git a/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.json b/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.json index 1552a1b7b4..1c3894aac0 100644 --- a/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.json +++ b/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.json @@ -9,7 +9,8 @@ "metrics_collection_interval": 30, "disable_metric_extraction": true, "enhanced_container_insights": true, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "force_flush_interval": 5, diff --git a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.conf b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.conf new file mode 100644 index 0000000000..007bb60efb --- /dev/null +++ b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.conf @@ -0,0 +1,27 @@ +[agent] + collection_jitter = "0s" + debug = false + flush_interval = "1s" + flush_jitter = "0s" + hostname = "host_name_from_env" + interval = "60s" + logfile = "" + logtarget = "lumberjack" + metric_batch_size = 1000 + metric_buffer_limit = 10000 + omit_hostname = false + precision = "" + quiet = false + round_interval = false + +[inputs] + +[outputs] + + [[outputs.cloudwatchlogs]] + endpoint_override = "https://fake_endpoint" + force_flush_interval = "5s" + log_stream_name = "host_name_from_env" + region = "us-east-1" + +[processors] diff --git a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.json b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.json new file mode 100644 index 0000000000..cd2290daed --- /dev/null +++ b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.json @@ -0,0 +1,21 @@ +{ + "agent": { + "region": "us-east-1" + }, + "logs": { + "metrics_collected": { + "emf": { + }, + "kubernetes": { + "cluster_name": "TestCluster", + "metrics_collection_interval": 30, + "disable_metric_extraction": true, + "prefer_full_pod_name": true, + "accelerated_compute_metrics": false, + "kueue_container_insights": true + } + }, + "force_flush_interval": 5, + "endpoint_override":"https://fake_endpoint" + } +} diff --git a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml new file mode 100644 index 0000000000..7d2fa024b0 --- /dev/null +++ b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml @@ -0,0 +1,350 @@ +exporters: + awscloudwatchlogs/emf_logs: + certificate_file_path: /etc/test/ca_bundle.pem + emf_only: true + endpoint: https://fake_endpoint + imds_retries: 1 + local_mode: false + log_group_name: emf/logs/default + log_retention: 0 + log_stream_name: host_name_from_env + max_retries: 2 + middleware: agenthealth/logs + no_verify_ssl: false + num_workers: 8 + profile: "" + proxy_address: "" + raw_log: true + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + retry_on_failure: + enabled: true + initial_interval: 5s + max_elapsed_time: 5m0s + max_interval: 30s + multiplier: 1.5 + randomization_factor: 0.5 + role_arn: "" + sending_queue: + enabled: true + num_consumers: 1 + queue_size: 1000 + awsemf/containerinsights: + certificate_file_path: /etc/test/ca_bundle.pem + detailed_metrics: false + dimension_rollup_option: NoDimensionRollup + disable_metric_extraction: true + eks_fargate_container_insights_enabled: false + endpoint: https://fake_endpoint + enhanced_container_insights: false + imds_retries: 1 + local_mode: false + log_group_name: /aws/containerinsights/{ClusterName}/performance + log_retention: 0 + log_stream_name: '{NodeName}' + max_retries: 2 + metric_declarations: + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + - - ClusterName + - Namespace + - Service + - - ClusterName + - Namespace + metric_name_selectors: + - pod_cpu_utilization + - pod_memory_utilization + - pod_network_rx_bytes + - pod_network_tx_bytes + - pod_cpu_utilization_over_pod_limit + - pod_memory_utilization_over_pod_limit + - dimensions: + - - ClusterName + - Namespace + - PodName + metric_name_selectors: + - pod_number_of_container_restarts + - dimensions: + - - ClusterName + - Namespace + - PodName + - - ClusterName + metric_name_selectors: + - pod_cpu_reserved_capacity + - pod_memory_reserved_capacity + - dimensions: + - - ClusterName + - InstanceId + - NodeName + - - ClusterName + metric_name_selectors: + - node_cpu_utilization + - node_memory_utilization + - node_network_total_bytes + - node_cpu_reserved_capacity + - node_memory_reserved_capacity + - node_number_of_running_pods + - node_number_of_running_containers + - dimensions: + - - ClusterName + metric_name_selectors: + - node_cpu_usage_total + - node_cpu_limit + - node_memory_working_set + - node_memory_limit + - dimensions: + - - ClusterName + - InstanceId + - NodeName + - - ClusterName + metric_name_selectors: + - node_filesystem_utilization + - dimensions: + - - ClusterName + - Namespace + - Service + - - ClusterName + metric_name_selectors: + - service_number_of_running_pods + - dimensions: + - - ClusterName + - Namespace + - - ClusterName + metric_name_selectors: + - namespace_number_of_running_pods + - dimensions: + - - ClusterName + metric_name_selectors: + - cluster_node_count + - cluster_failed_node_count + middleware: agenthealth/logs + namespace: ContainerInsights + no_verify_ssl: false + num_workers: 8 + output_destination: cloudwatch + parse_json_encoded_attr_values: + - Sources + - kubernetes + profile: "" + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + resource_to_telemetry_conversion: + enabled: true + retain_initial_value_of_delta_metric: false + role_arn: "" + version: "0" + awsemf/kueueContainerInsights: + certificate_file_path: /etc/test/ca_bundle.pem + detailed_metrics: false + dimension_rollup_option: NoDimensionRollup + disable_metric_extraction: false + eks_fargate_container_insights_enabled: false + endpoint: https://fake_endpoint + enhanced_container_insights: false + imds_retries: 1 + local_mode: false + log_group_name: /aws/containerinsights/{ClusterName}/performance + log_retention: 0 + log_stream_name: kubernetes-kueue + max_retries: 2 + metric_declarations: + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Status + - - ClusterName + - Status + metric_name_selectors: + - kueue_pending_workloads + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Reason + - - ClusterName + - Reason + metric_name_selectors: + - kueue_evicted_workloads_total + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + metric_name_selectors: + - kueue_admitted_active_workloads + - dimensions: + - - ClusterName + - - ClusterName + - ClusterQueue + - - ClusterName + - ClusterQueue + - Resource + - - ClusterName + - ClusterQueue + - Flavor + - Resource + - - ClusterName + - ClusterQueue + - Flavor + metric_name_selectors: + - kueue_cluster_queue_resource_usage + - kueue_cluster_queue_nominal_quota + middleware: agenthealth/logs + namespace: ContainerInsights/Prometheus + no_verify_ssl: false + num_workers: 8 + output_destination: cloudwatch + parse_json_encoded_attr_values: + - Sources + - kubernetes + profile: "" + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + resource_to_telemetry_conversion: + enabled: true + retain_initial_value_of_delta_metric: false + role_arn: "" + version: "0" +extensions: + agenthealth/logs: + is_usage_data_enabled: true + stats: + operations: + - PutLogEvents + usage_flags: + mode: EC2 + region_type: ACJ + entitystore: + mode: ec2 + region: us-east-1 +processors: + batch/containerinsights: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + batch/emf_logs: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + batch/kueueContainerInsights: + metadata_cardinality_limit: 1000 + send_batch_max_size: 0 + send_batch_size: 8192 + timeout: 5s + kueueattributes/kueueContainerInsights: {} +receivers: + awscontainerinsightreceiver: + accelerated_compute_metrics: false + add_container_name_metric_label: false + add_full_pod_name_metric_label: false + add_service_as_attribute: true + certificate_file_path: "" + cluster_name: TestCluster + collection_interval: 30s + container_orchestrator: eks + enable_control_plane_metrics: false + endpoint: "" + host_ip: "" + host_name: "" + imds_retries: 1 + kube_config_path: "" + leader_lock_name: cwagent-clusterleader + leader_lock_using_config_map_only: true + local_mode: false + max_retries: 0 + no_verify_ssl: false + num_workers: 0 + prefer_full_pod_name: true + profile: "" + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 0 + resource_arn: "" + role_arn: "" + awscontainerinsightskueuereceiver: + cluster_name: TestCluster + collection_interval: 1m0s + tcplog/emf_logs: + encoding: utf-8 + id: tcp_input + listen_address: 0.0.0.0:25888 + operators: [] + retry_on_failure: + enabled: false + initial_interval: 0s + max_elapsed_time: 0s + max_interval: 0s + type: tcp_input + udplog/emf_logs: + encoding: utf-8 + id: udp_input + listen_address: 0.0.0.0:25888 + multiline: + line_end_pattern: .^ + line_start_pattern: "" + omit_pattern: false + operators: [] + retry_on_failure: + enabled: false + initial_interval: 0s + max_elapsed_time: 0s + max_interval: 0s + type: udp_input +service: + extensions: + - agenthealth/logs + - entitystore + pipelines: + logs/emf_logs: + exporters: + - awscloudwatchlogs/emf_logs + processors: + - batch/emf_logs + receivers: + - tcplog/emf_logs + - udplog/emf_logs + metrics/containerinsights: + exporters: + - awsemf/containerinsights + processors: + - batch/containerinsights + receivers: + - awscontainerinsightreceiver + metrics/kueueContainerInsights: + exporters: + - awsemf/kueueContainerInsights + processors: + - batch/kueueContainerInsights + - kueueattributes/kueueContainerInsights + receivers: + - awscontainerinsightskueuereceiver + telemetry: + logs: + development: false + disable_caller: false + disable_stacktrace: false + encoding: console + level: info + sampling: + enabled: true + initial: 2 + thereafter: 500 + tick: 10s + metrics: + address: "" + level: None + traces: {} diff --git a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.json b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.json index eed2cbb8ac..5c536cc48b 100644 --- a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.json +++ b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.json @@ -10,7 +10,8 @@ "cluster_name": "TestCluster", "metrics_collection_interval": 30, "enhanced_container_insights": true, - "accelerated_compute_metrics": false + "accelerated_compute_metrics": false, + "kueue_container_insights": false } }, "logs_collected": { diff --git a/translator/tocwconfig/tocwconfig_test.go b/translator/tocwconfig/tocwconfig_test.go index cbed96ffb3..16b5d52928 100644 --- a/translator/tocwconfig/tocwconfig_test.go +++ b/translator/tocwconfig/tocwconfig_test.go @@ -233,6 +233,18 @@ func TestEmfAndKubernetesWithGpuConfig(t *testing.T) { checkTranslation(t, "emf_and_kubernetes_with_gpu_config", "darwin", nil, "") } +func TestEmfAndKubernetesWithKueueConfig(t *testing.T) { + resetContext(t) + readCommonConfig(t, "./sampleConfig/commonConfig/withCredentials.toml") + context.CurrentContext().SetRunInContainer(true) + context.CurrentContext().SetMode(config.ModeOnPremise) + t.Setenv(config.HOST_NAME, "host_name_from_env") + t.Setenv(config.HOST_IP, "127.0.0.1") + expectedEnvVars := map[string]string{} + checkTranslation(t, "emf_and_kubernetes_with_kueue_config", "linux", expectedEnvVars, "") + checkTranslation(t, "emf_and_kubernetes_with_kueue_config", "darwin", nil, "") +} + func TestKubernetesModeOnPremiseConfig(t *testing.T) { resetContext(t) context.CurrentContext().SetRunInContainer(true) @@ -243,6 +255,20 @@ func TestKubernetesModeOnPremiseConfig(t *testing.T) { checkTranslation(t, "kubernetes_on_prem_config", "linux", expectedEnvVars, "") } +func TestKueueContainerInsightsConfig(t *testing.T) { + resetContext(t) + context.CurrentContext().SetRunInContainer(true) + context.CurrentContext().SetMode(config.ModeEC2) + t.Setenv(config.HOST_NAME, "host_name_from_env") + t.Setenv(config.HOST_IP, "127.0.0.1") + t.Setenv(envconfig.AWS_CA_BUNDLE, "/etc/test/ca_bundle.pem") + expectedEnvVars := map[string]string{ + "AWS_CA_BUNDLE": "/etc/test/ca_bundle.pem", + } + checkTranslation(t, "kueue_container_insights_config", "linux", expectedEnvVars, "") + checkTranslation(t, "kueue_container_insights_config", "darwin", nil, "") +} + func TestLogsAndKubernetesConfig(t *testing.T) { resetContext(t) context.CurrentContext().SetRunInContainer(true) diff --git a/translator/translate/otel/common/common.go b/translator/translate/otel/common/common.go index 8e192ef5c9..a6bb7699be 100644 --- a/translator/translate/otel/common/common.go +++ b/translator/translate/otel/common/common.go @@ -58,6 +58,7 @@ const ( EnhancedContainerInsights = "enhanced_container_insights" PreferFullPodName = "prefer_full_pod_name" EnableAcceleratedComputeMetric = "accelerated_compute_metrics" + EnableKueueContainerInsights = "kueue_container_insights" AppendDimensionsKey = "append_dimensions" Console = "console" DiskKey = "disk" @@ -449,3 +450,7 @@ func IsAnySet(conf *confmap.Conf, keys []string) bool { } return false } + +func KueueContainerInsightsEnabled(conf *confmap.Conf) bool { + return GetOrDefaultBool(conf, ConfigKey(LogsKey, MetricsCollectedKey, KubernetesKey, EnableKueueContainerInsights), false) +} diff --git a/translator/translate/otel/exporter/awsemf/awsemf_default_kubernetes_kueue.yaml b/translator/translate/otel/exporter/awsemf/awsemf_default_kubernetes_kueue.yaml new file mode 100644 index 0000000000..2ee8d9b5f7 --- /dev/null +++ b/translator/translate/otel/exporter/awsemf/awsemf_default_kubernetes_kueue.yaml @@ -0,0 +1,10 @@ +namespace: ContainerInsights/Prometheus +log_group_name: '/aws/containerinsights/{ClusterName}/performance' +log_stream_name: 'kubernetes-kueue' +detailed_metrics: false +dimension_rollup_option: NoDimensionRollup +version: "0" +retain_initial_value_of_delta_metric: false +parse_json_encoded_attr_values: [ Sources, kubernetes ] +resource_to_telemetry_conversion: + enabled: true \ No newline at end of file diff --git a/translator/translate/otel/exporter/awsemf/kueue.go b/translator/translate/otel/exporter/awsemf/kueue.go new file mode 100644 index 0000000000..10dd880aca --- /dev/null +++ b/translator/translate/otel/exporter/awsemf/kueue.go @@ -0,0 +1,69 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package awsemf + +import ( + "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/awsemfexporter" + "go.opentelemetry.io/collector/confmap" + + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" +) + +func setKubernetesKueueMetricDeclaration(conf *confmap.Conf, cfg *awsemfexporter.Config) error { + cfg.MetricDeclarations = getKueueMetricDeclarations(conf) + return nil +} + +func getKueueMetricDeclarations(conf *confmap.Conf) []*awsemfexporter.MetricDeclaration { + var metricDeclarations []*awsemfexporter.MetricDeclaration + if common.KueueContainerInsightsEnabled(conf) { + metricDeclarations = []*awsemfexporter.MetricDeclaration{ + { + Dimensions: [][]string{ + {"ClusterName"}, + {"ClusterName", "ClusterQueue"}, + {"ClusterName", "ClusterQueue", "Status"}, + {"ClusterName", "Status"}, + }, + MetricNameSelectors: []string{ + "kueue_pending_workloads", + }, + }, + { + Dimensions: [][]string{ + {"ClusterName"}, + {"ClusterName", "ClusterQueue"}, + {"ClusterName", "ClusterQueue", "Reason"}, + {"ClusterName", "Reason"}, + }, + MetricNameSelectors: []string{ + "kueue_evicted_workloads_total", + }, + }, + { + Dimensions: [][]string{ + {"ClusterName"}, + {"ClusterName", "ClusterQueue"}, + }, + MetricNameSelectors: []string{ + "kueue_admitted_active_workloads", + }, + }, + { + Dimensions: [][]string{ + {"ClusterName"}, + {"ClusterName", "ClusterQueue"}, + {"ClusterName", "ClusterQueue", "Resource"}, + {"ClusterName", "ClusterQueue", "Resource", "Flavor"}, + {"ClusterName", "ClusterQueue", "Flavor"}, + }, + MetricNameSelectors: []string{ + "kueue_cluster_queue_resource_usage", + "kueue_cluster_queue_nominal_quota", + }, + }, + } + } + return metricDeclarations +} diff --git a/translator/translate/otel/exporter/awsemf/translator.go b/translator/translate/otel/exporter/awsemf/translator.go index 7ce0af0472..18ed408bed 100644 --- a/translator/translate/otel/exporter/awsemf/translator.go +++ b/translator/translate/otel/exporter/awsemf/translator.go @@ -25,6 +25,10 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/util/ecsutil" ) +const ( + kueuePipelineName = "kueueContainerInsights" +) + //go:embed awsemf_default_generic.yaml var defaultGenericConfig string @@ -34,6 +38,9 @@ var defaultEcsConfig string //go:embed awsemf_default_kubernetes.yaml var defaultKubernetesConfig string +//go:embed awsemf_default_kubernetes_kueue.yaml +var defaultKubernetesKueueConfig string + //go:embed awsemf_default_prometheus.yaml var defaultPrometheusConfig string @@ -50,12 +57,13 @@ var appSignalsConfigGeneric string var defaultJmxConfig string var ( - ecsBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.ECSKey) - kubernetesBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.KubernetesKey) - prometheusBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.PrometheusKey) - emfProcessorBasePathKey = common.ConfigKey(prometheusBasePathKey, common.EMFProcessorKey) - endpointOverrideKey = common.ConfigKey(common.LogsKey, common.EndpointOverrideKey) - roleARNPathKey = common.ConfigKey(common.LogsKey, common.CredentialsKey, common.RoleARNKey) + ecsBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.ECSKey) + kubernetesBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.KubernetesKey) + kubernetesKueueBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.KubernetesKey, common.EnableKueueContainerInsights) + prometheusBasePathKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.PrometheusKey) + emfProcessorBasePathKey = common.ConfigKey(prometheusBasePathKey, common.EMFProcessorKey) + endpointOverrideKey = common.ConfigKey(common.LogsKey, common.EndpointOverrideKey) + roleARNPathKey = common.ConfigKey(common.LogsKey, common.CredentialsKey, common.RoleARNKey) ) type translator struct { @@ -89,6 +97,8 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { defaultConfig = defaultJmxConfig } else if isEcs(c) { defaultConfig = defaultEcsConfig + } else if isKubernetesKueue(c, t.name) { + defaultConfig = defaultKubernetesKueueConfig } else if isKubernetes(c) { defaultConfig = defaultKubernetesConfig } else if isPrometheus(c) { @@ -137,6 +147,10 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { if err := setEcsFields(c, cfg); err != nil { return nil, err } + } else if isKubernetesKueue(c, t.name) { + if err := setKubernetesKueueFields(c, cfg); err != nil { + return nil, err + } } else if isKubernetes(c) { if err := setKubernetesFields(c, cfg); err != nil { return nil, err @@ -189,6 +203,11 @@ func isKubernetes(conf *confmap.Conf) bool { return conf.IsSet(kubernetesBasePathKey) } +// `kueue_container_insights` is a child of `kubernetes` in config spec. +func isKubernetesKueue(conf *confmap.Conf, pipelineName string) bool { + return isKubernetes(conf) && pipelineName == kueuePipelineName && common.GetOrDefaultBool(conf, kubernetesKueueBasePathKey, false) +} + func isPrometheus(conf *confmap.Conf) bool { return conf.IsSet(prometheusBasePathKey) } @@ -219,6 +238,16 @@ func setKubernetesFields(conf *confmap.Conf, cfg *awsemfexporter.Config) error { func setCiJmxFields() error { return nil } +func setKubernetesKueueFields(conf *confmap.Conf, cfg *awsemfexporter.Config) error { + setDisableMetricExtraction(kubernetesKueueBasePathKey, conf, cfg) + + if err := setKubernetesKueueMetricDeclaration(conf, cfg); err != nil { + return err + } + + return nil +} + func setPrometheusFields(conf *confmap.Conf, cfg *awsemfexporter.Config) error { setDisableMetricExtraction(prometheusBasePathKey, conf, cfg) diff --git a/translator/translate/otel/exporter/awsemf/translator_test.go b/translator/translate/otel/exporter/awsemf/translator_test.go index 10e7d08280..b0c779198a 100644 --- a/translator/translate/otel/exporter/awsemf/translator_test.go +++ b/translator/translate/otel/exporter/awsemf/translator_test.go @@ -808,6 +808,106 @@ func TestTranslator(t *testing.T) { } } +func TestTranslatorForKueue(t *testing.T) { + t.Setenv(envconfig.AWS_CA_BUNDLE, "/ca/bundle") + agent.Global_Config.Region = "us-east-1" + agent.Global_Config.Role_arn = "global_arn" + tt := NewTranslatorWithName("kueueContainerInsights") + require.EqualValues(t, "awsemf/kueueContainerInsights", tt.ID().String()) + testCases := map[string]struct { + env map[string]string + input map[string]any + want map[string]any // Can't construct & use awsemfexporter.Config as it uses internal only types + wantErr error + }{ + "GenerateAwsEmfExporterConfigForKubernetesKueueMetrics": { + input: map[string]any{ + "logs": map[string]any{ + "metrics_collected": map[string]any{ + "kubernetes": map[string]any{ + "kueue_container_insights": true, + }, + }, + }, + }, + want: map[string]any{ + "namespace": "ContainerInsights/Prometheus", + "log_group_name": "/aws/containerinsights/{ClusterName}/performance", + "log_stream_name": "kubernetes-kueue", + "dimension_rollup_option": "NoDimensionRollup", + "disable_metric_extraction": false, + "enhanced_container_insights": false, + "parse_json_encoded_attr_values": []string{"Sources", "kubernetes"}, + "output_destination": "cloudwatch", + "eks_fargate_container_insights_enabled": false, + "resource_to_telemetry_conversion": resourcetotelemetry.Settings{ + Enabled: true, + }, + "metric_declarations": []*awsemfexporter.MetricDeclaration{ + { + Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "ClusterQueue"}, {"ClusterName", "ClusterQueue", "Status"}, {"ClusterName", "Status"}}, + MetricNameSelectors: []string{ + "kueue_pending_workloads", + }, + }, + { + Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "ClusterQueue"}, {"ClusterName", "ClusterQueue", "Reason"}, {"ClusterName", "Reason"}}, + MetricNameSelectors: []string{ + "kueue_evicted_workloads_total", + }, + }, + { + Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "ClusterQueue"}}, + MetricNameSelectors: []string{ + "kueue_admitted_active_workloads", + }, + }, + { + Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "ClusterQueue"}, {"ClusterName", "ClusterQueue", "Resource"}, {"ClusterName", "ClusterQueue", "Resource", "Flavor"}, {"ClusterName", "ClusterQueue", "Flavor"}}, + MetricNameSelectors: []string{ + "kueue_cluster_queue_resource_usage", + "kueue_cluster_queue_nominal_quota", + }, + }, + }, + "metric_descriptors": nilMetricDescriptorsSlice, + "local_mode": false, + }, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + conf := confmap.NewFromStringMap(testCase.input) + got, err := tt.Translate(conf) + require.Equal(t, testCase.wantErr, err) + require.Truef(t, legacytranslator.IsTranslateSuccess(), "Error in legacy translation rules: %v", legacytranslator.ErrorMessages) + if err == nil { + require.NotNil(t, got) + gotCfg, ok := got.(*awsemfexporter.Config) + require.True(t, ok) + assert.Equal(t, testCase.want["namespace"], gotCfg.Namespace) + assert.Equal(t, testCase.want["log_group_name"], gotCfg.LogGroupName) + assert.Equal(t, testCase.want["log_stream_name"], gotCfg.LogStreamName) + assert.Equal(t, testCase.want["dimension_rollup_option"], gotCfg.DimensionRollupOption) + assert.Equal(t, testCase.want["disable_metric_extraction"], gotCfg.DisableMetricExtraction) + assert.Equal(t, testCase.want["enhanced_container_insights"], gotCfg.EnhancedContainerInsights) + assert.Equal(t, testCase.want["parse_json_encoded_attr_values"], gotCfg.ParseJSONEncodedAttributeValues) + assert.Equal(t, testCase.want["output_destination"], gotCfg.OutputDestination) + assert.Equal(t, testCase.want["eks_fargate_container_insights_enabled"], gotCfg.EKSFargateContainerInsightsEnabled) + assert.Equal(t, testCase.want["resource_to_telemetry_conversion"], gotCfg.ResourceToTelemetrySettings) + assert.ElementsMatch(t, testCase.want["metric_declarations"], gotCfg.MetricDeclarations) + assert.ElementsMatch(t, testCase.want["metric_descriptors"], gotCfg.MetricDescriptors) + assert.Equal(t, testCase.want["local_mode"], gotCfg.LocalMode) + assert.Equal(t, "/ca/bundle", gotCfg.CertificateFilePath) + assert.Equal(t, "global_arn", gotCfg.RoleARN) + assert.Equal(t, "us-east-1", gotCfg.Region) + assert.NotNil(t, gotCfg.MiddlewareID) + assert.Equal(t, "agenthealth/logs", gotCfg.MiddlewareID.String()) + } + }) + } +} + func TestTranslateAppSignals(t *testing.T) { t.Setenv(envconfig.AWS_CA_BUNDLE, "/ca/bundle") agent.Global_Config.Region = "us-east-1" diff --git a/translator/translate/otel/pipeline/containerinsights/translator.go b/translator/translate/otel/pipeline/containerinsights/translator.go index bb316bb68f..a69ad9a645 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator.go +++ b/translator/translate/otel/pipeline/containerinsights/translator.go @@ -14,8 +14,15 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/extension/agenthealth" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/batchprocessor" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/gpu" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/kueue" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/metricstransformprocessor" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsight" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsightskueue" +) + +const ( + ciPipelineName = common.PipelineNameContainerInsights + kueuePipelineName = "kueueContainerInsights" ) var ( @@ -25,16 +32,21 @@ var ( ) type translator struct { + pipelineName string } var _ common.Translator[*common.ComponentTranslators] = (*translator)(nil) func NewTranslator() common.Translator[*common.ComponentTranslators] { - return &translator{} + return NewTranslatorWithName(ciPipelineName) +} + +func NewTranslatorWithName(pipelineName string) common.Translator[*common.ComponentTranslators] { + return &translator{pipelineName: pipelineName} } func (t *translator) ID() component.ID { - return component.NewIDWithName(component.DataTypeMetrics, common.PipelineNameContainerInsights) + return component.NewIDWithName(component.DataTypeMetrics, t.pipelineName) } // Translate creates a pipeline for container insights if the logs.metrics_collected.ecs or logs.metrics_collected.kubernetes @@ -44,27 +56,41 @@ func (t *translator) Translate(conf *confmap.Conf) (*common.ComponentTranslators return nil, &common.MissingKeyError{ID: t.ID(), JsonKey: fmt.Sprint(ecsKey, " or ", eksKey)} } - // Append the metricstransformprocessor only if enhanced container insights is enabled - enhancedContainerInsightsEnabled := awscontainerinsight.EnhancedContainerInsightsEnabled(conf) - if enhancedContainerInsightsEnabled { - processors := common.NewTranslatorMap(metricstransformprocessor.NewTranslatorWithName(common.PipelineNameContainerInsights)) - acceleratedComputeMetricsEnabled := awscontainerinsight.AcceleratedComputeMetricsEnabled(conf) - if acceleratedComputeMetricsEnabled { - processors.Set(gpu.NewTranslatorWithName(common.PipelineNameContainerInsights)) + // create processor map with default batch processor based on pipeline name + processors := common.NewTranslatorMap(batchprocessor.NewTranslatorWithNameAndSection(t.pipelineName, common.LogsKey)) + // create exporter map with default emf exporter based on pipeline name + exporters := common.NewTranslatorMap(awsemf.NewTranslatorWithName(t.pipelineName)) + // create extensions map based on pipeline name + extensions := common.NewTranslatorMap(agenthealth.NewTranslator(component.DataTypeLogs, []string{agenthealth.OperationPutLogEvents})) + // create variable for receivers, use switch block below to assign + var receivers common.TranslatorMap[component.Config] + + switch t.pipelineName { + case ciPipelineName: + // add aws container insights receiver + receivers = common.NewTranslatorMap(awscontainerinsight.NewTranslator()) + // Append the metricstransformprocessor only if enhanced container insights is enabled + enhancedContainerInsightsEnabled := awscontainerinsight.EnhancedContainerInsightsEnabled(conf) + if enhancedContainerInsightsEnabled { + // add metricstransformprocessor to processors for enhanced container insights + processors.Set(metricstransformprocessor.NewTranslatorWithName(t.pipelineName)) + acceleratedComputeMetricsEnabled := awscontainerinsight.AcceleratedComputeMetricsEnabled(conf) + if acceleratedComputeMetricsEnabled { + processors.Set(gpu.NewTranslatorWithName(t.pipelineName)) + } } - processors.Set(batchprocessor.NewTranslatorWithNameAndSection(common.PipelineNameContainerInsights, common.LogsKey)) - return &common.ComponentTranslators{ - Receivers: common.NewTranslatorMap(awscontainerinsight.NewTranslator()), - Processors: processors, // EKS & ECS CI sit under metrics_collected in "logs" - Exporters: common.NewTranslatorMap(awsemf.NewTranslatorWithName(common.PipelineNameContainerInsights)), - Extensions: common.NewTranslatorMap(agenthealth.NewTranslator(component.DataTypeLogs, []string{agenthealth.OperationPutLogEvents})), - }, nil + case kueuePipelineName: + // add prometheus receiver for kueue + receivers = common.NewTranslatorMap((awscontainerinsightskueue.NewTranslator())) + processors.Set(kueue.NewTranslatorWithName(t.pipelineName)) + default: + return nil, fmt.Errorf("unknown container insights pipeline name: %s", t.pipelineName) } return &common.ComponentTranslators{ - Receivers: common.NewTranslatorMap(awscontainerinsight.NewTranslator()), - Processors: common.NewTranslatorMap(batchprocessor.NewTranslatorWithNameAndSection(common.PipelineNameContainerInsights, common.LogsKey)), // EKS & ECS CI sit under metrics_collected in "logs" - Exporters: common.NewTranslatorMap(awsemf.NewTranslatorWithName(common.PipelineNameContainerInsights)), - Extensions: common.NewTranslatorMap(agenthealth.NewTranslator(component.DataTypeLogs, []string{agenthealth.OperationPutLogEvents})), + Receivers: receivers, + Processors: processors, // EKS & ECS CI sit under metrics_collected in "logs" + Exporters: exporters, + Extensions: extensions, }, nil } diff --git a/translator/translate/otel/pipeline/containerinsights/translator_test.go b/translator/translate/otel/pipeline/containerinsights/translator_test.go index d3a4bb67a1..3fe281a167 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator_test.go +++ b/translator/translate/otel/pipeline/containerinsights/translator_test.go @@ -67,23 +67,60 @@ func TestTranslator(t *testing.T) { extensions: []string{"agenthealth/logs"}, }, }, - "WithKubernetes/WithEnhancedContainerInsights": { + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + conf := confmap.NewFromStringMap(testCase.input) + got, err := cit.Translate(conf) + require.Equal(t, testCase.wantErr, err) + if testCase.want == nil { + require.Nil(t, got) + } else { + require.NotNil(t, got) + assert.Equal(t, testCase.want.receivers, collections.MapSlice(got.Receivers.Keys(), component.ID.String)) + assert.Equal(t, testCase.want.processors, collections.MapSlice(got.Processors.Keys(), component.ID.String)) + assert.Equal(t, testCase.want.exporters, collections.MapSlice(got.Exporters.Keys(), component.ID.String)) + assert.Equal(t, testCase.want.extensions, collections.MapSlice(got.Extensions.Keys(), component.ID.String)) + } + }) + } +} + +func TestKueueTranslator(t *testing.T) { + type want struct { + pipelineType string + receivers []string + processors []string + exporters []string + extensions []string + } + cit := NewTranslatorWithName(kueuePipelineName) + require.EqualValues(t, "metrics/kueueContainerInsights", cit.ID().String()) + testCases := map[string]struct { + input map[string]interface{} + want *want + wantErr error + }{ + "WithKueueContainerInsights": { input: map[string]interface{}{ "logs": map[string]interface{}{ "metrics_collected": map[string]interface{}{ "kubernetes": map[string]interface{}{ - "enhanced_container_insights": true, - "cluster_name": "TestCluster", + "kueue_container_insights": true, + "cluster_name": "TestCluster", }, }, }, }, want: &want{ - pipelineType: "metrics/containerinsights", - receivers: []string{"awscontainerinsightreceiver"}, - processors: []string{"metricstransform/containerinsights", "gpuattributes/containerinsights", "batch/containerinsights"}, - exporters: []string{"awsemf/containerinsights"}, - extensions: []string{"agenthealth/logs"}, + pipelineType: "metrics/kueueContainerInsights", + receivers: []string{"awscontainerinsightskueuereceiver"}, + processors: []string{ + "batch/kueueContainerInsights", + "kueueattributes/kueueContainerInsights", + }, + exporters: []string{"awsemf/kueueContainerInsights"}, + extensions: []string{"agenthealth/logs"}, }, }, } diff --git a/translator/translate/otel/pipeline/containerinsights/translators.go b/translator/translate/otel/pipeline/containerinsights/translators.go new file mode 100644 index 0000000000..ae986cc10e --- /dev/null +++ b/translator/translate/otel/pipeline/containerinsights/translators.go @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package containerinsights + +import ( + "go.opentelemetry.io/collector/confmap" + + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/pipeline" +) + +var ( + LogsKey = common.ConfigKey(common.LogsKey, common.MetricsCollectedKey) +) + +func NewTranslators(conf *confmap.Conf) pipeline.TranslatorMap { + translators := common.NewTranslatorMap[*common.ComponentTranslators]() + // create default container insights translator + ciTranslator := NewTranslatorWithName(ciPipelineName) + translators.Set(ciTranslator) + // create kueue container insights translator + KueueContainerInsightsEnabled := common.KueueContainerInsightsEnabled(conf) + if KueueContainerInsightsEnabled { + kueueTranslator := NewTranslatorWithName(kueuePipelineName) + translators.Set(kueueTranslator) + } + // return the translator map + return translators +} diff --git a/translator/translate/otel/pipeline/containerinsights/translators_test.go b/translator/translate/otel/pipeline/containerinsights/translators_test.go new file mode 100644 index 0000000000..178a0159b3 --- /dev/null +++ b/translator/translate/otel/pipeline/containerinsights/translators_test.go @@ -0,0 +1,106 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package containerinsights + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap" + + "github.com/aws/amazon-cloudwatch-agent/internal/util/collections" + _ "github.com/aws/amazon-cloudwatch-agent/translator/registerrules" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" +) + +func TestTranslators(t *testing.T) { + type want struct { + receivers []string + exporters []string + } + testCases := map[string]struct { + input map[string]any + want map[string]want + }{ + "WithContainerInsights": { + input: map[string]interface{}{ + "logs": map[string]interface{}{ + "metrics_collected": map[string]interface{}{ + "kubernetes": map[string]interface{}{ + "cluster_name": "TestCluster", + }, + }, + }, + }, + want: map[string]want{ + "metrics/containerinsights": { + receivers: []string{"awscontainerinsightreceiver"}, + exporters: []string{"awsemf/containerinsights"}, + }, + }, + }, + "WithEnhancedContainerInsights": { + input: map[string]interface{}{ + "logs": map[string]interface{}{ + "metrics_collected": map[string]interface{}{ + "kubernetes": map[string]interface{}{ + "enhanced_container_insights": true, + "cluster_name": "TestCluster", + }, + }, + }, + }, + want: map[string]want{ + "metrics/containerinsights": { + receivers: []string{"awscontainerinsightreceiver"}, + exporters: []string{"awsemf/containerinsights"}, + }, + }, + }, + "WithContainerInsightsAndKueueMetrics": { + input: map[string]interface{}{ + "logs": map[string]interface{}{ + "metrics_collected": map[string]interface{}{ + "kubernetes": map[string]interface{}{ + "kueue_container_insights": true, + "cluster_name": "TestCluster", + }, + }, + }, + }, + want: map[string]want{ + "metrics/containerinsights": { + receivers: []string{"awscontainerinsightreceiver"}, + exporters: []string{"awsemf/containerinsights"}, + }, + "metrics/kueueContainerInsights": { + receivers: []string{"awscontainerinsightskueuereceiver"}, + exporters: []string{"awsemf/kueueContainerInsights"}, + }, + }, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + conf := confmap.NewFromStringMap(testCase.input) + got := NewTranslators(conf) + if testCase.want == nil { + require.Nil(t, got) + } else { + require.NotNil(t, got) + assert.Equal(t, len(testCase.want), got.Len()) + got.Range(func(tr common.Translator[*common.ComponentTranslators]) { + w, ok := testCase.want[tr.ID().String()] + require.True(t, ok) + g, err := tr.Translate(conf) + assert.NoError(t, err) + assert.Equal(t, w.receivers, collections.MapSlice(g.Receivers.Keys(), component.ID.String)) + assert.Equal(t, w.exporters, collections.MapSlice(g.Exporters.Keys(), component.ID.String)) + }) + } + }) + } +} diff --git a/translator/translate/otel/processor/kueue/translator.go b/translator/translate/otel/processor/kueue/translator.go new file mode 100644 index 0000000000..d072144b4d --- /dev/null +++ b/translator/translate/otel/processor/kueue/translator.go @@ -0,0 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package kueue + +import ( + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap" + "go.opentelemetry.io/collector/processor" + + "github.com/aws/amazon-cloudwatch-agent/plugins/processors/kueueattributes" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" +) + +type translator struct { + name string + factory processor.Factory +} + +var _ common.Translator[component.Config] = (*translator)(nil) + +func NewTranslatorWithName(name string) common.Translator[component.Config] { + return &translator{name, kueueattributes.NewFactory()} +} + +func (t *translator) ID() component.ID { + return component.NewIDWithName(t.factory.Type(), t.name) +} + +func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { + cfg := t.factory.CreateDefaultConfig().(*kueueattributes.Config) + return cfg, nil +} diff --git a/translator/translate/otel/receiver/awscontainerinsightskueue/translator.go b/translator/translate/otel/receiver/awscontainerinsightskueue/translator.go new file mode 100644 index 0000000000..0e3582f6ff --- /dev/null +++ b/translator/translate/otel/receiver/awscontainerinsightskueue/translator.go @@ -0,0 +1,74 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package awscontainerinsightskueue + +import ( + "errors" + "time" + + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap" + "go.opentelemetry.io/collector/receiver" + + "github.com/aws/amazon-cloudwatch-agent/translator/translate/logs/util" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" +) + +const ( + defaultMetricsCollectionInterval = time.Minute +) + +type translator struct { + name string + factory receiver.Factory +} + +var _ common.Translator[component.Config] = (*translator)(nil) + +// NewTranslator creates a new aws container insight receiver translator. +func NewTranslator() common.Translator[component.Config] { + return NewTranslatorWithName("") +} + +func NewTranslatorWithName(name string) common.Translator[component.Config] { + return &translator{ + name: name, + factory: awscontainerinsightskueuereceiver.NewFactory(), + } +} + +func (t *translator) ID() component.ID { + return component.NewIDWithName(t.factory.Type(), t.name) +} + +// Translate creates an aws container insights receiver config if either +// of the sections defined in the services exist. +func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { + cfg := t.factory.CreateDefaultConfig().(*awscontainerinsightskueuereceiver.Config) + intervalKeyChain := []string{ + common.ConfigKey(common.AgentKey, common.MetricsCollectionIntervalKey), + } + cfg.CollectionInterval = common.GetOrDefaultDuration(conf, intervalKeyChain, defaultMetricsCollectionInterval) + + if err := t.setClusterName(conf, cfg); err != nil { + return nil, err + } + + return cfg, nil +} + +func (t *translator) setClusterName(conf *confmap.Conf, cfg *awscontainerinsightskueuereceiver.Config) error { + clusterNameKey := common.ConfigKey(common.LogsKey, common.MetricsCollectedKey, common.KubernetesKey, "cluster_name") + if clusterName, ok := common.GetString(conf, clusterNameKey); ok { + cfg.ClusterName = clusterName + } else { + cfg.ClusterName = util.GetClusterNameFromEc2Tagger() + } + + if cfg.ClusterName == "" { + return errors.New("cluster name is not provided and was not auto-detected from EC2 tags") + } + return nil +} diff --git a/translator/translate/otel/receiver/awscontainerinsightskueue/translator_test.go b/translator/translate/otel/receiver/awscontainerinsightskueue/translator_test.go new file mode 100644 index 0000000000..1216605538 --- /dev/null +++ b/translator/translate/otel/receiver/awscontainerinsightskueue/translator_test.go @@ -0,0 +1,79 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package awscontainerinsightskueue + +import ( + "testing" + "time" + + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightskueuereceiver" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/confmap" + + "github.com/aws/amazon-cloudwatch-agent/translator/context" +) + +func TestTranslator(t *testing.T) { + acit := NewTranslator() + require.EqualValues(t, "awscontainerinsightskueuereceiver", acit.ID().String()) + testCases := map[string]struct { + input map[string]interface{} + isSystemd bool + want *awscontainerinsightskueuereceiver.Config + wantErr error + }{ + "WithClusterName": { + input: map[string]interface{}{ + "logs": map[string]interface{}{ + "metrics_collected": map[string]interface{}{ + "kubernetes": map[string]interface{}{ + "cluster_name": "TestCluster", + "kueue_container_insights": true, + }, + }, + }, + }, + isSystemd: true, + want: &awscontainerinsightskueuereceiver.Config{ + CollectionInterval: 60 * time.Second, + ClusterName: "TestCluster", + }, + }, + "WithClusterNameAndCollectionInterval": { + input: map[string]interface{}{ + "logs": map[string]interface{}{ + "metrics_collected": map[string]interface{}{ + "kubernetes": map[string]interface{}{ + "cluster_name": "TestCluster", + "kueue_container_insights": true, + }, + }, + }, + "agent": map[string]interface{}{ + "metrics_collection_interval": 30, + }, + }, + isSystemd: true, + want: &awscontainerinsightskueuereceiver.Config{ + CollectionInterval: 30 * time.Second, + ClusterName: "TestCluster", + }, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + context.CurrentContext().SetRunInContainer(!testCase.isSystemd) + conf := confmap.NewFromStringMap(testCase.input) + got, err := acit.Translate(conf) + require.Equal(t, testCase.wantErr, err) + if err == nil { + require.NotNil(t, got) + gotCfg, ok := got.(*awscontainerinsightskueuereceiver.Config) + require.True(t, ok) + require.Equal(t, testCase.want.CollectionInterval, gotCfg.CollectionInterval) + require.Equal(t, testCase.want.ClusterName, gotCfg.ClusterName) + } + }) + } +} diff --git a/translator/translate/otel/translate_otel.go b/translator/translate/otel/translate_otel.go index 40f400bf2e..c1ece60e04 100644 --- a/translator/translate/otel/translate_otel.go +++ b/translator/translate/otel/translate_otel.go @@ -66,9 +66,10 @@ func Translate(jsonConfig interface{}, os string) (*otelcol.Config, error) { return nil, err } translators.Merge(logsHostTranslators) + containerInsightsTranslators := containerinsights.NewTranslators(conf) + translators.Merge(containerInsightsTranslators) translators.Set(applicationsignals.NewTranslator(component.DataTypeTraces)) translators.Set(applicationsignals.NewTranslator(component.DataTypeMetrics)) - translators.Set(containerinsights.NewTranslator()) translators.Merge(prometheus.NewTranslators(conf)) translators.Set(emf_logs.NewTranslator()) translators.Set(xray.NewTranslator())