From 03e04bc17f4f98e1dca3dbd8c4253825a0b0f19e Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Tue, 3 Oct 2023 08:00:05 +0300 Subject: [PATCH] Add Misc cgroup support to Prometheus metrics --- docs/storage/prometheus.md | 2 + metrics/prometheus.go | 37 +++++++++++++++++++ metrics/prometheus_fake.go | 10 +++++ metrics/testdata/prometheus_metrics | 8 ++++ .../prometheus_metrics_whitelist_filtered | 8 ++++ 5 files changed, 65 insertions(+) diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md index 7bb3c465ea..fa096078a2 100644 --- a/docs/storage/prometheus.md +++ b/docs/storage/prometheus.md @@ -50,6 +50,8 @@ Metric name | Type | Description | Unit (where applicable) | option parameter | `container_hugetlb_failcnt` | Counter | Number of hugepage usage hits limits | | hugetlb | `container_hugetlb_max_usage_bytes` | Gauge | Maximum hugepage usages recorded | bytes | hugetlb | `container_hugetlb_usage_bytes` | Gauge | Current hugepage usage | bytes | hugetlb | +`container_misc_usage` | Gauge | Current usage of the misc scalar resource specified by the label | | misc | +`container_misc_events` | Counter | Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary | | misc | `container_last_seen` | Gauge | Last time a container was seen by the exporter | timestamp | - | `container_llc_occupancy_bytes` | Gauge | Last level cache usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl | `container_memory_bandwidth_bytes` | Gauge | Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl | diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 86064819d3..21bc78745d 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -368,6 +368,43 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri }, }...) 
} + if includedMetrics.Has(container.MiscMetrics) { + c.containerMetrics = append(c.containerMetrics, []containerMetric{ + { + name: "container_misc_usage", + help: "Current usage of the misc scalar resource specified by the label", + valueType: prometheus.GaugeValue, + extraLabels: []string{"resource"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.Misc)) + for k, v := range s.Misc { + values = append(values, metricValue{ + value: float64(v.Usage), + labels: []string{k}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, { + name: "container_misc_events", + help: "Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary", + valueType: prometheus.CounterValue, + extraLabels: []string{"resource"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.Misc)) + for k, v := range s.Misc { + values = append(values, metricValue{ + value: float64(v.Events), + labels: []string{k}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, + }...) 
+ } if includedMetrics.Has(container.MemoryUsageMetrics) { c.containerMetrics = append(c.containerMetrics, []containerMetric{ { diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index fd43b78148..0aa4a66e39 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -371,6 +371,16 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req Failcnt: 0, }, }, + Misc: map[string]info.MiscStats{ + "res_a": { + Usage: 1, + Events: 42, + }, + "res_b": { + Usage: 2, + Events: 42, + }, + }, Network: info.NetworkStats{ InterfaceStats: info.InterfaceStats{ Name: "eth0", diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index a385e50689..63831f3220 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",container_label_f # TYPE container_hugetlb_usage_bytes gauge container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000 container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000 +# HELP container_misc_usage Current usage of the misc scalar resource specified by the label +# TYPE container_misc_usage gauge +container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000 +container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000 +# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary +# TYPE container_misc_events 
counter +container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000 +container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000 # HELP container_last_seen Last time a container was seen by the exporter # TYPE container_last_seen gauge container_last_seen{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000 diff --git a/metrics/testdata/prometheus_metrics_whitelist_filtered b/metrics/testdata/prometheus_metrics_whitelist_filtered index 921b2e1106..72d5700ac1 100644 --- a/metrics/testdata/prometheus_metrics_whitelist_filtered +++ b/metrics/testdata/prometheus_metrics_whitelist_filtered @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",id="testcontainer # TYPE container_hugetlb_usage_bytes gauge container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000 container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000 +# HELP container_misc_usage Current usage of the misc scalar resource specified by the label +# TYPE container_misc_usage gauge +container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000 +container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000 +# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary +# TYPE 
container_misc_events counter +container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000 +container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000 # HELP container_last_seen Last time a container was seen by the exporter # TYPE container_last_seen gauge container_last_seen{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000