Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add misc controller metrics #3420

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/cadvisor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func TestToIncludedMetrics(t *testing.T) {
container.ProcessMetrics: struct{}{},
container.AppMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.MiscMetrics: struct{}{},
container.PerfMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
container.CPUTopologyMetrics: struct{}{},
Expand Down
8 changes: 8 additions & 0 deletions container/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ func getSpecInternal(cgroupPaths map[string]string, machineInfoFactory info.Mach
}
}

// Misc controller
miscRoot, ok := cgroupPaths["misc"]
if ok {
if utils.FileExists(miscRoot) {
spec.HasMisc = true
}
}

// Processes, read its value from pids path directly
pidsRoot, ok := GetControllerPath(cgroupPaths, "pids", cgroup2UnifiedMode)
if ok {
Expand Down
2 changes: 2 additions & 0 deletions container/common/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func TestGetSpecCgroupV1(t *testing.T) {
assert.EqualValues(t, spec.Processes.Limit, 1027)

assert.False(t, spec.HasHugetlb)
assert.False(t, spec.HasMisc)
assert.False(t, spec.HasDiskIo)
}

Expand Down Expand Up @@ -168,6 +169,7 @@ func TestGetSpecCgroupV2(t *testing.T) {
assert.EqualValues(t, spec.Processes.Limit, 1027)

assert.False(t, spec.HasHugetlb)
assert.False(t, spec.HasMisc)
assert.True(t, spec.HasDiskIo)
}

Expand Down
2 changes: 2 additions & 0 deletions container/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const (
AppMetrics MetricKind = "app"
ProcessMetrics MetricKind = "process"
HugetlbUsageMetrics MetricKind = "hugetlb"
MiscMetrics MetricKind = "misc"
PerfMetrics MetricKind = "perf_event"
ReferencedMemoryMetrics MetricKind = "referenced_memory"
CPUTopologyMetrics MetricKind = "cpu_topology"
Expand All @@ -85,6 +86,7 @@ var AllMetrics = MetricSet{
ProcessMetrics: struct{}{},
AppMetrics: struct{}{},
HugetlbUsageMetrics: struct{}{},
MiscMetrics: struct{}{},
PerfMetrics: struct{}{},
ReferencedMemoryMetrics: struct{}{},
CPUTopologyMetrics: struct{}{},
Expand Down
13 changes: 13 additions & 0 deletions container/libcontainer/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,16 @@ func setHugepageStats(s *cgroups.Stats, ret *info.ContainerStats) {
}
}

// setMiscStats copies the per-resource misc statistics from s into ret.Misc,
// keyed by the same resource name used in s.MiscStats.
//
// ret.Misc is always replaced with a fresh, non-nil map, even when s reports
// no misc resources. s and ret must be non-nil.
func setMiscStats(s *cgroups.Stats, ret *info.ContainerStats) {
	// The number of entries is known up front, so size the map once instead
	// of letting it grow while it is being filled.
	ret.Misc = make(map[string]info.MiscStats, len(s.MiscStats))
	for resource, stat := range s.MiscStats {
		ret.Misc[resource] = info.MiscStats{
			Usage:  stat.Usage,
			Events: stat.Events,
		}
	}
}

// read from pids path not cpu
func setThreadsStats(s *cgroups.Stats, ret *info.ContainerStats) {
if s != nil {
Expand All @@ -909,6 +919,9 @@ func newContainerStats(cgroupStats *cgroups.Stats, includedMetrics container.Met
if includedMetrics.Has(container.HugetlbUsageMetrics) {
setHugepageStats(s, ret)
}
if includedMetrics.Has(container.MiscMetrics) {
setMiscStats(s, ret)
}
if includedMetrics.Has(container.CPUSetMetrics) {
setCPUSetStats(s, ret)
}
Expand Down
1 change: 1 addition & 0 deletions container/libcontainer/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ var supportedSubsystems = map[string]container.MetricKind{
"cpuacct": container.CpuUsageMetrics,
"memory": container.MemoryUsageMetrics,
"hugetlb": container.HugetlbUsageMetrics,
"misc": container.MiscMetrics,
"pids": container.ProcessMetrics,
"cpuset": container.CPUSetMetrics,
"blkio": container.DiskIOMetrics,
Expand Down
4 changes: 2 additions & 2 deletions docs/runtime_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ cAdvisor stores the latest historical data in memory. How long of a history it s
--application_metrics_count_limit=100: Max number of application metrics to store (per container) (default 100)
--collector_cert="": Collector's certificate, exposed to endpoints for certificate based authentication.
--collector_key="": Key for the collector's certificate
--disable_metrics=<metrics>: comma-separated list of metrics to be disabled. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. (default advtcp,cpu_topology,cpuset,hugetlb,memory_numa,process,referenced_memory,resctrl,sched,tcp,udp)
--enable_metrics=<metrics>: comma-separated list of metrics to be enabled. If set, overrides 'disable_metrics'. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
--disable_metrics=<metrics>: comma-separated list of metrics to be disabled. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,misc,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. (default advtcp,cpu_topology,cpuset,hugetlb,memory_numa,misc,process,referenced_memory,resctrl,sched,tcp,udp)
--enable_metrics=<metrics>: comma-separated list of metrics to be enabled. If set, overrides 'disable_metrics'. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,misc,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
--prometheus_endpoint="/metrics": Endpoint to expose Prometheus metrics on (default "/metrics")
--disable_root_cgroup_stats=false: Disable collecting root Cgroup stats
```
Expand Down
2 changes: 2 additions & 0 deletions docs/storage/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ Metric name | Type | Description | Unit (where applicable) | option parameter |
`container_hugetlb_failcnt` | Counter | Number of hugepage usage hits limits | | hugetlb |
`container_hugetlb_max_usage_bytes` | Gauge | Maximum hugepage usages recorded | bytes | hugetlb |
`container_hugetlb_usage_bytes` | Gauge | Current hugepage usage | bytes | hugetlb |
`container_misc_usage` | Gauge | Current usage of the misc scalar resource specified by the label | | misc |
`container_misc_events` | Counter | Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary | | misc |
`container_last_seen` | Gauge | Last time a container was seen by the exporter | timestamp | - |
`container_llc_occupancy_bytes` | Gauge | Last level cache usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl |
`container_memory_bandwidth_bytes` | Gauge | Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl |
Expand Down
16 changes: 16 additions & 0 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ type ContainerSpec struct {

HasHugetlb bool `json:"has_hugetlb"`

HasMisc bool `json:"has_misc"`

HasNetwork bool `json:"has_network"`

HasProcesses bool `json:"has_processes"`
Expand Down Expand Up @@ -205,6 +207,9 @@ func (s *ContainerSpec) Eq(b *ContainerSpec) bool {
if s.HasHugetlb != b.HasHugetlb {
return false
}
if s.HasMisc != b.HasMisc {
return false
}
if s.HasNetwork != b.HasNetwork {
return false
}
Expand Down Expand Up @@ -364,6 +369,13 @@ type HugetlbStats struct {
Failcnt uint64 `json:"failcnt"`
}

// MiscStats holds the statistics reported for a single resource of the misc
// cgroup controller.
type MiscStats struct {
// Usage is the current resource usage for a key in misc.
Usage uint64 `json:"usage,omitempty"`
// Events is the number of times the resource was about to go over the max boundary.
Events uint64 `json:"events,omitempty"`
}

type MemoryStats struct {
// Current memory usage, this includes all memory regardless of when it was
// accessed.
Expand Down Expand Up @@ -947,6 +959,7 @@ type ContainerStats struct {
DiskIo DiskIoStats `json:"diskio,omitempty"`
Memory MemoryStats `json:"memory,omitempty"`
Hugetlb map[string]HugetlbStats `json:"hugetlb,omitempty"`
Misc map[string]MiscStats `json:"misc,omitempty"`
Network NetworkStats `json:"network,omitempty"`
// Filesystem statistics
Filesystem []FsStats `json:"filesystem,omitempty"`
Expand Down Expand Up @@ -1016,6 +1029,9 @@ func (a *ContainerStats) StatsEq(b *ContainerStats) bool {
if !reflect.DeepEqual(a.Hugetlb, b.Hugetlb) {
return false
}
if !reflect.DeepEqual(a.Misc, b.Misc) {
return false
}
if !reflect.DeepEqual(a.DiskIo, b.DiskIo) {
return false
}
Expand Down
4 changes: 4 additions & 0 deletions info/v2/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ type ContainerSpec struct {

HasHugetlb bool `json:"has_hugetlb"`

HasMisc bool `json:"has_misc"`

HasCustomMetrics bool `json:"has_custom_metrics"`
CustomMetrics []v1.MetricSpec `json:"custom_metrics,omitempty"`

Expand Down Expand Up @@ -163,6 +165,8 @@ type ContainerStats struct {
Memory *v1.MemoryStats `json:"memory,omitempty"`
// Hugepage statistics
Hugetlb *map[string]v1.HugetlbStats `json:"hugetlb,omitempty"`
// Misc statistics
Misc *map[string]v1.MiscStats `json:"misc,omitempty"`
// Network statistics
Network *NetworkStats `json:"network,omitempty"`
// Processes statistics
Expand Down
4 changes: 4 additions & 0 deletions info/v2/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats []
if spec.HasHugetlb {
stat.Hugetlb = &val.Hugetlb
}
if spec.HasMisc {
stat.Misc = &val.Misc
}
if spec.HasNetwork {
// TODO: Handle TcpStats
stat.Network = &NetworkStats{
Expand Down Expand Up @@ -288,6 +291,7 @@ func ContainerSpecFromV1(specV1 *v1.ContainerSpec, aliases []string, namespace s
HasCpu: specV1.HasCpu,
HasMemory: specV1.HasMemory,
HasHugetlb: specV1.HasHugetlb,
HasMisc: specV1.HasMisc,
HasFilesystem: specV1.HasFilesystem,
HasNetwork: specV1.HasNetwork,
HasProcesses: specV1.HasProcesses,
Expand Down
4 changes: 4 additions & 0 deletions info/v2/conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func TestContainerSpecFromV1(t *testing.T) {
SwapLimit: 8192,
},
HasHugetlb: true,
HasMisc: true,
HasNetwork: true,
HasProcesses: true,
HasFilesystem: true,
Expand Down Expand Up @@ -82,6 +83,7 @@ func TestContainerSpecFromV1(t *testing.T) {
SwapLimit: 8192,
},
HasHugetlb: true,
HasMisc: true,
HasNetwork: true,
HasProcesses: true,
HasFilesystem: true,
Expand Down Expand Up @@ -121,6 +123,7 @@ func TestContainerStatsFromV1(t *testing.T) {
SwapLimit: 8192,
},
HasHugetlb: true,
HasMisc: true,
HasNetwork: true,
HasProcesses: true,
HasFilesystem: true,
Expand Down Expand Up @@ -263,6 +266,7 @@ func TestContainerStatsFromV1(t *testing.T) {
DiskIo: &v1Stats.DiskIo,
Memory: &v1Stats.Memory,
Hugetlb: &v1Stats.Hugetlb,
Misc: &v1Stats.Misc,
Processes: &v1Stats.Processes,
Network: &NetworkStats{
Interfaces: v1Stats.Network.Interfaces,
Expand Down
37 changes: 37 additions & 0 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,43 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
}...)
}
if includedMetrics.Has(container.MiscMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_misc_usage",
help: "Current usage of the misc scalar resource specified by the label",
valueType: prometheus.GaugeValue,
extraLabels: []string{"resource"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.Misc))
for k, v := range s.Misc {
values = append(values, metricValue{
value: float64(v.Usage),
labels: []string{k},
timestamp: s.Timestamp,
})
}
return values
},
}, {
name: "container_misc_events",
help: "Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary",
valueType: prometheus.CounterValue,
extraLabels: []string{"resource"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.Misc))
for k, v := range s.Misc {
values = append(values, metricValue{
value: float64(v.Events),
labels: []string{k},
timestamp: s.Timestamp,
})
}
return values
},
Comment on lines +378 to +404
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This same anon function is duplicated also for s.HugeTlb. Maybe it would make sense to split it into a generic (template) helper function?

},
}...)
}
if includedMetrics.Has(container.MemoryUsageMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
Expand Down
10 changes: 10 additions & 0 deletions metrics/prometheus_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,16 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
Failcnt: 0,
},
},
Misc: map[string]info.MiscStats{
"res_a": {
Usage: 1,
Events: 42,
},
"res_b": {
Usage: 2,
Events: 42,
},
},
Network: info.NetworkStats{
InterfaceStats: info.InterfaceStats{
Name: "eth0",
Expand Down
8 changes: 8 additions & 0 deletions metrics/testdata/prometheus_metrics
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",container_label_f
# TYPE container_hugetlb_usage_bytes gauge
container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000
container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000
# HELP container_misc_usage Current usage of the misc scalar resource specified by the label
# TYPE container_misc_usage gauge
container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000
container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000
# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary
# TYPE container_misc_events counter
container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000
container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000
# HELP container_last_seen Last time a container was seen by the exporter
# TYPE container_last_seen gauge
container_last_seen{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000
Expand Down
8 changes: 8 additions & 0 deletions metrics/testdata/prometheus_metrics_whitelist_filtered
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",id="testcontainer
# TYPE container_hugetlb_usage_bytes gauge
container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000
container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000
# HELP container_misc_usage Current usage of the misc scalar resource specified by the label
# TYPE container_misc_usage gauge
container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000
container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000
# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary
# TYPE container_misc_events counter
container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000
container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000
# HELP container_last_seen Last time a container was seen by the exporter
# TYPE container_last_seen gauge
container_last_seen{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000
Expand Down