From 95c489e72ddc81dd62bf44788db9c707148a82e4 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Fri, 3 Sep 2021 18:25:19 +0500 Subject: [PATCH] OOM kill tracking (#252) --- includes/bpf_helpers.h | 2 ++ includes/netdata_ebpf.h | 3 ++- includes/netdata_oomkill.h | 16 ++++++++++++++++ kernel/Makefile | 1 + kernel/README.md | 1 + kernel/oomkill_kern.c | 29 +++++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 includes/netdata_oomkill.h create mode 100644 kernel/oomkill_kern.c diff --git a/includes/bpf_helpers.h b/includes/bpf_helpers.h index ead9c51d..c6e83597 100644 --- a/includes/bpf_helpers.h +++ b/includes/bpf_helpers.h @@ -21,6 +21,8 @@ static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = (void *) BPF_FUNC_probe_read; +static int (*bpf_probe_read_str)(void *dst, int size, void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_str; static unsigned long long (*bpf_ktime_get_ns)(void) = (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = diff --git a/includes/netdata_ebpf.h b/includes/netdata_ebpf.h index 4f5706cc..655cd1cf 100644 --- a/includes/netdata_ebpf.h +++ b/includes/netdata_ebpf.h @@ -19,6 +19,7 @@ This header has the common definitions for all `.c` files. 
#include "netdata_fs.h" #include "netdata_hardirq.h" #include "netdata_mount.h" +#include "netdata_oomkill.h" #include "netdata_process.h" #include "netdata_socket.h" #include "netdata_softirq.h" @@ -115,7 +116,7 @@ static inline void libnetdata_update_u32(u32 *res, u32 value) return; __sync_fetch_and_add(res, value); - if ( (0xFFFFFFFFFFFFFFFF - *res) <= value) { + if ( (0xFFFFFFFF - *res) <= value) { *res = value; } } diff --git a/includes/netdata_oomkill.h b/includes/netdata_oomkill.h new file mode 100644 index 00000000..68e82c82 --- /dev/null +++ b/includes/netdata_oomkill.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef _NETDATA_OOMKILL_H_ +#define _NETDATA_OOMKILL_H_ 1 + +// to try and only use 4096 bytes in the map and no more given 4 byte keys & 1 +// byte values, we choose a very small number. +#define NETDATA_OOMKILL_MAX_ENTRIES 64 + +// /sys/kernel/debug/tracing/events/oom/mark_victim/ +struct netdata_oom_mark_victim_entry { + u64 pad; // This is not used with eBPF + int pid; // offset:8; size:4; signed:1; +}; + +#endif /* _NETDATA_OOMKILL_H_ */ diff --git a/kernel/Makefile b/kernel/Makefile index cff026b7..30238def 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -44,6 +44,7 @@ NETDATA_APPS= btrfs \ mount \ msync \ nfs \ + oomkill \ process \ socket \ softirq \ diff --git a/kernel/README.md b/kernel/README.md index 6321b33c..aa476c21 100644 --- a/kernel/README.md +++ b/kernel/README.md @@ -26,6 +26,7 @@ Right now we have the following `eBPF` program collectors: - `mount_kern.c` : monitor calls for syscalls `mount` and `umount`. - `msync_kern.c` : monitor calls for syscall `msync`. - `nfs_kern.c` : provides nfs monitoring. +- `oomkill_kern.c` : provides info on which processes got OOM killed. - `process_kern.c` : provides process, file and VFS stats. - `socket_kern.c` : provides network stats; - `softirq_kern.c` : provides software interrupt (soft IRQ) latency monitoring. 
diff --git a/kernel/oomkill_kern.c b/kernel/oomkill_kern.c
new file mode 100644
index 00000000..86fcc661
--- /dev/null
+++ b/kernel/oomkill_kern.c
@@ -0,0 +1,29 @@
+#define KBUILD_MODNAME "oomkill_netdata"
+#include // NOTE(review): no header name follows this #include or the next three; it appears to have been lost from this copy of the patch -- restore from the committed file
+#include
+#include
+#include
+
+#include "bpf_helpers.h"
+#include "netdata_ebpf.h"
+
+struct bpf_map_def SEC("maps") tbl_oomkill = { // table of OOM victims keyed by PID; filled by netdata_oom_mark_victim()
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)) // kernels older than 4.15 get a plain hash map
+    .type = BPF_MAP_TYPE_HASH,
+#else // 4.15 and newer use the per-CPU hash map type
+    .type = BPF_MAP_TYPE_PERCPU_HASH,
+#endif
+    .key_size = sizeof(int), // key: the int pid read from the tracepoint record
+    .value_size = sizeof(__u8), // value: one byte; the handler always stores 0
+    .max_entries = NETDATA_OOMKILL_MAX_ENTRIES // capacity limit for the table
+};
+
+SEC("tracepoint/oom/mark_victim") // program section for the oom/mark_victim tracepoint
+int netdata_oom_mark_victim(struct netdata_oom_mark_victim_entry *ptr) { // records ptr->pid in tbl_oomkill; always returns 0
+    int key = ptr->pid;
+    u8 val = 0; // constant value, so only the key (the PID) carries information
+    bpf_map_update_elem(&tbl_oomkill, &key, &val, BPF_ANY); // BPF_ANY: insert, or overwrite an existing entry; the result is not checked
+    return 0;
+}
+
+char _license[] SEC("license") = "GPL";