Skip to content

Commit

Permalink
Add BPF-based processes_events table on Linux.
Browse files Browse the repository at this point in the history
This is still experimental and may require further tuning to improve
the data being returned.
  • Loading branch information
rsmmr committed Jan 12, 2024
1 parent 327e8ae commit db68783
Show file tree
Hide file tree
Showing 10 changed files with 361 additions and 6 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
ZEEK_VERSION: ${{ matrix.zeek.version }}
ZEEK_TAG: ${{ matrix.zeek.tag }}
ZEEK_AGENT_CONFIGURE_ADDL: ${{ matrix.configure }}
LD_LIBRARY_PATH: /usr/lib/llvm-17/lib/clang/17/lib/linux

steps:
- name: Prepare
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details.

cmake_minimum_required(VERSION 3.15.1)

Expand Down
2 changes: 1 addition & 1 deletion auxil/run-clang-tidy
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
#
# Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details.

usage() {
echo "Usage: $(basename $0) [--fixit] [-j <proc>] [--clang-tidy-path <clang-tidy>] [--clang-tidy-arg <addl-arg>] <build-directory> [<files>]"
Expand Down
2 changes: 1 addition & 1 deletion cmake/CheckCompiler.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details.
#
# Adapted from Zeek.

Expand Down
2 changes: 1 addition & 1 deletion cmake/Util.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details.
#
# A collection of small helpers for the Zeek Agent build system.

Expand Down
1 change: 1 addition & 0 deletions src/tables/processes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ if ( HAVE_DARWIN )
endif ()

if ( HAVE_LINUX )
generate_bpf_code(zeek-agent processes processes.linux.bpf.c)
target_sources(zeek-agent PRIVATE processes.linux.cc)
target_link_libraries(zeek-agent PRIVATE pfs)
endif ()
Expand Down
3 changes: 2 additions & 1 deletion src/tables/processes/processes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class ProcessesEventsCommon : public EventTable {
.description = R"(
The table reports processes starting and stopping on the endpoint.
)",
.platforms = { Platform::Darwin },
.platforms = { Platform::Darwin, Platform::Linux },
.columns = {
{.name = "time", .type = value::Type::Time, .summary = "timestamp"},
{.name = "name", .type = value::Type::Text, .summary = "name of process"},
Expand All @@ -70,4 +70,5 @@ class ProcessesEventsCommon : public EventTable {
};
}
};

} // namespace zeek::agent::table
228 changes: 228 additions & 0 deletions src/tables/processes/processes.linux.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
//
// TODO: - Expire map state on inactivity.
// - This doesn't capture all processes yet, I believe (see TODO on empty names below; maybe more).
// - Is our collection of execution times and memory usage correct for multiple threads? Do we need to aggregate?

#include "processes.linux.event.h"

// clang-format off
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// clang-format on

#include <string.h>

#include <sys/types.h>

char LICENSE[] SEC("license") = "Dual BSD/GPL"; // don't change; must be a license known by kernel

// Ring buffer for passing events to user land. Each record written into it
// is one `struct bpfProcessEvent` (see `sendProcessEvent()`).
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
// For ring buffer maps, `max_entries` is the buffer size in bytes (per the
// kernel's BPF ring buffer documentation), i.e. 256 KiB here.
__uint(max_entries, 256 * 1024);
} ring_buffer SEC(".maps");

// State maintained in map during process' lifetime. One instance is stored
// per key in `process_table`.
struct bpfProcess {
// Value of `bpf_ktime_get_boot_ns()` when the entry was created; the exit
// probe sets it to -1 when the process' start was not observed.
__s64 start_time;
struct bpfProcessEvent event; // current event, filled out as much as possible
};

// Table tracking active processes.
//
// Entries are added by `startNewProcess()` and removed by `removeProcess()`;
// the probes in this file use the current `task_struct` pointer as the key.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, const void*); // process key
__type(value, struct bpfProcess);
// Upper bound on concurrently tracked entries; nothing in this file
// expires entries other than the exit probe.
__uint(max_entries, 1000);
} process_table SEC(".maps");

static struct bpfProcess* startNewProcess(const void* key, int seed_name) {
struct bpfProcess process;
bzero(&process, sizeof(process));

process.start_time = (__s64)bpf_ktime_get_boot_ns();

// Pre-fill event information that we got here. May be updated later.
process.event.pid = (bpf_get_current_pid_tgid() >> 32);
process.event.uid = (bpf_get_current_uid_gid() & 0xffffffff);
process.event.gid = (bpf_get_current_uid_gid() >> 32);
process.event.state = BPF_PROCESS_STATE_UNKNOWN;

if ( seed_name ) {
// Pre-seed with the current process name.
char name[BPF_PROCESS_NAME_MAX];
bpf_get_current_comm(process.event.name, BPF_PROCESS_NAME_MAX);
}

bpf_map_update_elem(&process_table, &key, &process, BPF_ANY);
return bpf_map_lookup_elem(&process_table, &key);
}

// Drops whatever state the process table holds for `key`; the result of the
// deletion is not checked.
static void removeProcess(const void* key) {
    // The map is keyed by the pointer value itself, so pass the address of `key`.
    bpf_map_delete_elem(&process_table, &key);
}
// Returns the process table's entry for `key`, or a null pointer if the key
// is not being tracked.
static struct bpfProcess* lookupProcess(const void* key) {
    // The map is keyed by the pointer value itself, so pass the address of `key`.
    struct bpfProcess* entry = bpf_map_lookup_elem(&process_table, &key);
    return entry;
}

// From vmlinux.
//
// Wrapper around a numeric user ID; mirrors the kernel's type so that
// `cred` fields can be addressed as `<field>.val`.
typedef struct {
uid_t val;
} kuid_t;

// From vmlinux.
//
// Wrapper around a numeric group ID, analogous to `kuid_t`.
typedef struct {
gid_t val;
} kgid_t;

// From vmlinux.
//
// Credentials of a task. `sendProcessEvent()` reads these through
// `BPF_CORE_READ`: `uid`/`gid` feed the event's `ruid`/`rgid`, and
// `euid`/`egid` feed the event's `uid`/`gid`. The remaining fields are
// declared but not read in this file.
struct cred {
kuid_t uid;
kgid_t gid;
kuid_t suid;
kgid_t sgid;
kuid_t euid;
kgid_t egid;
kuid_t fsuid;
kgid_t fsgid;
};

// From vmlinux.
//
// 64-bit counter; only its `counter` field is read in this file.
typedef struct {
__s64 counter;
} atomic64_t;

// From vmlinux.
typedef atomic64_t atomic_long_t;

// From vmlinux.
//
// Indices into `mm_rss_stat.count`.
enum {
MM_FILEPAGES, /* Resident file mapping pages */
MM_ANONPAGES, /* Resident anonymous pages */
MM_SWAPENTS, /* Anonymous swap entries */
MM_SHMEMPAGES, /* Resident shared memory pages */
NR_MM_COUNTERS
};

// From vmlinux.
//
// Per-address-space page counters, indexed by the `MM_*` constants above.
// The array size of 4 equals `NR_MM_COUNTERS`.
struct mm_rss_stat {
atomic_long_t count[4];
};

// From vmlinux.
//
// Accessed through CO-RE, so only declaring fields we need.
//
// `sendProcessEvent()` reads `rss_stat.count[...]` for the resident size and
// `total_vm` for the virtual size.
//
// NOTE(review): I believe newer kernels (6.2 and later) replaced
// `struct mm_rss_stat` with per-CPU counters; confirm that the `rss_stat`
// reads still relocate correctly there.
struct mm_struct {
struct {
struct mm_rss_stat rss_stat;
long unsigned int total_vm;
};
// ...
};

// From vmlinux.
//
// Accessed through CO-RE, so only declaring fields we need.
//
// In this file, `sendProcessEvent()` reads `cred`, `real_parent->pid`,
// `prio`, `utime`, `stime`, and `mm`; pointers to this struct also serve as
// keys into `process_table`.
struct task_struct {
pid_t pid;
pid_t tgid;
int prio;
const struct cred* cred;
const struct cred* real_cred;
struct task_struct* real_parent;
__u64 utime; // in nsecs since 4.11.0
__u64 stime; // in nsecs since 4.11.0
struct mm_struct* mm;
// ...
};

static void sendProcessEvent(struct bpfProcess* process, struct task_struct* task, enum bpfProcessState state) {
struct bpfProcessEvent* ev = bpf_ringbuf_reserve(&ring_buffer, sizeof(struct bpfProcessEvent), 0);
if ( ! ev )
return; // no space (TODO: log)

if ( process )
memcpy(ev, &process->event, sizeof(*ev));
else
bzero(ev, sizeof(*ev));

ev->uid = BPF_CORE_READ(task, cred, euid.val);
ev->gid = BPF_CORE_READ(task, cred, egid.val);
ev->life_time = (__s64)(process->start_time >= 0 ? (bpf_ktime_get_boot_ns() - process->start_time) : -1);
ev->ruid = BPF_CORE_READ(task, cred, uid.val);
ev->rgid = BPF_CORE_READ(task, cred, gid.val);
ev->ppid = BPF_CORE_READ(task, real_parent, pid);
ev->priority = BPF_CORE_READ(task, prio);

ev->utime = BPF_CORE_READ(task, utime);
ev->stime = BPF_CORE_READ(task, stime);

// This follows:
// https://elixir.bootlin.com/linux/v5.8/source/fs/proc/task_mmu.c#L82,
// which is what /proc/<PID>/stat uses as well.
__s64 file_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_FILEPAGES].counter);
__s64 shmem_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_SHMEMPAGES].counter);
__s64 anon_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_ANONPAGES].counter);
ev->rsize = (file_pages + shmem_pages + anon_pages);
ev->vsize = BPF_CORE_READ(task, mm, total_vm);

ev->state = state;
bpf_ringbuf_submit(ev, 0);
}

// Entry probe for the `execve` system call: makes sure the calling task is
// tracked and records the path being executed as the process name. The
// corresponding event is sent by the return probe.
SEC("ksyscall/execve")
int BPF_KSYSCALL(execve, const char* filename, const char* const* argv, const char* const* envp) {
    // Read the path into a scratch buffer first, just to learn its length
    // (which includes the terminating NUL) without touching tracked state.
    char scratch[BPF_PROCESS_NAME_MAX];
    long len = bpf_probe_read_user_str(scratch, sizeof(scratch), filename);

    // TODO: An empty name means reading from an FD I believe. Not sure how to
    // handle that, ignoring for now.
    if ( len <= 0 )
        return 0;

    struct task_struct* current_task = (struct task_struct*)bpf_get_current_task();

    struct bpfProcess* entry = lookupProcess(current_task);
    if ( ! entry ) {
        entry = startNewProcess(current_task, 0);
        if ( ! entry )
            return 0; // make verifier happy
    }

    // Only overwrite the stored name if the path is non-empty.
    if ( len > 1 )
        bpf_probe_read_user_str(entry->event.name, sizeof(entry->event.name), filename);

    return 0;
}

// Return probe for the `execve` system call: reports the process as started
// once the call has succeeded.
//
// This uses `BPF_KRETPROBE` so that `rc` receives the call's return value;
// `BPF_KSYSCALL` decodes system call input arguments and is meant for entry
// probes only.
//
// rc: return value of the system call; negative on failure.
SEC("kretsyscall/execve")
int BPF_KRETPROBE(execve_ret, int rc) {
    // A failed execve didn't start anything, so don't report it. The table
    // entry stays in place; its name may have been overwritten by the failed
    // attempt and will be refreshed by the next execve of this task.
    if ( rc < 0 )
        return 0;

    struct task_struct* task = (struct task_struct*)bpf_get_current_task();

    struct bpfProcess* process = lookupProcess(task);
    if ( process )
        sendProcessEvent(process, task, BPF_PROCESS_STATE_STARTED);

    return 0;
}


// Probe on the kernel's `do_exit`: reports the exiting task as stopped and
// drops its state from the process table.
SEC("kprobe/do_exit")
int BPF_KPROBE(do_exit, long code) {
    struct task_struct* current_task = (struct task_struct*)bpf_get_current_task();
    struct bpfProcess* entry = lookupProcess(current_task);

    if ( ! entry ) {
        // We never saw this task start. Track it briefly, seeded with its
        // current command name, so that we can still report what we know.
        entry = startNewProcess(current_task, 1);
        if ( ! entry )
            return 0; // make verifier happy

        // Flag the start time as unknown.
        entry->start_time = -1;
    }

    sendProcessEvent(entry, current_task, BPF_PROCESS_STATE_STOPPED);
    removeProcess(current_task);

    return 0;
}
Loading

0 comments on commit db68783

Please sign in to comment.