diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d7217c0b..d96ee469 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -14,6 +14,7 @@ jobs: ZEEK_VERSION: ${{ matrix.zeek.version }} ZEEK_TAG: ${{ matrix.zeek.tag }} ZEEK_AGENT_CONFIGURE_ADDL: ${{ matrix.configure }} + LD_LIBRARY_PATH: /usr/lib/llvm-17/lib/clang/17/lib/linux steps: - name: Prepare diff --git a/CMakeLists.txt b/CMakeLists.txt index 8277f158..397ace2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2021 by the Zeek Project. See LICENSE for details. +# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details. cmake_minimum_required(VERSION 3.15.1) diff --git a/auxil/run-clang-tidy b/auxil/run-clang-tidy index b3c1610a..7fa5c234 100755 --- a/auxil/run-clang-tidy +++ b/auxil/run-clang-tidy @@ -1,6 +1,6 @@ #! /bin/sh # -# Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details. +# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details. usage() { echo "Usage: $(basename $0) [--fixit] [-j ] [--clang-tidy-path ] [--clang-tidy-arg ] []" diff --git a/cmake/CheckCompiler.cmake b/cmake/CheckCompiler.cmake index 63b4a083..c41d766b 100644 --- a/cmake/CheckCompiler.cmake +++ b/cmake/CheckCompiler.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2021 by the Zeek Project. See LICENSE for details. +# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details. # # Adapted from Zeek. diff --git a/cmake/Util.cmake b/cmake/Util.cmake index cfb41e49..06833a75 100644 --- a/cmake/Util.cmake +++ b/cmake/Util.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2021 by the Zeek Project. See LICENSE for details. +# Copyright (c) 2021-2024 by the Zeek Project. See LICENSE for details. # # A collection of small helpers for the Zeek Agent build system. 
diff --git a/src/tables/processes/CMakeLists.txt b/src/tables/processes/CMakeLists.txt index a4a321c2..a2ab6685 100644 --- a/src/tables/processes/CMakeLists.txt +++ b/src/tables/processes/CMakeLists.txt @@ -7,6 +7,7 @@ if ( HAVE_DARWIN ) endif () if ( HAVE_LINUX ) + generate_bpf_code(zeek-agent processes processes.linux.bpf.c) target_sources(zeek-agent PRIVATE processes.linux.cc) target_link_libraries(zeek-agent PRIVATE pfs) endif () diff --git a/src/tables/processes/processes.h b/src/tables/processes/processes.h index 545ec761..74bc02d1 100644 --- a/src/tables/processes/processes.h +++ b/src/tables/processes/processes.h @@ -48,7 +48,7 @@ class ProcessesEventsCommon : public EventTable { .description = R"( The table reports processes starting and stopping on the endpoint. )", - .platforms = { Platform::Darwin }, + .platforms = { Platform::Darwin, Platform::Linux }, .columns = { {.name = "time", .type = value::Type::Time, .summary = "timestamp"}, {.name = "name", .type = value::Type::Text, .summary = "name of process"}, @@ -70,4 +70,5 @@ class ProcessesEventsCommon : public EventTable { }; } }; + } // namespace zeek::agent::table diff --git a/src/tables/processes/processes.linux.bpf.c b/src/tables/processes/processes.linux.bpf.c new file mode 100644 index 00000000..82a41d9c --- /dev/null +++ b/src/tables/processes/processes.linux.bpf.c @@ -0,0 +1,228 @@ +// Copyright (c) 2021 by the Zeek Project. See LICENSE for details. +// +// TODO: - Expire map state on inactivity. +// - This doesn't capture all processes yet, I believe (see TODO on empty names below; maybe more). +// - Is our collection of execution times and memory usage correct for multiple threads? Do we need to aggregate? 
+
+#include "processes.linux.event.h"
+
+// clang-format off
+#include
+#include
+#include
+#include
+#include
+// clang-format on
+
+#include
+
+#include
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL"; // don't change; must be a license known by kernel
+
+// Ring buffer for passing events to user land.
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);
+} ring_buffer SEC(".maps");
+
+// State maintained in map during process' lifetime.
+struct bpfProcess {
+    __s64 start_time;             // boot-time clock value when tracking began; negative means unknown
+    struct bpfProcessEvent event; // current event, filled out as much as possible
+};
+
+// Table tracking active processes.
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __type(key, const void*); // process key
+    __type(value, struct bpfProcess);
+    __uint(max_entries, 1000);
+} process_table SEC(".maps");
+
+// Creates a fresh, zero-initialized state record for the current task, stores
+// it in `process_table` under `key`, and returns a pointer to the stored copy.
+//
+// key: process key; the probes in this file pass the current `task_struct` pointer
+// seed_name: if non-zero, pre-fill the event's name with the current task's comm
+//
+// Returns a null pointer if the record could not be stored or found again
+// (e.g., because the table is full).
+static struct bpfProcess* startNewProcess(const void* key, int seed_name) {
+    struct bpfProcess process;
+    bzero(&process, sizeof(process));
+
+    process.start_time = (__s64)bpf_ktime_get_boot_ns();
+
+    // Pre-fill event information that we got here. May be updated later.
+    // Both helpers pack two 32-bit IDs into one 64-bit value; query the
+    // UID/GID pair just once and split it up.
+    __u64 uid_gid = bpf_get_current_uid_gid();
+    process.event.pid = (bpf_get_current_pid_tgid() >> 32);
+    process.event.uid = (uid_gid & 0xffffffff);
+    process.event.gid = (uid_gid >> 32);
+    process.event.state = BPF_PROCESS_STATE_UNKNOWN;
+
+    if ( seed_name )
+        // Pre-seed with the current process name.
+        bpf_get_current_comm(process.event.name, BPF_PROCESS_NAME_MAX);
+
+    // The map is keyed by the pointer value itself, hence we pass its address.
+    if ( bpf_map_update_elem(&process_table, &key, &process, BPF_ANY) != 0 )
+        return 0; // could not store the state
+
+    return bpf_map_lookup_elem(&process_table, &key);
+}
+
+// Drops the state stored under `key`, if any.
+static void removeProcess(const void* key) { bpf_map_delete_elem(&process_table, &key); }
+
+// Returns the state stored under `key`, or a null pointer if there is none.
+static struct bpfProcess* lookupProcess(const void* key) { return bpf_map_lookup_elem(&process_table, &key); }
+
+// From vmlinux.
+typedef struct {
+    uid_t val;
+} kuid_t;
+
+// From vmlinux.
+typedef struct {
+    gid_t val;
+} kgid_t;
+
+// From vmlinux.
+struct cred {
+    kuid_t uid;
+    kgid_t gid;
+    kuid_t suid;
+    kgid_t sgid;
+    kuid_t euid;
+    kgid_t egid;
+    kuid_t fsuid;
+    kgid_t fsgid;
+};
+
+// From vmlinux.
+typedef struct {
+    __s64 counter;
+} atomic64_t;
+
+// From vmlinux.
+typedef atomic64_t atomic_long_t;
+
+// From vmlinux.
+enum {
+    MM_FILEPAGES,  /* Resident file mapping pages */
+    MM_ANONPAGES,  /* Resident anonymous pages */
+    MM_SWAPENTS,   /* Anonymous swap entries */
+    MM_SHMEMPAGES, /* Resident shared memory pages */
+    NR_MM_COUNTERS
+};
+
+// From vmlinux.
+struct mm_rss_stat {
+    atomic_long_t count[4];
+};
+
+// From vmlinux.
+//
+// Accessed through CO-RE, so only declaring fields we need.
+struct mm_struct {
+    struct {
+        struct mm_rss_stat rss_stat;
+        long unsigned int total_vm;
+    };
+    // ...
+};
+
+// From vmlinux.
+//
+// Accessed through CO-RE, so only declaring fields we need.
+struct task_struct {
+    pid_t pid;
+    pid_t tgid;
+    int prio;
+    const struct cred* cred;
+    const struct cred* real_cred;
+    struct task_struct* real_parent;
+    __u64 utime; // in nsecs since 4.11.0
+    __u64 stime; // in nsecs since 4.11.0
+    struct mm_struct* mm;
+    // ...
+};
+
+// Reserves a record in the ring buffer, fills it from the tracked state and
+// the kernel's task structure, and submits it to user land.
+//
+// process: tracked state to seed the event from; may be null, in which case
+//          the event starts out zeroed and its life time is reported as
+//          unknown (-1)
+// task: kernel task to read credentials, parent, priority, CPU times, and
+//       memory counters from
+// state: process state to record in the event
+//
+// The event is silently dropped if the ring buffer has no space left.
+static void sendProcessEvent(struct bpfProcess* process, struct task_struct* task, enum bpfProcessState state) {
+    struct bpfProcessEvent* ev = bpf_ringbuf_reserve(&ring_buffer, sizeof(struct bpfProcessEvent), 0);
+    if ( ! ev )
+        return; // no space (TODO: log)
+
+    if ( process )
+        memcpy(ev, &process->event, sizeof(*ev));
+    else
+        bzero(ev, sizeof(*ev));
+
+    // Same null guard as for the copy above; without tracked state, or with a
+    // negative start time, we cannot tell how long the process has been around.
+    if ( process && process->start_time >= 0 )
+        ev->life_time = (__s64)(bpf_ktime_get_boot_ns() - process->start_time);
+    else
+        ev->life_time = -1;
+
+    ev->uid = BPF_CORE_READ(task, cred, euid.val);
+    ev->gid = BPF_CORE_READ(task, cred, egid.val);
+    ev->ruid = BPF_CORE_READ(task, cred, uid.val);
+    ev->rgid = BPF_CORE_READ(task, cred, gid.val);
+
+    // Report the parent's thread group ID so that this matches how `pid` gets
+    // recorded; the parent's `pid` field would identify a single thread.
+    ev->ppid = BPF_CORE_READ(task, real_parent, tgid);
+    ev->priority = BPF_CORE_READ(task, prio);
+
+    ev->utime = BPF_CORE_READ(task, utime);
+    ev->stime = BPF_CORE_READ(task, stime);
+
+    // This follows:
+    // https://elixir.bootlin.com/linux/v5.8/source/fs/proc/task_mmu.c#L82,
+    // which is what /proc/[pid]/stat uses as well.
+    __s64 file_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_FILEPAGES].counter);
+    __s64 shmem_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_SHMEMPAGES].counter);
+    __s64 anon_pages = BPF_CORE_READ(task, mm, rss_stat.count[MM_ANONPAGES].counter);
+    ev->rsize = (file_pages + shmem_pages + anon_pages);
+
+    // The kernel counts `total_vm` in pages; user land converts to bytes.
+    ev->vsize = BPF_CORE_READ(task, mm, total_vm);
+
+    ev->state = state;
+    bpf_ringbuf_submit(ev, 0);
+}
+
+// Entry probe for execve(): starts tracking the calling task if we don't know
+// it yet, and records the name of the file about to be executed. The
+// corresponding event is sent from the return probe.
+SEC("ksyscall/execve")
+int BPF_KSYSCALL(execve, const char* filename, const char* const* argv, const char* const* envp) {
+    struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+    char name[BPF_PROCESS_NAME_MAX];
+    long name_len = bpf_probe_read_user_str(name, sizeof(name), filename);
+
+    // TODO: An empty name means reading from an FD I believe. Not sure how to
+    // handle that, ignoring for now.
+    if ( name_len <= 0 )
+        return 0;
+
+    struct bpfProcess* process = lookupProcess(task);
+    if ( ! process )
+        process = startNewProcess(task, 0);
+
+    if ( ! process )
+        return 0; // make verifier happy
+
+    // The returned length includes the terminating null byte, so anything
+    // larger than one is a non-empty name.
+    if ( name_len > 1 )
+        bpf_probe_read_user_str(process->event.name, sizeof(process->event.name), filename);
+
+    return 0;
+}
+
+// Return probe for execve(): reports the task as started once the call has
+// succeeded.
+//
+// This uses BPF_KRETPROBE, not BPF_KSYSCALL, because only the former hands us
+// the function's return value; the latter decodes syscall arguments, which
+// aren't available on return.
+SEC("kretsyscall/execve")
+int BPF_KRETPROBE(execve_ret, long rc) {
+    // A failed execve() leaves the calling process unchanged, so there's
+    // nothing new to report. (For example, a PATH search may try several
+    // candidates before one succeeds.)
+    //
+    // TODO: The name recorded by the entry probe stays in place in that case.
+    if ( rc != 0 )
+        return 0;
+
+    struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+    struct bpfProcess* process = lookupProcess(task);
+    if ( process )
+        sendProcessEvent(process, task, BPF_PROCESS_STATE_STARTED);
+
+    return 0;
+}
+
+
+// Probe for the kernel's do_exit(): reports the task as stopped and drops its
+// state from the table.
+SEC("kprobe/do_exit")
+int BPF_KPROBE(do_exit, long code) {
+    struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+    struct bpfProcess* process = lookupProcess(task);
+    if ( ! process ) {
+        // Missed the beginning, create a temporary process with whatever information we have.
+        process = startNewProcess(task, 1);
+        if ( ! process )
+            return 0; // make verifier happy
+
+        process->start_time = -1; // unknown, gets reported as a life time of -1
+    }
+
+    sendProcessEvent(process, task, BPF_PROCESS_STATE_STOPPED);
+    removeProcess(task);
+
+    return 0;
+}
diff --git a/src/tables/processes/processes.linux.cc b/src/tables/processes/processes.linux.cc
index 26399b70..448796e9 100644
--- a/src/tables/processes/processes.linux.cc
+++ b/src/tables/processes/processes.linux.cc
@@ -7,8 +7,17 @@
 #include "core/database.h"
 #include "core/logger.h"
 #include "core/table.h"
+#include "processes.linux.event.h"
 #include "util/fmt.h"
 
+// clang-format off
+#include "platform/linux/bpf.h"
+
+#define _Bool bool
+#include "autogen/bpf/processes.skel.h"
+#undef _Bool
+// clang-format on
+
 #include
 
 namespace zeek::agent::table {
@@ -23,7 +32,7 @@ class ProcessesLinux : public ProcessesCommon {
 };
 
 namespace {
-database::RegisterTable _;
+database::RegisterTable _1;
 }
 
 EventTable::Init ProcessesLinux::init() {
@@ -71,4 +80,92 @@ std::vector> ProcessesLinux::snapshot(const std::vector _2;
+}
+
+// Converts `i` into a `Value`, mapping anything that evaluates to false (such
+// as zero) to an unset value.
+template
+Value to_val(const S& i) {
+    return i ? Value(static_cast(i)) : Value();
+}
+
+
+// Callback receiving one event from the BPF program's ring buffer and turning
+// it into a new row of the events table.
+//
+// ctx: the table instance that `init()` registers as `event_context`
+// data: the event record submitted by the BPF program
+// data_sz: size of the record (currently unused)
+//
+// Always returns 1.
+static int handle_event(void* ctx, void* data, size_t data_sz) {
+    auto table = reinterpret_cast(ctx);
+    auto ev = reinterpret_cast(data);
+
+    // Both memory sizes arrive as page counts and get converted to bytes.
+    const auto page_size = getpagesize();
+
+    auto name = (ev->name[0] ? Value(ev->name) : Value());
+    auto pid = Value(static_cast(ev->pid));
+    auto ppid = Value(static_cast(ev->ppid));
+    auto uid = Value(static_cast(ev->uid));
+    auto gid = Value(static_cast(ev->gid));
+    auto ruid = Value(static_cast(ev->ruid));
+    auto rgid = Value(static_cast(ev->rgid));
+    auto priority = Value(std::to_string(ev->priority - 100)); // TODO: That's MAX_RT_PRIO, require kernel header?
+    auto startup = (ev->life_time >= 0 ? Value(to_interval_from_ns(ev->life_time)) : Value());
+    auto vsize = Value(static_cast(ev->vsize * page_size));
+    auto rsize = Value(static_cast(ev->rsize * page_size));
+    auto utime = Value(to_interval_from_ns(ev->utime));
+    auto stime = Value(to_interval_from_ns(ev->stime));
+
+    Value state;
+    switch ( ev->state ) {
+        case BPF_PROCESS_STATE_STARTED: state = "started"; break;
+        case BPF_PROCESS_STATE_STOPPED: state = "stopped"; break;
+        case BPF_PROCESS_STATE_UNKNOWN: break; // leave unset
+    }
+
+    table->newEvent({table->systemTime(), name, pid, ppid, uid, gid, ruid, rgid, priority, startup, vsize, rsize, utime,
+                     stime, state});
+
+    return 1;
+}
+
+// Loads the BPF program and hooks up its ring buffer with `handle_event()`.
+// Reports the table as permanently unavailable if BPF isn't available, or if
+// loading or initializing the program fails.
+EventTable::Init ProcessesEventsLinux::init() {
+    auto bpf = platform::linux::bpf();
+    if ( ! bpf->isAvailable() )
+        return Init::PermanentlyUnavailable;
+
+    auto skel = platform::linux::BPF::Skeleton{.name = "Processes",
+                                               .open = reinterpret_cast(processes__open),
+                                               .load = reinterpret_cast(processes__load),
+                                               .attach = reinterpret_cast(processes__attach),
+                                               .detach = reinterpret_cast(processes__detach),
+                                               .destroy = reinterpret_cast(processes__destroy),
+                                               .event_callback = handle_event,
+                                               .event_context = this};
+
+    auto our_bpf = bpf->load(std::move(skel));
+    if ( ! our_bpf ) {
+        logger()->warn(frmt("could not load BPF program: {}", our_bpf.error()));
+        return Init::PermanentlyUnavailable;
+    }
+
+    if ( auto rc = bpf->init("Processes", (*our_bpf)->maps.ring_buffer); ! rc ) {
+        // Report the error of the failed call; `our_bpf` holds a valid result
+        // at this point and hence no error.
+        logger()->warn(frmt("could not initialize BPF program: {}", rc.error()));
+        return Init::PermanentlyUnavailable;
+    }
+
+    return Init::Available;
+}
+
+// Attaches the BPF program so that events start flowing; logs an error on failure.
+void ProcessesEventsLinux::activate() {
+    if ( auto rc = platform::linux::bpf()->attach("Processes"); ! rc )
+        logger()->error(frmt("could not attach BPF program: {}", rc.error()));
+}
+
+// Detaches the BPF program again; logs an error on failure.
+void ProcessesEventsLinux::deactivate() {
+    if ( auto rc = platform::linux::bpf()->detach("Processes"); ! rc )
+        logger()->error(frmt("could not detach BPF program: {}", rc.error()));
+}
+
+
 } // namespace zeek::agent::table
diff --git a/src/tables/processes/processes.linux.event.h b/src/tables/processes/processes.linux.event.h
new file mode 100644
index 00000000..00415168
--- /dev/null
+++ b/src/tables/processes/processes.linux.event.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
+
+#pragma once
+
+#include
+
+#define BPF_PROCESS_NAME_MAX 128
+#define BPF_PROCESS_PRIORITY_MAX 16
+
+enum bpfProcessState { BPF_PROCESS_STATE_UNKNOWN = 0, BPF_PROCESS_STATE_STARTED, BPF_PROCESS_STATE_STOPPED };
+
+// Event record passed from the BPF program to user land through the ring buffer.
+struct bpfProcessEvent {
+    char name[BPF_PROCESS_NAME_MAX];
+    __u64 pid;
+    __u64 ppid;
+    __u64 uid;
+    __u64 gid;
+    __u64 ruid;
+    __u64 rgid;
+    __s64 life_time; // nsecs; -1 for unknown
+    int priority;    // + MAX_RT_PRIO
+    __u64 vsize;     // pages
+    __u64 rsize;     // pages
+    __u64 utime;     // nsecs
+    __u64 stime;     // nsecs
+    enum bpfProcessState state;
+};