diff --git a/_drgn.pyi b/_drgn.pyi index 9863725d3..2e7217726 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -453,6 +453,33 @@ class Program: return an :class:`Object` or ``None`` if not found. """ ... + def add_symbol_finder( + self, fn: Callable[[Optional[str], Optional[int], bool], Sequence[Symbol]] + ) -> None: + """ + Register a callback for finding symbols in the program. + + The callback should take three arguments: a search name, a search + address, and a boolean flag 'one' indicating whether to return only + the single best match. When the 'one' flag is True, the callback should + return a list containing at most one :class:`Symbol`. When the flag is + False, the callback should return a list of all matching + :class:`Symbol`\\ s. Both the name and address arguments are optional. + If both are provided, then the result(s) should match both. If neither + is provided, the finder should return all available symbols. If no + result is found, the return should be an empty list. + + Callbacks are called in reverse order of the order they were added + (i.e., the most recently added callback is called first). When the + 'one' flag is set, the search will short-circuit after the first + finder which returns a result, and subsequent finders will not be + called. Otherwise, all callbacks will be called, and all results will be + returned. + + :param fn: Callable taking name, address, and 'one' flag, and + returning a sequence of :class:`Symbol`\\ s. + """ + ... def set_core_dump(self, path: Union[Path, int]) -> None: """ Set the program to a core dump. 
@@ -1585,6 +1612,73 @@ class Symbol: kind: Final[SymbolKind] """Kind of entity represented by this symbol.""" +class KallsymsFinder: + """ + A symbol finder which uses vmlinux kallsyms data + """ + + def __init__( + self, + prog: Program, + kallsyms_names: int, + kallsyms_token_table: int, + kallsyms_token_index: int, + kallsyms_num_syms: int, + kallsyms_offsets: int, + kallsyms_relative_base: int, + kallsyms_addresses: int, + _stext: int, + ) -> None: + """ + Manually construct a ``KallsymsFinder`` given all symbol addresses + + .. note:: + + This class should not normally be instantiated manually. See + :func:`drgn.helpers.linux.kallsyms.make_kallsyms_vmlinux_finder` + instead for a way of automatically creating the finder via + information found in the ``VMCOREINFO``. + + The finder is capable of searching the compressed table of symbol names + and addresses stored within kernel memory. It requires + ``CONFIG_KALLSYMS=y`` and ``CONFIG_KALLSYMS_ALL=y`` in your kernel + configuration -- this is common on desktop and server Linux + distributions. However, the quality of symbol information is not + excellent: the :meth:`Symbol.binding` and :meth:`Symbol.kind` values are + inferred from type code information provided by kallsyms which was + originally generated by ``nm(1)``. Further, the :meth:`Symbol.size` is + computed using the offset of the next symbol after it in memory. This + can create some unusual results. + + In order to create a ``KallsymsFinder``, drgn must know the location of + several symbols, which creates a bit of a chicken-and-egg problem. + Thankfully, starting with Linux 6.0, these symbol addresses are included + in the VMCOREINFO note. The required symbols are addresses of variables + in the vmcore: + + - ``kallsyms_names``: an array of compressed symbol name data. + - ``kallsyms_token_table``, ``kallsyms_token_index``: tables used in + decompressing symbol names. 
+ - ``kallsyms_num_syms``: the number of kallsyms symbols + - ``_stext``: the start of the kernel text segment. This symbol address + is necessary for verifying decoded kallsyms data. + + Depending on the way that kallsyms is configured (see + ``CONFIG_KALLSYMS_ABSOLUTE_PERCPU`` and + ``CONFIG_KALLSYMS_BASE_RELATIVE``), the following symbols are needed. If + the symbol names are not present, they should be given as zero. + + - ``kallsyms_offsets`` + - ``kallsyms_relative_base`` + - ``kallsyms_addresses`` + + :param prog: Program to create a finder for + :returns: A callable object suitable to provide to + :meth:`Program.add_symbol_finder()`. + """ + __call__: Callable[[Optional[str], Optional[int], bool], List[Symbol]] + """Lookup symbol by name, address, or both.""" + class SymbolBinding(enum.Enum): """ A ``SymbolBinding`` describes the linkage behavior and visibility of a diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 16c0e65a4..ffbfb379b 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -108,6 +108,7 @@ Symbols .. drgndoc:: Symbol .. drgndoc:: SymbolBinding .. drgndoc:: SymbolKind +.. 
drgndoc:: KallsymsFinder Stack Traces ------------ diff --git a/drgn/__init__.py b/drgn/__init__.py index 1df95b5fd..64b060be7 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -52,6 +52,7 @@ FaultError, FindObjectFlags, IntegerLike, + KallsymsFinder, Language, MissingDebugInfoError, NoDefaultProgramError, @@ -105,6 +106,7 @@ "FaultError", "FindObjectFlags", "IntegerLike", + "KallsymsFinder", "Language", "MissingDebugInfoError", "NULL", diff --git a/drgn/helpers/linux/kallsyms.py b/drgn/helpers/linux/kallsyms.py new file mode 100644 index 000000000..52d9a04b2 --- /dev/null +++ b/drgn/helpers/linux/kallsyms.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# Copyright (c) 2023 Oracle and/or its affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later +""" +Kallsyms +-------- + +The kallsyms module contains helpers which allow you to use the built-in +kallsyms symbol table for drgn object lookup. Combined with an alternative type +information source, this can enable debugging Linux kernel core dumps without +the corresponding DWARF debuginfo files. +""" +import re +from typing import Dict + +from drgn import KallsymsFinder, Program + +__all__ = ("make_kallsyms_vmlinux_finder",) + + +def _vmcoreinfo_symbols(prog: Program) -> Dict[str, int]: + vmcoreinfo_data = prog["VMCOREINFO"].string_().decode("ascii") + vmcoreinfo_symbols = {} + sym_re = re.compile(r"SYMBOL\(([^)]+)\)=([A-Fa-f0-9]+)") + for line in vmcoreinfo_data.strip().split("\n"): + match = sym_re.fullmatch(line) + if match: + vmcoreinfo_symbols[match.group(1)] = int(match.group(2), 16) + return vmcoreinfo_symbols + + +def make_kallsyms_vmlinux_finder(prog: Program) -> KallsymsFinder: + """ + Create a vmlinux kallsyms finder, which may be passed to + :meth:`drgn.Program.add_symbol_finder`. + + This function automatically finds the necessary information to create a + ``KallsymsFinder`` from the program's VMCOREINFO data. It may fail if the + information is not present. 
Please note that the debugged Linux kernel must + be 6.0 or later to find this information. + + :returns: a callable symbol finder object + """ + symbol_reqd = [ + "kallsyms_names", + "kallsyms_token_table", + "kallsyms_token_index", + "kallsyms_num_syms", + "kallsyms_offsets", + "kallsyms_relative_base", + "kallsyms_addresses", + "_stext", + ] + symbols = _vmcoreinfo_symbols(prog) + args = [] + for sym in symbol_reqd: + args.append(symbols.get(sym, 0)) + return KallsymsFinder(prog, *args) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index dfa706374..ce1888259 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -66,6 +66,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ helpers.h \ io.c \ io.h \ + kallsyms.c \ + kallsyms.h \ language.c \ language.h \ language_c.c \ @@ -157,6 +159,7 @@ _drgn_la_SOURCES = python/constants.c \ python/drgnpy.h \ python/error.c \ python/helpers.c \ + python/kallsyms_finder.c \ python/language.c \ python/main.c \ python/object.c \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index ff30b55af..44f1cd343 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2024,6 +2024,148 @@ struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, goto out; } +struct elf_symbols_search_arg { + const char *name; + uint64_t address; + enum drgn_find_symbol_flags flags; + struct drgn_error *err; + struct drgn_symbol_result_builder *builder; +}; + +static bool elf_symbol_match(struct elf_symbols_search_arg *arg, GElf_Addr addr, + const GElf_Sym *sym, const char *name) +{ + if ((arg->flags & DRGN_FIND_SYMBOL_NAME) && strcmp(name, arg->name) != 0) + return false; + if ((arg->flags & DRGN_FIND_SYMBOL_ADDR) && + (arg->address < addr || arg->address >= addr + sym->st_size)) + return false; + return true; +} + +static bool elf_symbol_store_match(struct elf_symbols_search_arg *arg, + GElf_Sym *elf_sym, GElf_Addr addr, + const char *name) +{ + struct drgn_symbol *sym; + if (arg->flags == 
(DRGN_FIND_SYMBOL_ONE | DRGN_FIND_SYMBOL_NAME)) { + int binding = GELF_ST_BIND(elf_sym->st_info); + /* + * The order of precedence is + * GLOBAL = UNIQUE > WEAK > LOCAL = everything else + * + * If we found a global or unique symbol, return it + * immediately. If we found a weak symbol, then save it, + * which may overwrite a previously found weak or local + * symbol. Otherwise, save the symbol only if we haven't + * found another symbol. + */ + if (binding != STB_GLOBAL + && binding != STB_GNU_UNIQUE + && binding != STB_WEAK + && drgn_symbol_result_builder_count(arg->builder) > 0) + return false; + sym = malloc(sizeof(*sym)); + if (!sym) { + arg->err = &drgn_enomem; + return true; + } + drgn_symbol_from_elf(name, addr, elf_sym, sym); + if (!drgn_symbol_result_builder_add(arg->builder, sym)) { + arg->err = &drgn_enomem; + drgn_symbol_destroy(sym); + } + + /* Abort on error, or short-circuit if we found a global or + * unique symbol */ + return (arg->err || sym->binding == DRGN_SYMBOL_BINDING_GLOBAL + || sym->binding == DRGN_SYMBOL_BINDING_UNIQUE); + } else { + sym = malloc(sizeof(*sym)); + if (!sym) { + arg->err = &drgn_enomem; + return true; + } + drgn_symbol_from_elf(name, addr, elf_sym, sym); + if (!drgn_symbol_result_builder_add(arg->builder, sym)) { + arg->err = &drgn_enomem; + drgn_symbol_destroy(sym); + } + /* Abort on error, or short-circuit for single lookup */ + return (arg->err || (arg->flags & DRGN_FIND_SYMBOL_ONE)); + } +} + +static int elf_symbols_search_cb(Dwfl_Module *dwfl_module, void **userdatap, + const char *module_name, Dwarf_Addr base, + void *cb_arg) +{ + struct elf_symbols_search_arg *arg = cb_arg; + + int symtab_len = dwfl_module_getsymtab(dwfl_module); + if (symtab_len == -1) + return DWARF_CB_OK; + + /* Ignore the zeroth null symbol */ + for (int i = 1; i < symtab_len; i++) { + GElf_Sym elf_sym; + GElf_Addr elf_addr; + const char *name = dwfl_module_getsym_info(dwfl_module, i, + &elf_sym, &elf_addr, + NULL, NULL, NULL); + if (!name || 
!elf_symbol_match(arg, elf_addr, &elf_sym, name)) + continue; + if (elf_symbol_store_match(arg, &elf_sym, elf_addr, name)) + return DWARF_CB_ABORT; + } + return DWARF_CB_OK; +} + +static struct drgn_error * +elf_symbols_search(const char *name, uint64_t addr, enum drgn_find_symbol_flags flags, + void *data, struct drgn_symbol_result_builder *builder) +{ + Dwfl_Module *dwfl_module = NULL; + struct drgn_program *prog = data; + struct elf_symbols_search_arg arg = { + .name = name, + .address = addr, + .flags = flags, + .err = NULL, + .builder = builder, + }; + + if (arg.flags & DRGN_FIND_SYMBOL_ADDR) { + dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, arg.address); + if (!dwfl_module) + return NULL; + } + + if ((arg.flags & (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) + == (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) { + GElf_Off offset; + GElf_Sym elf_sym; + const char *name = dwfl_module_addrinfo( + dwfl_module, addr, &offset, + &elf_sym, NULL, NULL, NULL); + if (!name) + return NULL; + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return &drgn_enomem; + drgn_symbol_from_elf(name, addr - offset, &elf_sym, sym); + if (!drgn_symbol_result_builder_add(builder, sym)) { + arg.err = &drgn_enomem; + drgn_symbol_destroy(sym); + } + } else if (dwfl_module) { + elf_symbols_search_cb(dwfl_module, NULL, NULL, 0, &arg); + } else { + dwfl_getmodules(prog->dbinfo.dwfl, elf_symbols_search_cb, &arg, 0); + } + return arg.err; +} + bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, const char *name) { @@ -2044,6 +2186,8 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_add_object_finder_impl(prog, &dbinfo->object_finder, drgn_debug_info_find_object, dbinfo); + drgn_program_add_symbol_finder_impl(prog, &dbinfo->symbol_finder, + elf_symbols_search, prog); drgn_module_table_init(&dbinfo->modules); c_string_set_init(&dbinfo->module_names); drgn_dwarf_info_init(dbinfo); diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h 
index 64e8bb863..0b689106b 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -23,6 +23,7 @@ #include "object_index.h" #include "orc_info.h" #include "string_builder.h" +#include "symbol.h" #include "type.h" #include "vector.h" @@ -137,6 +138,7 @@ struct drgn_debug_info { struct drgn_type_finder type_finder; struct drgn_object_finder object_finder; + struct drgn_symbol_finder symbol_finder; /** DWARF frontend library handle. */ Dwfl *dwfl; diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index f223acc62..705592f2b 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -930,6 +930,73 @@ struct drgn_error *drgn_program_find_symbols_by_address(struct drgn_program *pro struct drgn_symbol ***syms_ret, size_t *count_ret); +/** Flags for @ref drgn_symbol_find_fn() */ +enum drgn_find_symbol_flags { + /** Find symbols whose name matches the name argument */ + DRGN_FIND_SYMBOL_NAME = 1 << 0, + /** Find symbols whose address matches the addr argument */ + DRGN_FIND_SYMBOL_ADDR = 1 << 1, + /** Find only one symbol */ + DRGN_FIND_SYMBOL_ONE = 1 << 2, +}; + +/** Result builder for @ref drgn_symbol_find_fn() */ +struct drgn_symbol_result_builder; + +/** + * Add or set the return value for a symbol search + * + * Symbol finders should call this with each symbol search result. If the symbol + * search was @ref DRGN_FIND_SYMBOL_ONE, then only the most recent symbol added + * to the builder will be returned. Otherwise, all symbols added to the builder + * are returned. Returns true on success, or false on an allocation failure. + */ +bool +drgn_symbol_result_builder_add(struct drgn_symbol_result_builder *builder, + struct drgn_symbol *symbol); + +/** Get the current number of results in a symbol search result. */ +size_t drgn_symbol_result_builder_count(const struct drgn_symbol_result_builder *builder); + +/** + * Callback for finding one or more symbols. + * + * The callback should perform a symbol lookup based on the flags given in @a + * flags. 
When multiple flags are provided, the effect should be treated as a + * logical AND. Symbol results should be added to the result builder @a builder, + * via @ref drgn_symbol_result_builder_add(). When @ref DRGN_FIND_SYMBOL_ONE is + * set, then the finding function should only return the single best symbol + * result, and short-circuit return. + * + * When no symbol is found, simply do not add any result to the builder. No + * error should be returned in this case. + * + * @param[in] name Name of the symbol to match + * @param[in] addr Address of the symbol to match + * @param[in] flags Flags indicating the desired behavior of the search + * @param[in] arg Argument passed to @ref drgn_program_add_symbol_finder(). + * @param[in] builder Used to build the resulting symbol output + */ +typedef struct drgn_error * +(*drgn_symbol_find_fn)(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, void *arg, + struct drgn_symbol_result_builder *builder); + +/** + * Register a symbol finding callback. + * + * Callbacks are called in reverse order that they were originally added. In + * case of a search for multiple symbols, then the results of all callbacks are + * concatenated. If the search is for a single symbol, then the first callback + * which finds a symbol will short-circuit the search. + * + * @param[in] fn Symbol search function + * @param[in] arg Argument to pass to the callback + */ +struct drgn_error * +drgn_program_add_symbol_finder(struct drgn_program *prog, + drgn_symbol_find_fn fn, void *arg); + /** Element type and size. */ struct drgn_element_info { /** Type of the element. */ @@ -2882,7 +2949,7 @@ enum drgn_symbol_binding { DRGN_SYMBOL_BINDING_GLOBAL, DRGN_SYMBOL_BINDING_WEAK, DRGN_SYMBOL_BINDING_UNIQUE = 11, /* STB_GNU_UNIQUE + 1 */ -}; +} __attribute__((__packed__)); /** Kind of entity represented by a symbol. 
*/ enum drgn_symbol_kind { @@ -2898,7 +2965,45 @@ enum drgn_symbol_kind { DRGN_SYMBOL_KIND_COMMON, DRGN_SYMBOL_KIND_TLS, DRGN_SYMBOL_KIND_IFUNC = 10, /* STT_GNU_IFUNC */ -}; +} __attribute__((__packed__)); + +/** Describes the lifetime of an object provided to drgn */ +enum drgn_lifetime { + /** + * DRGN_LIFETIME_STATIC: the object is guaranteed to outlive the + * drgn_program itself. drgn will not free or copy the object. + */ + DRGN_LIFETIME_STATIC, + /** + * DRGN_LIFETIME_EXTERNAL: the object is externally managed. It will + * live as long as the object it is associated with, but may be freed + * after. drgn will never free the object. If drgn must copy a data + * structure, the object will be duplicated, and drgn will own the new + * object. + */ + DRGN_LIFETIME_EXTERNAL, + /** + * DRGN_LIFETIME_OWNED: the object lifetime is managed by drgn. It + * should be freed when the containing object is freed. If the + * containing object is copied, it must also be copied. + */ + DRGN_LIFETIME_OWNED, +} __attribute__((__packed__)); + +/** + * Create a new @ref drgn_symbol with the given values + * + * All parameters should be self-explanatory, except for @a name_lifetime. + * Clients can use this to describe how drgn should treat the string @a name. + * Strings with lifetime @c STATIC will never be copied or freed. Strings with + * lifetime @c OWNED will always be copied and freed with the symbol. Strings + * with lifetime @c EXTERNAL will not be freed, but if the Symbol is copied, they + * will be copied. + */ +struct drgn_error * +drgn_symbol_create(const char *name, uint64_t address, uint64_t size, + enum drgn_symbol_binding binding, enum drgn_symbol_kind kind, + enum drgn_lifetime name_lifetime, struct drgn_symbol **ret); /** Destroy a @ref drgn_symbol. 
*/ void drgn_symbol_destroy(struct drgn_symbol *sym); diff --git a/libdrgn/kallsyms.c b/libdrgn/kallsyms.c new file mode 100644 index 000000000..77a545a9f --- /dev/null +++ b/libdrgn/kallsyms.c @@ -0,0 +1,891 @@ +// Copyright (c) 2023 Oracle and/or its affiliates +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "kallsyms.h" +#include "program.h" +#include "drgn.h" + +/** + * This struct contains the tables necessary to reconstruct kallsyms names. + * + * vmlinux (core kernel) kallsyms names are compressed using table compression. + * There is some description of it in the kernel's "scripts/kallsyms.c", but + * this is a brief overview that should make the code below comprehensible. + * + * Table compression uses the remaining 128 characters not defined by ASCII and + * maps them to common substrings (e.g. the prefix "write_"). Each name is + * represented as a sequence of bytes which refers to strings in this table. + * The two arrays below comprise this table: + * + * - token_table: this is one long string with all of the tokens concatenated + * together, e.g. "a\0b\0c\0...z\0write_\0read_\0..." + * - token_index: this is a 256-entry long array containing the index into + * token_table where you'll find that token's string. + * + * To decode a string, for each byte you simply index into token_index, then use + * that to index into token_table, and copy that string into your buffer. + * + * The actual kallsyms symbol names are concatenated into a buffer called + * "names". The first byte in a name is the length (in tokens, not decoded + * bytes) of the symbol name. The remaining "length" bytes are decoded via the + * table as described above. The first decoded byte is a character representing + * what type of symbol this is (e.g. text, data structure, etc). 
+ */ +struct kallsyms_reader { + uint32_t num_syms; + uint8_t *names; + char *token_table; + uint16_t *token_index; + bool long_names; +}; + +/* + * We determine symbol length by the start of the subsequent symbol. + * Unfortunately, there can be large gaps in the symbol table, for instance the + * Linux kernel has percpu symbols near the beginning of the address space, and + * a large gap before normal kernel symbols. The result of this is that we can + * create symbols with incredibly large sizes, and then drgn's symbolization + * will print addresses using that symbol and a very large offset, which is + * absolutely meaningless. + * + * To avoid this, we set a cap on the length of a symbol. Unfortunately, this is + * a heuristic. It's entirely possible to have very large data symbols. This + * value is chosen somewhat arbitrarily, but seems to produce decent results. + */ +#define MAX_SYMBOL_LENGTH 0x10000 + +/* + * Since 73bbb94466fd3 ("kallsyms: support "big" kernel symbols"), the + * "kallsyms_names" array may use the most significant bit to indicate that the + * initial element for each symbol (normally representing the number of tokens + * in the symbol) requires two bytes. + * + * Unfortunately, that means that values 128-255 are now ambiguous: on older + * kernels, they should be interpreted literally, but on newer kernels, they + * require treating as a two byte sequence. Since the commit included no changes + * to the symbol names or vmcoreinfo, there's no way to detect it except via + * heuristics. + * + * The commit in question is a new feature and not likely to be backported to + * stable, so our heuristic is that it was first included in kernel 6.1. + * However, we first check the environment variable DRGN_KALLSYMS_LONG: if it + * exists, then we use its first character to determine our behavior: 1, y, Y + * all indicate that we should use long names. 0, n, N all indicate that we + * should not. 
+ */ +static bool guess_long_names(struct drgn_program *prog) +{ + const char *env = getenv("DRGN_KALLSYMS_LONG"); + const char *osrelease; + int i; + int major = 0, minor = 0; + + if (env) { + if (*env == '1' || *env == 'y' || *env == 'Y') + return true; + else if (*env == '0' || *env == 'n' || *env == 'N') + return false; + } + + osrelease = prog->vmcoreinfo.osrelease; + for (i = 0; i < sizeof(prog->vmcoreinfo.osrelease) && osrelease[i]; i++) { + char c = osrelease[i]; + if (c < '0' || c > '9') + break; + major *= 10; + major += osrelease[i] - '0'; + } + for (i = i + 1; i < sizeof(prog->vmcoreinfo.osrelease) && osrelease[i] && osrelease[i] != '.'; i++) { + char c = osrelease[i]; + if (c < '0' || c > '9') + break; + minor *= 10; + minor += osrelease[i] - '0'; + } + return (major == 6 && minor >= 1) || major > 6; +} + +/** + * Copy the kallsyms names tables from the program into host memory. + * @param prog Program to read from + * @param kr kallsyms_reader to populate + * @param loc Addresses of the kallsyms symbols within the program + */ +static struct drgn_error * +kallsyms_copy_tables(struct drgn_program *prog, struct kallsyms_reader *kr, + struct kallsyms_locations *loc) +{ + struct drgn_error *err; + const size_t token_index_size = (UINT8_MAX + 1) * sizeof(uint16_t); + uint64_t last_token; + size_t token_table_size, names_idx; + char data; + uint8_t len_u8; + int len; + bool bswap; + + err = drgn_program_bswap(prog, &bswap); + if (err) + return err; + + /* Read num_syms from vmcore */ + err = drgn_program_read_u32(prog, + loc->kallsyms_num_syms, + false, &kr->num_syms); + if (err) + return err; + if (bswap) + kr->num_syms = bswap_32(kr->num_syms); + + /* Read the constant-sized token_index table (256 entries) */ + kr->token_index = malloc(token_index_size); + if (!kr->token_index) + return &drgn_enomem; + err = drgn_program_read_memory(prog, kr->token_index, + loc->kallsyms_token_index, + token_index_size, false); + if (err) + return err; + if (bswap) { + for (size_t i = 0; i <= 
UINT8_MAX; i++) { + kr->token_index[i] = bswap_16(kr->token_index[i]); + } + } + + /* + * Find the end of the last token, so we get the overall length of + * token_table, final NUL included. Then copy it into host memory. + */ + last_token = loc->kallsyms_token_table + kr->token_index[UINT8_MAX]; + do { + err = drgn_program_read_u8(prog, last_token, false, + (uint8_t *)&data); + if (err) + return err; + + last_token++; + } while (data); + token_table_size = last_token - loc->kallsyms_token_table; + kr->token_table = malloc(token_table_size); + if (!kr->token_table) + return &drgn_enomem; + err = drgn_program_read_memory(prog, kr->token_table, + loc->kallsyms_token_table, + token_table_size, false); + if (err) + return err; + + /* Now find the end of the names array by skipping through it, then copy + * that into host memory. */ + names_idx = 0; + kr->long_names = guess_long_names(prog); + for (size_t i = 0; i < kr->num_syms; i++) { + err = drgn_program_read_u8(prog, + loc->kallsyms_names + names_idx, + false, &len_u8); + if (err) + return err; + len = len_u8; + if ((len & 0x80) && kr->long_names) { + err = drgn_program_read_u8(prog, + loc->kallsyms_names + names_idx + 1, + false, &len_u8); + if (err) + return err; + len = (len & 0x7F) | (len_u8 << 7); + names_idx++; + } + names_idx += len + 1; + } + kr->names = malloc(names_idx); + if (!kr->names) + return &drgn_enomem; + err = drgn_program_read_memory(prog, kr->names, + loc->kallsyms_names, + names_idx, false); + if (err) + return err; + + return NULL; +} + +/** + * Write the symbol starting at @a offset into @a result. + * @param kr Registry containing kallsyms data + * @param offset Starting index within "names" array for this symbol + * @param result Buffer to write output symbol to + * @param maxlen Size of output buffer, to avoid overruns + * @param[out] kind_ret Where to write the symbol kind data + * @param[out] bytes_ret How many bytes were output (incl. 
NUL) + * @returns The offset of the next symbol + */ +static unsigned int +kallsyms_expand_symbol(struct kallsyms_reader *kr, unsigned int offset, + char *result, size_t maxlen, char *kind_ret, + size_t *bytes_ret) +{ + uint8_t *data = &kr->names[offset]; + unsigned int len = *data; + bool skipped_first = false; + size_t bytes = 0; + + if ((len & 0x80) && kr->long_names) { + data++; + offset++; + len = (0x7F & len) | (*data << 7); + } + + offset += len + 1; + data += 1; + while (len) { + char *token_ptr = &kr->token_table[kr->token_index[*data]]; + while (*token_ptr) { + if (skipped_first) { + if (maxlen <= 1) + goto tail; + *result = *token_ptr; + result++; + maxlen--; + bytes++; + } else { + if (kind_ret) + *kind_ret = *token_ptr; + skipped_first = true; + } + token_ptr++; + } + + data++; + len--; + } + +tail: + *result = '\0'; + bytes++; + *bytes_ret = bytes; + return offset; +} + +/** Decode all symbol names from @a kr and place them into @a reg */ +static struct drgn_error * +kallsyms_create_symbol_array(struct kallsyms_finder *reg, struct kallsyms_reader *kr) +{ + uint8_t token_lengths[UINT8_MAX+1]; + + /* Compute the length of each token */ + for (int i = 0; i <= UINT8_MAX; i++) { + token_lengths[i] = strlen(&kr->token_table[kr->token_index[i]]); + } + + /* Now compute the length of all symbols together */ + size_t names_idx = 0; + size_t length = 0; + for (int i = 0; i < kr->num_syms; i++) { + unsigned int num_tokens = kr->names[names_idx]; + if ((num_tokens & 0x80) && kr->long_names) + num_tokens = (num_tokens & 0x7F) | (kr->names[++names_idx] << 7); + for (int j = names_idx + 1; j < names_idx + num_tokens + 1; j++) + length += token_lengths[kr->names[j]]; + length++; /* nul terminator */ + names_idx += num_tokens + 1; + } + + /* We use uint32_t to index into the array of strings. That allows for + * 4GiB of names which should be plenty, but still: check for overflow. 
*/ + if (length >= UINT32_MAX) + return drgn_error_format(DRGN_ERROR_OUT_OF_BOUNDS, + "kallsyms string table is too large: %zu", + length); + + reg->strings = malloc(length); + reg->strings_len = length; + reg->names = calloc(kr->num_syms, sizeof(*reg->names)); + reg->types = malloc(kr->num_syms); + reg->num_syms = kr->num_syms; + if (!reg->strings || !reg->names || !reg->types) + return &drgn_enomem; + + names_idx = 0; + uint32_t symbols_idx = 0; + for (int i = 0; i < kr->num_syms; i++) { + size_t bytes = 0; + names_idx = kallsyms_expand_symbol(kr, names_idx, + reg->strings + symbols_idx, + length - symbols_idx, &reg->types[i], + &bytes); + reg->names[i] = symbols_idx; + symbols_idx += (uint32_t) bytes; + } + return NULL; +} + +static int kallsyms_name_compar(const void *lhs, const void *rhs, void *arg) +{ + struct kallsyms_finder *kr = arg; + uint32_t left_ix = *(const uint32_t *)lhs; + uint32_t right_ix = *(const uint32_t *)rhs; + return strcmp(&kr->strings[kr->names[left_ix]], + &kr->strings[kr->names[right_ix]]); +} + +static struct drgn_error * +kallsyms_create_htab(struct kallsyms_finder *kr) +{ + /* + * A sorted list of symbol indices. Entries of the hash table will point + * into this list for a certain number of elements. + */ + kr->sorted = malloc(kr->num_syms * sizeof(kr->sorted[0])); + for (uint32_t i = 0; i < kr->num_syms; i++) + kr->sorted[i] = i; + + qsort_r(kr->sorted, kr->num_syms, sizeof(kr->sorted[0]), + kallsyms_name_compar, kr); + + if (!drgn_kallsyms_names_reserve(&kr->htab, kr->num_syms)) + return &drgn_enomem; + + /* For each unique symbol name, insert the index, and number of + * occurrences into the hash table. 
*/ + struct drgn_kallsyms_names_entry entry; + uint32_t current = 0; + while (current < kr->num_syms) { + char *current_str = &kr->strings[kr->names[kr->sorted[current]]]; + uint32_t next = current + 1; + while (next < kr->num_syms) { + char *next_str = &kr->strings[kr->names[kr->sorted[next]]]; + if (strcmp(current_str, next_str) != 0) + break; + next++; + } + + entry.key = current_str; + entry.value.start = current; + entry.value.end = next; + drgn_kallsyms_names_insert(&kr->htab, &entry, NULL); + current = next; + } + return NULL; +} + +/** Copies and decodes symbol names from the program. */ +static struct drgn_error * +kallsyms_load_names(struct kallsyms_finder *reg, struct kallsyms_locations *loc) +{ + struct drgn_error *err; + struct kallsyms_reader reader = {0}; + + err = kallsyms_copy_tables(reg->prog, &reader, loc); + if (err) + goto out; + + err = kallsyms_create_symbol_array(reg, &reader); +out: + free(reader.names); + free(reader.token_index); + free(reader.token_table); + return err; +} + +/** Lookup @a name in the registry @a kr, and return the index of the symbol */ +static int drgn_kallsyms_lookup(struct kallsyms_finder *kr, const char *name) +{ + struct drgn_kallsyms_names_iterator it = + drgn_kallsyms_names_search(&kr->htab, (char **)&name); + if (it.entry) { + return kr->sorted[it.entry->value.start]; + } + return -1; +} + +/** Return the address of symbol at @a index */ +static uint64_t +kallsyms_address(struct kallsyms_finder *kr, unsigned int index) +{ + return kr->addresses[index]; +} + +static void drgn_symbol_from_kallsyms(struct kallsyms_finder *kr, int index, + struct drgn_symbol *ret) +{ + char kind = kr->types[index]; + char kind_lower = tolower(kind); + ret->name = &kr->strings[kr->names[index]]; + ret->address = kallsyms_address(kr, index); + if (index + 1 < kr->num_syms) { + size_t size = kallsyms_address(kr, index + 1) - ret->address; + if (size < MAX_SYMBOL_LENGTH) + ret->size = size; + else + ret->size = 0; + } else { + ret->size = 0; 
+ } + + ret->binding = DRGN_SYMBOL_BINDING_GLOBAL; + if (kind == 'u') + ret->binding = DRGN_SYMBOL_BINDING_UNIQUE; + else if (kind_lower == 'v' || kind_lower == 'w') + ret->binding = DRGN_SYMBOL_BINDING_WEAK; + else if (isupper(kind)) + ret->binding = DRGN_SYMBOL_BINDING_GLOBAL; + else + /* If lowercase, the symbol is usually local, but it's + * not guaranteed. Use unknown for safety here. */ + ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; + + switch (kind_lower) { + case 'b': /* bss */ + case 'c': /* uninitialized data */ + case 'd': /* initialized data */ + case 'g': /* initialized data (small objects) */ + case 'r': /* read-only data */ + ret->kind = DRGN_SYMBOL_KIND_OBJECT; + break; + case 't': /* text */ + ret->kind = DRGN_SYMBOL_KIND_FUNC; + break; + default: + ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; + } + /* NOTE: The name field is owned by the kallsyms finder. + * Once the kallsyms finder is bound to the program, it cannot be + * unbound, and so it shares lifetime with the Program. + */ + ret->name_lifetime = DRGN_LIFETIME_STATIC; +} + +static int kallsyms_addr_compar(const void *key_void, const void *memb_void) +{ + const uint64_t *key = key_void; + const uint64_t *memb = memb_void; + + /* We are guaranteed that: (min <= key <= max), so we can fearlessly + * index one beyond memb, so long as we've checked that key > memb. 
+	 */
+	if (*key == *memb)
+		return 0;
+	else if (*key < *memb)
+		return -1;
+	else if (*key < memb[1])
+		return 0;
+	else
+		return 1;
+}
+
+/**
+ * Allocate a symbol for the kallsyms entry at @a index and add it to @a builder
+ *
+ * @returns NULL on success, or &drgn_enomem if the symbol could not be
+ * allocated or could not be added to the builder (it is freed in that case)
+ */
+static inline struct drgn_error *
+add_result(struct kallsyms_finder *kr, struct drgn_symbol_result_builder *builder, int index)
+{
+	struct drgn_symbol *symbol = malloc(sizeof(*symbol));
+	if (!symbol)
+		return &drgn_enomem;
+	drgn_symbol_from_kallsyms(kr, index, symbol);
+	if (drgn_symbol_result_builder_add(builder, symbol)) {
+		return NULL;
+	} else {
+		free(symbol);
+		return &drgn_enomem;
+	}
+}
+
+/**
+ * Symbol finder callback backed by a struct kallsyms_finder
+ *
+ * - With DRGN_FIND_SYMBOL_ADDR, binary-search the address array and add the
+ *   symbol whose range contains @a address, if any.
+ * - Otherwise, with DRGN_FIND_SYMBOL_NAME, add the symbols named @a name.
+ * - With neither flag, add every symbol.
+ *
+ * With DRGN_FIND_SYMBOL_ONE, at most one symbol is added.
+ *
+ * NOTE(review): when both DRGN_FIND_SYMBOL_ADDR and DRGN_FIND_SYMBOL_NAME are
+ * set, only the address is consulted and @a name is ignored.
+ *
+ * @param name Name to search for (used only for name searches)
+ * @param address Address to search for (used only for address searches)
+ * @param flags Bitmask of DRGN_FIND_SYMBOL_* selecting the search
+ * @param arg The struct kallsyms_finder to search
+ * @param builder Receives the matching symbols
+ * @returns NULL on success (including "no match"), or an error
+ */
+struct drgn_error *
+drgn_kallsyms_symbol_finder(const char *name, uint64_t address,
+			    enum drgn_find_symbol_flags flags, void *arg,
+			    struct drgn_symbol_result_builder *builder)
+{
+	struct kallsyms_finder *kr = arg;
+	struct drgn_error *err = NULL;
+
+	/*
+	 * An empty table cannot match anything, and "num_syms - 1" below would
+	 * wrap around and index far outside the address array.
+	 */
+	if (kr->num_syms == 0)
+		return NULL;
+
+	if (flags & DRGN_FIND_SYMBOL_ADDR) {
+		uint64_t begin = kallsyms_address(kr, 0);
+		uint64_t end = kallsyms_address(kr, kr->num_syms - 1);
+		uint64_t *res;
+		uint32_t index;
+
+		/* We assume the last symbol is "zero length" for simplicity.
+		 * Short-circuit the search when we're searching outside the
+		 * address range.
+		 */
+		if (address < begin || address > end)
+			return NULL;
+		res = bsearch(&address, kr->addresses, kr->num_syms, sizeof(address),
+			      kallsyms_addr_compar);
+		if (!res)
+			return NULL;
+		index = res - kr->addresses;
+		/* If the gap between symbols > MAX_SYMBOL_LENGTH, then we infer
+		 * that the symbol doesn't contain the address, so fail. The
+		 * last symbol has no successor, so res[1] must not be read
+		 * for it.
+		 */
+		if (index + 1 < kr->num_syms &&
+		    res[1] - res[0] > MAX_SYMBOL_LENGTH)
+			return NULL;
+		return add_result(kr, builder, index);
+	} else if (flags & DRGN_FIND_SYMBOL_NAME) {
+		struct drgn_kallsyms_names_iterator it =
+			drgn_kallsyms_names_search(&kr->htab, (char **)&name);
+		if (!it.entry)
+			return NULL;
+		/*
+		 * The entry gives the [start, end) range of this name in the
+		 * name-sorted index array. Copy the bounds up front and leave
+		 * the hash table iterator alone: advancing it would make the
+		 * loop bound refer to a different entry (or to no entry).
+		 */
+		uint32_t start = it.entry->value.start;
+		uint32_t stop = it.entry->value.end;
+		for (uint32_t i = start; i < stop; i++) {
+			err = add_result(kr, builder, kr->sorted[i]);
+			if (err || (flags & DRGN_FIND_SYMBOL_ONE))
+				break;
+		}
+		return err;
+	} else {
+		for (uint32_t i = 0; i < kr->num_syms; i++) {
+			err = add_result(kr, builder, i);
+			if (err || (flags & DRGN_FIND_SYMBOL_ONE))
+				return err;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Compute an address via the CONFIG_KALLSYMS_ABSOLUTE_PERCPU method
+ *
+ * Non-negative values are returned as-is; negative values are decoded as
+ * base - 1 - val.
+ */
+static uint64_t absolute_percpu(uint64_t base, int32_t val)
+{
+	if (val >= 0)
+		return (uint64_t) val;
+	else
+		return base - 1 - val;
+}
+
+/**
+ * Load the kallsyms address information from @a prog
+ *
+ * Just as symbol name loading is complex, so is address loading. Addresses may
+ * be stored directly as an array of pointers, but more commonly, they are
+ * stored as an array of 32-bit integers which are related to an offset. This
+ * function decodes the addresses into a plain array of 64-bit addresses.
+ *
+ * @param prog The program to read from
+ * @param kr The symbol registry to fill
+ * @param loc Addresses of the kallsyms tables (and of _stext) in the program
+ * @returns NULL on success, or error
+ */
+static struct drgn_error *
+kallsyms_load_addresses(struct drgn_program *prog, struct kallsyms_finder *kr,
+			struct kallsyms_locations *loc)
+{
+	struct drgn_error *err = NULL;
+	bool bswap, bits64;
+	uint32_t *addr32;
+
+	err = drgn_program_bswap(prog, &bswap);
+	if (err)
+		return err;
+	err = drgn_program_is_64_bit(prog, &bits64);
+	if (err)
+		return err;
+
+	/*
+	 * On any error below, kr->addresses stays allocated; it is released
+	 * by drgn_kallsyms_destroy().
+	 */
+	kr->addresses = malloc(kr->num_syms * sizeof(uint64_t));
+	if (!kr->addresses)
+		return &drgn_enomem;
+
+	if (loc->kallsyms_addresses) {
+		/*
+		 * The kallsyms addresses are stored as plain addresses in an
+		 * array of unsigned long! Read the appropriate size array and
+		 * do any necessary byte swaps.
+		 */
+		if (!bits64) {
+			addr32 = malloc(kr->num_syms * sizeof(uint32_t));
+			if (!addr32)
+				return &drgn_enomem;
+
+			err = drgn_program_read_memory(prog, addr32,
+						       loc->kallsyms_addresses,
+						       kr->num_syms * sizeof(uint32_t),
+						       false);
+			if (err) {
+				free(addr32);
+				return err;
+			}
+			for (uint32_t i = 0; i < kr->num_syms; i++) {
+				if (bswap)
+					kr->addresses[i] = bswap_32(addr32[i]);
+				else
+					kr->addresses[i] = addr32[i];
+			}
+			free(addr32);
+		} else {
+			/*
+			 * Each entry is 8 bytes wide here, so the whole
+			 * uint64_t array must be read, not half of it.
+			 */
+			err = drgn_program_read_memory(prog, kr->addresses,
+						       loc->kallsyms_addresses,
+						       kr->num_syms * sizeof(uint64_t),
+						       false);
+			if (err)
+				return err;
+			if (bswap)
+				for (uint32_t i = 0; i < kr->num_syms; i++)
+					kr->addresses[i] = bswap_64(kr->addresses[i]);
+		}
+	} else {
+		/*
+		 * The kallsyms addresses are stored in an array of 4-byte
+		 * values, which can be interpreted in two ways:
+		 * (1) if CONFIG_KALLSYMS_ABSOLUTE_PERCPU is enabled, then
+		 *     positive values are addresses, and negative values are
+		 *     offsets from a base address.
+		 * (2) otherwise, the 4-byte values are directly used as
+		 *     addresses
+		 * First, read the values, then figure out which way to
+		 * interpret them.
+		 */
+		uint64_t relative_base;
+		/*
+		 * NOTE(review): if drgn_program_read_u64()/_u32() already
+		 * convert to host byte order, the explicit swaps below undo
+		 * that conversion -- confirm against their implementation.
+		 */
+		if (bits64) {
+			err = drgn_program_read_u64(prog, loc->kallsyms_relative_base,
+						    false, &relative_base);
+			if (err)
+				return err;
+			if (bswap)
+				relative_base = bswap_64(relative_base);
+		} else {
+			uint32_t rel32;
+			err = drgn_program_read_u32(prog, loc->kallsyms_relative_base,
+						    false, &rel32);
+			if (err)
+				return err;
+			if (bswap)
+				rel32 = bswap_32(rel32);
+			relative_base = rel32;
+		}
+		addr32 = malloc(kr->num_syms * sizeof(uint32_t));
+		if (!addr32)
+			return &drgn_enomem;
+
+		err = drgn_program_read_memory(prog, addr32,
+					       loc->kallsyms_offsets,
+					       kr->num_syms * sizeof(uint32_t),
+					       false);
+		if (err) {
+			free(addr32);
+			return err;
+		}
+		if (bswap)
+			for (uint32_t i = 0; i < kr->num_syms; i++)
+				addr32[i] = bswap_32(addr32[i]);
+
+		/*
+		 * Now that we've read the offsets data, we need to determine
+		 * how to interpret them. To do this, use the _stext symbol. We
+		 * have the correct value from vmcoreinfo. Compute it both ways
+		 * and pick the correct interpretation.
+		 */
+		int stext_idx = drgn_kallsyms_lookup(kr,"_stext");
+		if (stext_idx < 0) {
+			free(addr32);
+			return drgn_error_create(
+				DRGN_ERROR_OTHER,
+				"Could not find _stext symbol in kallsyms");
+		}
+
+		uint64_t stext_abs = relative_base + addr32[stext_idx];
+		uint64_t stext_pcpu = absolute_percpu(relative_base, (int32_t)addr32[stext_idx]);
+		if (stext_abs == loc->_stext) {
+			for (uint32_t i = 0; i < kr->num_syms; i++)
+				kr->addresses[i] = relative_base + addr32[i];
+		} else if (stext_pcpu == loc->_stext) {
+			for (uint32_t i = 0; i < kr->num_syms; i++)
+				kr->addresses[i] = absolute_percpu(relative_base, (int32_t)addr32[i]);
+		} else {
+			err = drgn_error_create(
+				DRGN_ERROR_OTHER,
+				"Unable to interpret kallsyms address data");
+		}
+		free(addr32);
+	}
+	return err;
+}
+
+/**
+ * Free all data held by @a kr
+ *
+ * Releases the hash table and every array owned by the finder. The structure
+ * itself is not freed and its pointers are not reset. Does nothing if @a kr is
+ * NULL.
+ */
+void drgn_kallsyms_destroy(struct kallsyms_finder *kr)
+{
+	if (kr) {
+		drgn_kallsyms_names_deinit(&kr->htab);
+		free(kr->sorted);
+		free(kr->addresses);
+		free(kr->strings);
+		free(kr->names);
+		free(kr->types);
+	}
+}
+
+/**
+ * Load kallsyms data from vmcore + vmcoreinfo data
+ *
+ * Names are loaded first, then the name hash table is built, then addresses
+ * are decoded (address decoding may look up "_stext" by name). On failure,
+ * everything loaded so far is released with drgn_kallsyms_destroy().
+ */
+static struct drgn_error *
+drgn_kallsyms_from_vmcore(struct kallsyms_finder *kr, struct drgn_program *prog,
+			  struct kallsyms_locations *loc)
+{
+	struct drgn_error *err;
+
+	memset(kr, 0, sizeof(*kr));
+	kr->prog = prog;
+	drgn_kallsyms_names_init(&kr->htab);
+
+	err = kallsyms_load_names(kr, loc);
+	if (err)
+		goto out;
+
+	err = kallsyms_create_htab(kr);
+	if (err)
+		goto out;
+
+	err = kallsyms_load_addresses(prog, kr, loc);
+	if (err)
+		goto out;
+
+	return NULL;
+
+out:
+	drgn_kallsyms_destroy(kr);
+	return err;
+}
+
+/** Current capacities of the growable arrays filled by kallsyms_append() */
+struct allocated {
+	/** Number of entries allocated in the names, addresses and types arrays */
+	uint32_t symbols;
+	/** Number of bytes allocated for the string buffer */
+	size_t symbol_buffer;
+};
+
+/** Append a symbol onto the kallsyms finder, expanding the allocations if needed.
*/
+static struct drgn_error *
+kallsyms_append(struct kallsyms_finder *kr, struct allocated *a, const char *name, uint64_t address, char type)
+{
+	size_t name_len = strlen(name) + 1;
+	void *tmp;
+
+	/*
+	 * Grow through a temporary so that a failed realloc() does not
+	 * overwrite (and leak) the buffer the finder still owns; the caller
+	 * frees whatever is in @a kr on error.
+	 */
+	if (kr->num_syms == a->symbols) {
+		uint32_t new_symbols = a->symbols ? a->symbols * 2 : 1024;
+
+		tmp = realloc(kr->names, new_symbols * sizeof(kr->names[0]));
+		if (!tmp)
+			return &drgn_enomem;
+		kr->names = tmp;
+
+		tmp = realloc(kr->addresses, new_symbols * sizeof(kr->addresses[0]));
+		if (!tmp)
+			return &drgn_enomem;
+		kr->addresses = tmp;
+
+		tmp = realloc(kr->types, new_symbols);
+		if (!tmp)
+			return &drgn_enomem;
+		kr->types = tmp;
+
+		/* Record the new capacity only once all three arrays have it. */
+		a->symbols = new_symbols;
+	}
+
+	if (kr->strings_len + name_len > a->symbol_buffer) {
+		size_t new_size = a->symbol_buffer ? a->symbol_buffer : 4096;
+
+		while (kr->strings_len + name_len > new_size)
+			new_size *= 2;
+		tmp = realloc(kr->strings, new_size);
+		if (!tmp)
+			return &drgn_enomem;
+		kr->strings = tmp;
+		a->symbol_buffer = new_size;
+	}
+	memcpy(&kr->strings[kr->strings_len], name, name_len);
+	/*
+	 * We can't just store the pointer, since the string buffer may move
+	 * during reallocation. Store the offset of the string in the buffer
+	 * instead; names are always resolved as &strings[names[i]].
+	 */
+	kr->names[kr->num_syms] = kr->strings_len;
+	kr->addresses[kr->num_syms] = address;
+	kr->types[kr->num_syms] = type;
+	kr->num_syms++;
+	kr->strings_len += name_len;
+	return NULL;
+}
+
+/**
+ * Shrink the finder's buffers to fit their contents
+ *
+ * Shrinking is only an optimization: if realloc() cannot shrink a buffer, the
+ * original (larger) buffer is still valid and is kept. Zero-sized requests are
+ * skipped, since realloc(ptr, 0) may free the buffer and return NULL.
+ *
+ * @returns NULL (kept as a struct drgn_error * for the callers' error handling)
+ */
+static struct drgn_error *
+kallsyms_finalize(struct kallsyms_finder *kr)
+{
+	void *tmp;
+
+	if (kr->num_syms) {
+		tmp = realloc(kr->names, kr->num_syms * sizeof(kr->names[0]));
+		if (tmp)
+			kr->names = tmp;
+		tmp = realloc(kr->addresses, kr->num_syms * sizeof(kr->addresses[0]));
+		if (tmp)
+			kr->addresses = tmp;
+		tmp = realloc(kr->types, kr->num_syms * sizeof(kr->types[0]));
+		if (tmp)
+			kr->types = tmp;
+	}
+	if (kr->strings_len) {
+		tmp = realloc(kr->strings, kr->strings_len);
+		if (tmp)
+			kr->strings = tmp;
+	}
+	return NULL;
+}
+
+/** Load kallsyms directly from the /proc/kallsyms file */
+static struct drgn_error *drgn_kallsyms_from_proc(struct kallsyms_finder *kr,
+						  struct drgn_program *prog)
+{
+	char *line = NULL;
+	size_t line_size = 0;
+	ssize_t res;
+	size_t line_number = 1;
+	struct allocated allocated = {0};
+	struct drgn_error *err = NULL;
+	FILE *fp = fopen("/proc/kallsyms", "r");
+	if (!fp)
+		return drgn_error_create_os("Error opening kallsyms", errno, "/proc/kallsyms");
+
+	memset(kr, 0, sizeof(*kr));
+	kr->prog = prog;
+	drgn_kallsyms_names_init(&kr->htab);
+
+	while ((res = getline(&line, &line_size, fp)) != -1) {
+		char *save = NULL;
+		char *name, *addr_str, *type_str, *mod, *addr_rem;
+		char type;
+		uint64_t addr;
+
+		addr_str = strtok_r(line, " \t\r\n", &save);
+		type_str = strtok_r(NULL," \t\r\n", &save);
+		name = strtok_r(NULL," \t\r\n", &save);
+		mod = strtok_r(NULL," \t\r\n", &save);
+
+		if (!addr_str || !type_str || !name) {
+			err = drgn_error_format(DRGN_ERROR_SYNTAX, "Error parsing kallsyms line %zu", line_number);
+			break;
+		}
+		if (mod)
+			break;
+		type = *type_str;
+		addr = strtoull(addr_str, &addr_rem, 16);
+		if (*addr_rem) {
+			/* addr_rem should be set to the first un-parsed character, and
+			 * since the entire string should be a valid base 16
integer,
+			 * we expect it to be \0 */
+			err = drgn_error_format(DRGN_ERROR_SYNTAX,
+						"Invalid address \"%s\" in kallsyms line %zu",
+						addr_str, line_number);
+			break;
+		}
+		err = kallsyms_append(kr, &allocated, name, addr, type);
+		if (err)
+			break;
+		line_number++;
+	}
+
+	/*
+	 * Each step runs only if nothing has failed so far, so that a parse or
+	 * append error from the loop above is reported (not overwritten by the
+	 * result of a later step).
+	 */
+	if (!err && ferror(fp))
+		err = drgn_error_create_os("Error reading kallsyms", errno, "/proc/kallsyms");
+	if (!err)
+		err = kallsyms_finalize(kr);
+	if (!err)
+		err = kallsyms_create_htab(kr);
+	fclose(fp);
+	free(line);
+	if (err)
+		drgn_kallsyms_destroy(kr);
+	return err;
+}
+
+/*
+ * Initialize @a kr for @a prog: from /proc/kallsyms when the program is live,
+ * otherwise from the program's memory using the addresses in @a loc. Returns
+ * NULL on success, or an error (DRGN_ERROR_MISSING_DEBUG_INFO when the program
+ * is not live and the required addresses are missing from @a loc).
+ */
+struct drgn_error *drgn_kallsyms_init(struct kallsyms_finder *kr,
+				      struct drgn_program *prog,
+				      struct kallsyms_locations *loc)
+{
+	/*
+	 * There are two ways to parse kallsyms data: by using /proc/kallsyms,
+	 * or by finding the necessary symbols in the vmcoreinfo and using them
+	 * to read out the kallsyms data from the vmcore.
+	 *
+	 * Reading /proc/kallsyms is more straightforward, performant, and it
+	 * has broader kernel version support: it should be preferred for live
+	 * systems.
+	 *
+	 * Parsing kallsyms from a core dump is more involved, and it requires
+	 * that the kernel publish some symbol addresses in the VMCOREINFO note.
+	 * The following kernel commits are required, and were introduced in
+	 * 6.0:
+	 *
+	 * - 5fd8fea935a10 ("vmcoreinfo: include kallsyms symbols")
+	 * - f09bddbd86619 ("vmcoreinfo: add kallsyms_num_syms symbol")
+	 */
+	if (prog->flags & DRGN_PROGRAM_IS_LIVE)
+		return drgn_kallsyms_from_proc(kr, prog);
+	else if (loc->kallsyms_names && loc->kallsyms_token_table
+		 && loc->kallsyms_token_index && loc->kallsyms_num_syms)
+		return drgn_kallsyms_from_vmcore(kr, prog, loc);
+	else
+		return drgn_error_create(
+			DRGN_ERROR_MISSING_DEBUG_INFO,
+			"The symbols: kallsyms_names, kallsyms_token_table, "
+			"kallsyms_token_index, and kallsyms_num_syms were not "
+			"found in VMCOREINFO, and the program is not live, "
+			"so /proc/kallsyms cannot be used. There is not enough "
+			"information to use the kallsyms symbol finder."
+		);
+}
diff --git a/libdrgn/kallsyms.h b/libdrgn/kallsyms.h
new file mode 100644
index 000000000..e8c9a710a
--- /dev/null
+++ b/libdrgn/kallsyms.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2023 Oracle and/or its affiliates
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+/**
+ * @file
+ *
+ * Kallsyms data handling
+ *
+ * See @ref Kallsyms
+ */
+
+#ifndef DRGN_KALLSYMS_H
+#define DRGN_KALLSYMS_H
+
+#include
+#include
+
+#include "hash_table.h"
+
+struct drgn_program;
+struct drgn_module;
+struct vmcoreinfo;
+enum drgn_find_symbol_flags;
+struct drgn_symbol_result_builder;
+
+/* Addresses of the kallsyms-related kernel symbols; 0 means "not available". */
+struct kallsyms_locations {
+	uint64_t kallsyms_names;
+	uint64_t kallsyms_token_table;
+	uint64_t kallsyms_token_index;
+	uint64_t kallsyms_num_syms;
+	uint64_t kallsyms_offsets;
+	uint64_t kallsyms_relative_base;
+	uint64_t kallsyms_addresses;
+	uint64_t _stext;
+};
+
+/**
+ * @ingroup KernelInfo
+ *
+ * @defgroup Kallsyms Kallsyms symbol table
+ *
+ * Using the kallsyms data from within the program as a symbol table.
+ *
+ * @{
+ */
+
+struct symbol_entry {
+	uint32_t start;
+	uint32_t end;
+};
+
+DEFINE_HASH_MAP(drgn_kallsyms_names, char *, struct symbol_entry,
+		c_string_key_hash_pair, c_string_key_eq);
+
+/**
+ * Holds kallsyms data copied from the kernel
+ *
+ * Kallsyms data are in increasing sorted order by address. Each symbol is
+ * identified by its index, which we can assume fits in a uint32_t. The
+ * essential data is stored in arrays of length "num_syms": the memory address,
+ * the symbol type, and the index into the string table.
+ *
+ * Strings are stored in a single buffer, all concatenated together and
+ * separated by nul bytes.
+ */
+struct kallsyms_finder {
+	/** Program owning this registry */
+	struct drgn_program *prog;
+
+	/** Number of symbols */
+	uint32_t num_syms;
+	/** Array of symbol addresses */
+	uint64_t *addresses;
+	/** Array of one-character type codes */
+	char *types;
+	/** Array of offsets into @a strings at which each symbol's name starts */
+	uint32_t *names;
+
+	/** Buffer holding all symbol names; entries of @a names index into it */
+	char *strings;
+	/** Bytes used of the @a strings buffer */
+	uint32_t strings_len;
+
+	/** Array of symbol indices, sorted by name. Used by the htab. */
+	uint32_t *sorted;
+	/** Map of symbol name to its [start, end) range of positions in @a sorted */
+	struct drgn_kallsyms_names htab;
+};
+
+
+/**
+ * Initialize kallsyms data
+ *
+ * Load the kallsyms symbol table into @a reg. If @a prog is live
+ * (DRGN_PROGRAM_IS_LIVE), the table is parsed from /proc/kallsyms. Otherwise
+ * it is read from the program's memory using the addresses in @a locations,
+ * which requires kallsyms_names, kallsyms_token_table, kallsyms_token_index
+ * and kallsyms_num_syms to be set; if any of them is missing, a
+ * DRGN_ERROR_MISSING_DEBUG_INFO error is returned.
+ *
+ * @param[out] reg Finder to initialize
+ * @param prog Program to load the symbols from
+ * @param locations Addresses of the kallsyms symbols in the program
+ * @returns NULL on success, or an error
+ */
+struct drgn_error *drgn_kallsyms_init(struct kallsyms_finder *reg,
+				      struct drgn_program *prog,
+				      struct kallsyms_locations *locations);
+
+/**
+ * Find a symbol using the symbol finder object
+ *
+ * This object may be passed to drgn_program_add_symbol_finder, along with a
+ * pointer to the struct kallsyms_finder, in order to find symbols in the
+ * vmlinux kallsyms.
+ */
+struct drgn_error *
+drgn_kallsyms_symbol_finder(const char *name, uint64_t address,
+			    enum drgn_find_symbol_flags flags, void *arg,
+			    struct drgn_symbol_result_builder *builder);
+
+/**
+ * Destroy kallsyms data
+ *
+ * Frees all resources held by the kallsyms finder (but not the structure
+ * itself). Please note that if the finder has been added to the program, then
+ * this *will* cause errors.
+ *
+ * @param kr Finder to destroy
+ */
+void drgn_kallsyms_destroy(struct kallsyms_finder *kr);
+
+/** @} */
+
+#endif // DRGN_KALLSYMS_H
diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c
index bd36b773f..4d0f27455 100644
--- a/libdrgn/language_c.c
+++ b/libdrgn/language_c.c
@@ -1261,9 +1261,8 @@ c_format_pointer_object(const struct drgn_object *obj,
 	bool c_string = ((flags & DRGN_FORMAT_OBJECT_STRING) &&
 			 is_character_type(drgn_type_type(underlying_type).type));
 
-	bool have_symbol;
 	uint64_t uvalue;
-	struct drgn_symbol sym;
+	_cleanup_symbol_ struct drgn_symbol *sym = NULL;
 	size_t start, type_start, type_end, value_start, value_end;
 
 	start = sb->len;
@@ -1287,18 +1286,17 @@ c_format_pointer_object(const struct drgn_object *obj,
 	if (err)
 		return err;
 
-	have_symbol = ((flags & DRGN_FORMAT_OBJECT_SYMBOLIZE) &&
-		       drgn_program_find_symbol_by_address_internal(drgn_object_program(obj),
-								    uvalue,
-								    NULL,
-								    &sym));
-	if (have_symbol && dereference && !c_string &&
+	if ((flags & DRGN_FORMAT_OBJECT_SYMBOLIZE) &&
+	    (err = drgn_program_find_symbol_by_address_internal(drgn_object_program(obj),
+								 uvalue, &sym)))
+		return err;
+	if (sym && dereference && !c_string &&
 	    !string_builder_appendc(sb, '('))
 		return &drgn_enomem;
 	value_start = sb->len;
-	if (have_symbol &&
-	    !string_builder_appendf(sb, "%s+0x%" PRIx64 " = ", sym.name,
-				    uvalue - sym.address))
+	if (sym &&
+	    !string_builder_appendf(sb, "%s+0x%" PRIx64 " = ", sym->name,
+				    uvalue - sym->address))
 		return &drgn_enomem;
 
 	if (!string_builder_appendf(sb, "0x%" PRIx64, uvalue))
@@ -1307,7 +1305,7 @@ c_format_pointer_object(const struct drgn_object *obj,
 		return NULL;
 	value_end = sb->len;
 
-	if ((have_symbol && dereference && !c_string &&
+	if ((sym && dereference && !c_string &&
 	     !string_builder_appendc(sb, ')')) ||
 	    !string_builder_append(sb, " = "))
 		return &drgn_enomem;
diff --git a/libdrgn/program.c b/libdrgn/program.c
index
db2ff07d7..1f1d8a4f2 100644
--- a/libdrgn/program.c
+++ b/libdrgn/program.c
@@ -112,6 +112,17 @@ void drgn_program_init(struct drgn_program *prog,
 	drgn_object_init(&prog->vmemmap, prog);
 }
 
+/*
+ * Walk the program's list of symbol finders and free the nodes whose "free"
+ * flag is set, i.e. the ones allocated by
+ * drgn_program_add_symbol_finder_impl(). Nodes supplied by the caller are
+ * left alone.
+ */
+static void drgn_program_deinit_symbol_finders(struct drgn_program *prog)
+{
+	struct drgn_symbol_finder *finder = prog->symbol_finders;
+	while (finder) {
+		/* Read the link first: the node may be freed below. */
+		struct drgn_symbol_finder *next = finder->next;
+		if (finder->free)
+			free(finder);
+		finder = next;
+	}
+}
+
 void drgn_program_deinit(struct drgn_program *prog)
 {
 	if (prog->core_dump_notes_cached) {
@@ -135,6 +146,7 @@ void drgn_program_deinit(struct drgn_program *prog)
 	drgn_object_deinit(&prog->vmemmap);
 
 	drgn_object_index_deinit(&prog->oindex);
+	drgn_program_deinit_symbol_finders(prog);
 	drgn_program_deinit_types(prog);
 	drgn_memory_reader_deinit(&prog->reader);
 
@@ -1770,27 +1782,6 @@ drgn_program_find_object(struct drgn_program *prog, const char *name,
 				      ret);
 }
 
-bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog,
-						  uint64_t address,
-						  Dwfl_Module *module,
-						  struct drgn_symbol *ret)
-{
-	if (!module) {
-		module = dwfl_addrmodule(prog->dbinfo.dwfl, address);
-		if (!module)
-			return false;
-	}
-
-	GElf_Off offset;
-	GElf_Sym elf_sym;
-	const char *name = dwfl_module_addrinfo(module, address, &offset,
-						&elf_sym, NULL, NULL, NULL);
-	if (!name)
-		return false;
-	drgn_symbol_from_elf(name, address - offset, &elf_sym, ret);
-	return true;
-}
-
 struct drgn_error *drgn_error_symbol_not_found(uint64_t address)
 {
 	return drgn_error_format(DRGN_ERROR_LOOKUP,
@@ -1798,117 +1789,50 @@ struct drgn_error *drgn_error_symbol_not_found(uint64_t address)
 				 address);
 }
 
-LIBDRGN_PUBLIC struct drgn_error *
-drgn_program_find_symbol_by_address(struct drgn_program *prog, uint64_t address,
-				    struct drgn_symbol **ret)
+/*
+ * Run the registered symbol finders in list order (most recently added first,
+ * since new finders are inserted at the head), collecting results in @builder.
+ * Stops at the first finder that returns an error, or, when
+ * DRGN_FIND_SYMBOL_ONE is set, as soon as the builder holds a result.
+ */
+static struct drgn_error *
+drgn_program_symbols_search(struct drgn_program *prog, const char *name,
+			    uint64_t addr, enum drgn_find_symbol_flags flags,
+			    struct drgn_symbol_result_builder *builder)
 {
-	struct drgn_symbol *sym;
-
-	sym = malloc(sizeof(*sym));
-	if (!sym)
-		return &drgn_enomem;
-	if (!drgn_program_find_symbol_by_address_internal(prog, address, NULL,
-							  sym)) {
-		free(sym);
-		return drgn_error_symbol_not_found(address);
+	struct drgn_error *err = NULL;
+	struct drgn_symbol_finder *finder = prog->symbol_finders;
+	while (finder) {
+		err = finder->fn(name, addr, flags, finder->arg, builder);
+		if (err ||
+		    ((flags & DRGN_FIND_SYMBOL_ONE)
+		     && drgn_symbol_result_builder_count(builder) > 0))
+			break;
+		finder = finder->next;
 	}
-	*ret = sym;
-	return NULL;
-}
-
-DEFINE_VECTOR(symbolp_vector, struct drgn_symbol *);
-
-enum {
-	SYMBOLS_SEARCH_NAME = (1 << 0),
-	SYMBOLS_SEARCH_ADDRESS = (1 << 1),
-	SYMBOLS_SEARCH_ALL = (1 << 2),
-};
-
-struct symbols_search_arg {
-	const char *name;
-	uint64_t address;
-	struct symbolp_vector results;
-	unsigned int flags;
-};
-
-static bool symbol_match(struct symbols_search_arg *arg, GElf_Addr addr,
-			 const GElf_Sym *sym, const char *name)
-{
-	if (arg->flags & SYMBOLS_SEARCH_ALL)
-		return true;
-	if ((arg->flags & SYMBOLS_SEARCH_NAME) && strcmp(name, arg->name) == 0)
-		return true;
-	if ((arg->flags & SYMBOLS_SEARCH_ADDRESS) &&
-	    arg->address >= addr && arg->address < addr + sym->st_size)
-		return true;
-	return false;
+	return err;
 }
 
-static int symbols_search_cb(Dwfl_Module *dwfl_module, void **userdatap,
-			     const char *module_name, Dwarf_Addr base,
-			     void *cb_arg)
+/*
+ * Register @fn/@arg as a symbol finder at the head of the program's finder
+ * list. If @finder is NULL, a list node is allocated (and later freed when the
+ * program is deinitialized); otherwise the caller-provided node is used and is
+ * not freed by the program. Returns NULL on success or &drgn_enomem.
+ */
+struct drgn_error *
+drgn_program_add_symbol_finder_impl(struct drgn_program *prog,
+				    struct drgn_symbol_finder *finder,
+				    drgn_symbol_find_fn fn, void *arg)
 {
-	struct symbols_search_arg *arg = cb_arg;
-
-	int symtab_len = dwfl_module_getsymtab(dwfl_module);
-	if (symtab_len == -1)
-		return DWARF_CB_OK;
-
-	/* Ignore the zeroth null symbol */
-	for (int i = 1; i < symtab_len; i++) {
-		GElf_Sym elf_sym;
-		GElf_Addr elf_addr;
-		const char *name = dwfl_module_getsym_info(dwfl_module, i,
-							   &elf_sym, &elf_addr,
-							   NULL, NULL, NULL);
-		if (!name || !symbol_match(arg, elf_addr, &elf_sym, name))
-			continue;
-
-		struct drgn_symbol *sym = malloc(sizeof(*sym));
-		if (!sym)
-			return DWARF_CB_ABORT;
-		drgn_symbol_from_elf(name, elf_addr, &elf_sym, sym);
-		if (!symbolp_vector_append(&arg->results, &sym)) {
-			drgn_symbol_destroy(sym);
-			return DWARF_CB_ABORT;
-		}
+	if (finder) {
+		finder->free = false;
+	} else {
+		finder = malloc(sizeof(*finder));
+		if (!finder)
+			return &drgn_enomem;
+		finder->free = true;
 	}
-	return DWARF_CB_OK;
+	finder->fn = fn;
+	finder->arg = arg;
+	finder->next = prog->symbol_finders;
+	prog->symbol_finders = finder;
+	return NULL;
 }
 
-static struct drgn_error *
-symbols_search(struct drgn_program *prog, struct symbols_search_arg *arg,
-	       struct drgn_symbol ***syms_ret, size_t *count_ret)
+LIBDRGN_PUBLIC struct drgn_error *
+drgn_program_add_symbol_finder(struct drgn_program *prog,
+			       drgn_symbol_find_fn fn,
+			       void *arg)
 {
-	struct drgn_error *err;
-
-	symbolp_vector_init(&arg->results);
-
-	/*
-	 * When searching for addresses, we can identify the exact module to
-	 * search. Otherwise we need to fall back to an exhaustive search.
-	 */
-	err = NULL;
-	if (arg->flags & SYMBOLS_SEARCH_ADDRESS) {
-		Dwfl_Module *module = dwfl_addrmodule(prog->dbinfo.dwfl,
-						      arg->address);
-		if (module && symbols_search_cb(module, NULL, NULL, 0, arg))
-			err = &drgn_enomem;
-	} else {
-		if (dwfl_getmodules(prog->dbinfo.dwfl, symbols_search_cb, arg,
-				    0))
-			err = &drgn_enomem;
-	}
-
-	if (err) {
-		vector_for_each(symbolp_vector, symbolp, &arg->results)
-			drgn_symbol_destroy(*symbolp);
-		symbolp_vector_deinit(&arg->results);
-	} else {
-		symbolp_vector_shrink_to_fit(&arg->results);
-		symbolp_vector_steal(&arg->results, syms_ret, count_ret);
-	}
-	return err;
+	/* NULL node: let the program allocate and own the list entry. */
+	return drgn_program_add_symbol_finder_impl(prog, NULL, fn, arg);
 }
 
 LIBDRGN_PUBLIC struct drgn_error *
@@ -1916,11 +1840,17 @@ drgn_program_find_symbols_by_name(struct drgn_program *prog, const char *name,
 				  struct drgn_symbol ***syms_ret,
 				  size_t *count_ret)
 {
-	struct symbols_search_arg arg = {
-		.name = name,
-		.flags = name ? SYMBOLS_SEARCH_NAME : SYMBOLS_SEARCH_ALL,
-	};
-	return symbols_search(prog, &arg, syms_ret, count_ret);
+	struct drgn_symbol_result_builder builder;
+	/* A NULL name means no filter: the search runs with no flags set. */
+	enum drgn_find_symbol_flags flags = name ? DRGN_FIND_SYMBOL_NAME : 0;
+
+	drgn_symbol_result_builder_init(&builder, false);
+	struct drgn_error *err = drgn_program_symbols_search(prog, name, 0,
+							     flags, &builder);
+	if (err)
+		drgn_symbol_result_builder_abort(&builder);
+	else
+		drgn_symbol_result_builder_array(&builder, syms_ret, count_ret);
+	return err;
 }
 
 LIBDRGN_PUBLIC struct drgn_error *
@@ -1929,88 +1859,81 @@ drgn_program_find_symbols_by_address(struct drgn_program *prog,
 				     struct drgn_symbol ***syms_ret,
 				     size_t *count_ret)
 {
-	struct symbols_search_arg arg = {
-		.address = address,
-		.flags = SYMBOLS_SEARCH_ADDRESS,
-	};
-	return symbols_search(prog, &arg, syms_ret, count_ret);
-}
+	struct drgn_symbol_result_builder builder;
+	enum drgn_find_symbol_flags flags = DRGN_FIND_SYMBOL_ADDR;
 
-struct find_symbol_by_name_arg {
-	const char *name;
-	GElf_Sym sym;
-	GElf_Addr addr;
-	bool found;
-	bool bad_symtabs;
-};
+	drgn_symbol_result_builder_init(&builder, false);
+	struct drgn_error *err = drgn_program_symbols_search(prog, NULL, address,
+							     flags, &builder);
+	if (err)
+		drgn_symbol_result_builder_abort(&builder);
+	else
+		drgn_symbol_result_builder_array(&builder, syms_ret, count_ret);
+	return err;
+}
 
-static int find_symbol_by_name_cb(Dwfl_Module *dwfl_module, void **userdatap,
-				  const char *module_name, Dwarf_Addr base,
-				  void *cb_arg)
+LIBDRGN_PUBLIC struct drgn_error *
+drgn_program_find_symbol_by_name(struct drgn_program *prog,
+				 const char *name, struct drgn_symbol **ret)
 {
-	struct find_symbol_by_name_arg *arg = cb_arg;
-	int symtab_len = dwfl_module_getsymtab(dwfl_module);
-	if (symtab_len == -1) {
-		arg->bad_symtabs = true;
-		return DWARF_CB_OK;
-	}
-	/*
-	 * Global symbols are after local symbols, so by iterating backwards we
-	 * might find a global symbol faster. Ignore the zeroth null symbol.
-	 */
-	for (int i = symtab_len - 1; i > 0; i--) {
-		GElf_Sym sym;
-		GElf_Addr addr;
-		const char *name = dwfl_module_getsym_info(dwfl_module, i, &sym,
-							   &addr, NULL, NULL,
-							   NULL);
-		if (name && strcmp(arg->name, name) == 0) {
-			/*
-			 * The order of precedence is
-			 * GLOBAL = GNU_UNIQUE > WEAK > LOCAL = everything else
-			 *
-			 * If we found a global or unique symbol, return it
-			 * immediately. If we found a weak symbol, then save it,
-			 * which may overwrite a previously found weak or local
-			 * symbol. Otherwise, save the symbol only if we haven't
-			 * found another symbol.
-			 */
-			if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL ||
-			    GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE ||
-			    GELF_ST_BIND(sym.st_info) == STB_WEAK ||
-			    !arg->found) {
-				arg->sym = sym;
-				arg->addr = addr;
-				arg->found = true;
-			}
-			if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL ||
-			    GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE)
-				return DWARF_CB_ABORT;
-		}
+	struct drgn_symbol_result_builder builder;
+	enum drgn_find_symbol_flags flags = DRGN_FIND_SYMBOL_NAME | DRGN_FIND_SYMBOL_ONE;
+
+	drgn_symbol_result_builder_init(&builder, true);
+	struct drgn_error *err = drgn_program_symbols_search(prog, name, 0,
+							     flags, &builder);
+	if (err) {
+		drgn_symbol_result_builder_abort(&builder);
+		return err;
 	}
-	return DWARF_CB_OK;
+
+	/*
+	 * NOTE(review): the builder is not aborted on this path; presumably an
+	 * empty single-result builder owns nothing -- confirm.
+	 */
+	if (!drgn_symbol_result_builder_count(&builder))
+		return drgn_error_format(DRGN_ERROR_LOOKUP,
+					 "could not find symbol with name '%s'", name);
+
+	*ret = drgn_symbol_result_builder_single(&builder);
+	return err;
 }
 
 LIBDRGN_PUBLIC struct drgn_error *
-drgn_program_find_symbol_by_name(struct drgn_program *prog,
-				 const char *name, struct drgn_symbol **ret)
+drgn_program_find_symbol_by_address(struct drgn_program *prog, uint64_t address,
+				    struct drgn_symbol **ret)
 {
-	struct find_symbol_by_name_arg arg = {
-		.name = name,
-	};
-	dwfl_getmodules(prog->dbinfo.dwfl, find_symbol_by_name_cb, &arg, 0);
-	if (arg.found) {
-		struct drgn_symbol *sym = malloc(sizeof(*sym));
-		if (!sym)
-			return &drgn_enomem;
-		drgn_symbol_from_elf(name, arg.addr, &arg.sym, sym);
-		*ret = sym;
-		return NULL;
+	struct drgn_symbol_result_builder builder;
+	enum drgn_find_symbol_flags flags = DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE;
+
+	drgn_symbol_result_builder_init(&builder, true);
+	struct drgn_error *err = drgn_program_symbols_search(prog, NULL, address,
+							     flags, &builder);
+
+	if (err) {
+		drgn_symbol_result_builder_abort(&builder);
+		return err;
 	}
-	return drgn_error_format(DRGN_ERROR_LOOKUP,
-				 "could not find symbol with name '%s'%s", name,
-				 arg.bad_symtabs ?
-				 " (could not get some symbol tables)" : "");
+
+	if (!drgn_symbol_result_builder_count(&builder))
+		return drgn_error_symbol_not_found(address);
+
+	*ret = drgn_symbol_result_builder_single(&builder);
+	return err;
+}
+
+/*
+ * Same search as drgn_program_find_symbol_by_address(), but no lookup error is
+ * created when nothing matches: *ret is set to whatever the single-result
+ * builder holds.
+ */
+struct drgn_error *
+drgn_program_find_symbol_by_address_internal(struct drgn_program *prog,
+					     uint64_t address,
+					     struct drgn_symbol **ret)
+{
+	struct drgn_symbol_result_builder builder;
+	enum drgn_find_symbol_flags flags = DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE;
+
+	drgn_symbol_result_builder_init(&builder, true);
+	struct drgn_error *err = drgn_program_symbols_search(prog, NULL, address,
+							     flags, &builder);
+	if (err)
+		drgn_symbol_result_builder_abort(&builder);
+	else
+		*ret = drgn_symbol_result_builder_single(&builder);
+	return err;
 }
 
 LIBDRGN_PUBLIC struct drgn_error *
diff --git a/libdrgn/program.h b/libdrgn/program.h
index 8b30e052f..2f6c1f5de 100644
--- a/libdrgn/program.h
+++ b/libdrgn/program.h
@@ -27,6 +27,7 @@
 #include "object_index.h"
 #include "platform.h"
 #include "pp.h"
+#include "symbol.h"
 #include "type.h"
 #include "vector.h"
 
@@ -109,6 +110,7 @@ struct drgn_program {
 	 */
 	struct drgn_object_index oindex;
 	struct drgn_debug_info dbinfo;
+	/* Head of the singly linked list of registered symbol finders. */
+	struct drgn_symbol_finder *symbol_finders;
 
 	/*
 	 * Program information.
@@ -351,19 +353,22 @@ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, uint32_t *ret); /* - * Like @ref drgn_program_find_symbol_by_address(), but @p ret is already - * allocated, we may already know the module, and doesn't return a @ref - * drgn_error. + * Like @ref drgn_program_find_symbol_by_address(), but returns @c NULL rather + * than a lookup error if the symbol was not found. * - * @param[in] module Module containing the address. May be @c NULL, in which - * case this will look it up. - * @return Whether the symbol was found. + * @param[in] address Address to search for. + * @param [out] ret The symbol found by the lookup (if found) + * @return @c NULL unless an error (unrelated to a lookup error) was encountered */ -bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, - uint64_t address, - Dwfl_Module *module, - struct drgn_symbol *ret); +struct drgn_error * +drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, + uint64_t address, + struct drgn_symbol **ret); +struct drgn_error * +drgn_program_add_symbol_finder_impl(struct drgn_program *prog, + struct drgn_symbol_finder *finder, + drgn_symbol_find_fn fn, void *arg); /** * Call before a blocking (I/O or long-running) operation. 
* diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index d3c3e8505..b44f9a26f 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -109,6 +109,11 @@ typedef struct { PyObject *attr_cache; } DrgnType; +typedef struct { + PyObject_HEAD + struct kallsyms_finder *finder; +} KallsymsFinder; + typedef struct { PyObject_HEAD /* @@ -173,7 +178,7 @@ typedef struct { typedef struct { PyObject_HEAD - Program *prog; + PyObject *name_obj; /* object owning the reference to the symbol name */ struct drgn_symbol *sym; } Symbol; @@ -225,6 +230,7 @@ extern PyObject *TypeKind_class; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; extern PyTypeObject FaultError_type; +extern PyTypeObject KallsymsFinder_type; extern PyTypeObject Language_type; extern PyTypeObject ObjectIterator_type; extern PyTypeObject Platform_type; @@ -288,7 +294,9 @@ Program *program_from_core_dump(PyObject *self, PyObject *args, PyObject *kwds); Program *program_from_kernel(PyObject *self); Program *program_from_pid(PyObject *self, PyObject *args, PyObject *kwds); -PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog); +PyObject *Symbol_wrap(struct drgn_symbol *sym, PyObject *name_obj); +PyObject *Symbol_list_wrap(struct drgn_symbol **symbols, size_t count, + Program *prog); PyObject *Thread_wrap(struct drgn_thread *drgn_thread); diff --git a/libdrgn/python/kallsyms_finder.c b/libdrgn/python/kallsyms_finder.c new file mode 100644 index 000000000..35a32dbad --- /dev/null +++ b/libdrgn/python/kallsyms_finder.c @@ -0,0 +1,147 @@ +// Copyright (c) 2023 Oracle and/or its affiliates +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgn.h" +#include "drgnpy.h" +#include "kallsyms.h" +#include "modsupport.h" +#include "pyerrors.h" +#include "symbol.h" + +static void KallsymsFinder_dealloc(KallsymsFinder *self) +{ + /* This can't be called if the finder has been added to the program. The + * program should take a reference and prevent deallocation. 
*/ + drgn_kallsyms_destroy(self->finder); + free(self->finder); + Py_TYPE(self)->tp_free((PyObject *)self); +} + + +static PyObject *KallsymsFinder_repr(KallsymsFinder *self) +{ + return (PyObject *)PyUnicode_FromString("KallsymsFinder()"); +} + +static PyObject *KallsymsFinder_call(KallsymsFinder *self, PyObject *args, PyObject *kwargs) +{ + PyObject *address_obj, *name_obj; + uint64_t address = 0; + const char *name = NULL; + static char *kwnames[] = {"name", "address", "one", NULL}; + unsigned int flags = 0; + int single; /* the "p" format unit stores into a C int, not a bool */ + struct drgn_symbol_result_builder builder; + struct drgn_error *err; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOp:__call__", kwnames, + &name_obj, &address_obj, &single)) + return NULL; + + flags |= single ? DRGN_FIND_SYMBOL_ONE : 0; + + if (address_obj != Py_None) { + if (!PyLong_Check(address_obj)) { + PyErr_SetString(PyExc_TypeError, "address: an integer is required"); + return NULL; + } + flags |= DRGN_FIND_SYMBOL_ADDR; + address = PyLong_AsUint64(address_obj); + /* Overflow check */ + if (PyErr_Occurred()) + return NULL; + } + if (name_obj != Py_None) { + if (!PyUnicode_Check(name_obj)) { + PyErr_SetString(PyExc_TypeError, "name: a string is required"); + return NULL; + } + flags |= DRGN_FIND_SYMBOL_NAME; + name = PyUnicode_AsUTF8(name_obj); + } + + drgn_symbol_result_builder_init(&builder, flags & DRGN_FIND_SYMBOL_ONE); + + err = drgn_kallsyms_symbol_finder(name, address, flags, self->finder, &builder); + if (err) + goto error; + + /* We return a list regardless */ + if (single) { + _cleanup_pydecref_ PyObject *list = PyList_New(1); + if (!list) + goto error; + struct drgn_symbol* symbol = drgn_symbol_result_builder_single(&builder); + PyObject *prog_obj = (PyObject *)container_of(self->finder->prog, Program, prog); + PyObject *pysym = Symbol_wrap(symbol, prog_obj); + if (!pysym) + goto error; + PyList_SET_ITEM(list, 0, pysym); + return_ptr(list); + } else { + struct drgn_symbol **syms; + size_t count; +
drgn_symbol_result_builder_array(&builder, &syms, &count); + return Symbol_list_wrap(syms, count, + container_of(self->finder->prog, Program, prog)); + } + + return NULL; +error: + drgn_symbol_result_builder_abort(&builder); + return err ? set_drgn_error(err) : NULL; +} + +static PyObject *KallsymsFinder_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) +{ + static char *kwnames[] = {"prog", "names", "token_table", "token_index", "num_syms", + "offsets", "relative_base", "addresses", "_stext", NULL}; + struct kallsyms_locations kl; + PyObject *prog_obj; + struct drgn_program *prog; + struct drgn_error *err; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OKKKKKKKK", kwnames, + &prog_obj, &kl.kallsyms_names, &kl.kallsyms_token_table, + &kl.kallsyms_token_index, &kl.kallsyms_num_syms, + &kl.kallsyms_offsets, &kl.kallsyms_relative_base, + &kl.kallsyms_addresses, &kl._stext)) + return NULL; + + if (!PyObject_TypeCheck(prog_obj, &Program_type)) + return PyErr_Format(PyExc_TypeError, "expected Program, not %s", + Py_TYPE(prog_obj)->tp_name); + + prog = &((Program *)prog_obj)->prog; + + struct kallsyms_finder *finder = calloc(1, sizeof(*finder)); + if (!finder) + return set_drgn_error(&drgn_enomem); + err = drgn_kallsyms_init(finder, prog, &kl); + if (err) + goto out; + + KallsymsFinder *finder_obj = call_tp_alloc(KallsymsFinder); + if (!finder_obj) { + drgn_kallsyms_destroy(finder); + goto out; + } + finder_obj->finder = finder; + Py_INCREF(prog_obj); + return (PyObject *)finder_obj; +out: + free(finder); + return err ? set_drgn_error(err) : NULL; /* err is NULL when only the object allocation failed */ +} + +PyTypeObject KallsymsFinder_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.KallsymsFinder", + .tp_basicsize = sizeof(KallsymsFinder), + .tp_dealloc = (destructor)KallsymsFinder_dealloc, + .tp_repr = (reprfunc)KallsymsFinder_repr, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_KallsymsFinder_DOC, + .tp_call = (ternaryfunc)KallsymsFinder_call, + .tp_new = KallsymsFinder_new, +}; diff --git
a/libdrgn/python/main.c b/libdrgn/python/main.c index 2bc92517d..180e43055 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -264,6 +264,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) }) if (add_module_constants(m) || + add_type(m, &KallsymsFinder_type) || add_type(m, &Language_type) || add_languages() || add_type(m, &DrgnObject_type) || PyType_Ready(&ObjectIterator_type) || diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index af62218c4..0008acd1b 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -10,6 +10,7 @@ #include "../string_builder.h" #include "../util.h" #include "../vector.h" +#include "kallsyms.h" DEFINE_HASH_SET_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq); @@ -477,6 +478,70 @@ static struct drgn_error *py_object_find_fn(const char *name, size_t name_len, return drgn_object_copy(ret, &((DrgnObject *)obj)->obj); } +static struct drgn_error *py_symbol_find_fn(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, + void *data, struct drgn_symbol_result_builder *builder) +{ + PyGILState_guard(); + + _cleanup_pydecref_ PyObject *name_obj = NULL; + if (flags & DRGN_FIND_SYMBOL_NAME) { + name_obj = PyUnicode_FromString(name); + if (!name_obj) + return drgn_error_from_python(); + } else { + name_obj = Py_None; + Py_INCREF(name_obj); + } + + _cleanup_pydecref_ PyObject *address_obj = NULL; + if (flags & DRGN_FIND_SYMBOL_ADDR) { + address_obj = PyLong_FromUnsignedLongLong(addr); /* addr is 64-bit; unsigned long may be narrower */ + if (!address_obj) + return drgn_error_from_python(); + } else { + address_obj = Py_None; + Py_INCREF(address_obj); + } + + _cleanup_pydecref_ PyObject *one_obj = PyBool_FromLong(flags & DRGN_FIND_SYMBOL_ONE); + + _cleanup_pydecref_ PyObject *tmp = PyObject_CallFunction(data, "OOO", name_obj, + address_obj, one_obj); + if (!tmp) + return drgn_error_from_python(); + + _cleanup_pydecref_ PyObject *obj = + PySequence_Fast(tmp, "symbol finder must return a sequence"); + if (!obj) + return
drgn_error_from_python(); + + size_t len = PySequence_Fast_GET_SIZE(obj); + if (len > 1 && (flags & DRGN_FIND_SYMBOL_ONE)) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "symbol finder returned multiple elements, but one was requested"); + } + + for (size_t i = 0; i < len; i++) { + PyObject *item = PySequence_Fast_GET_ITEM(obj, i); + if (!PyObject_TypeCheck(item, &Symbol_type)) + return drgn_error_create(DRGN_ERROR_TYPE, + "symbol finder results must be of type Symbol"); + _cleanup_free_ struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return &drgn_enomem; + struct drgn_error *err = drgn_symbol_copy(sym, ((Symbol *)item)->sym); + if (err) + return err; + + if (!drgn_symbol_result_builder_add(builder, sym)) + return &drgn_enomem; + sym = NULL; // owned by the builder now + } + + return NULL; +} + static PyObject *Program_add_object_finder(Program *self, PyObject *args, PyObject *kwds) { @@ -506,6 +571,41 @@ static PyObject *Program_add_object_finder(Program *self, PyObject *args, Py_RETURN_NONE; } +static PyObject *Program_add_symbol_finder(Program *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"fn", NULL}; + struct drgn_error *err; + PyObject *fn; + int ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:add_symbol_finder", + keywords, &fn)) + return NULL; + + if (!PyCallable_Check(fn)) { + PyErr_SetString(PyExc_TypeError, "fn must be callable"); + return NULL; + } + + ret = Program_hold_object(self, fn); + if (ret == -1) + return NULL; + + /* Fast path for the builtin kallsyms finder, avoiding Python object + * allocation overhead */ + if (PyObject_TypeCheck(fn, &KallsymsFinder_type)) + err = drgn_program_add_symbol_finder(&self->prog, + drgn_kallsyms_symbol_finder, + ((KallsymsFinder *)fn)->finder); + else + err = drgn_program_add_symbol_finder(&self->prog, py_symbol_find_fn, + fn); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + static PyObject *Program_set_core_dump(Program *self, PyObject
*args, PyObject *kwds) { @@ -894,23 +994,7 @@ static PyObject *Program_symbols(Program *self, PyObject *args) if (err) return set_drgn_error(err); - _cleanup_pydecref_ PyObject *list = PyList_New(count); - if (!list) { - drgn_symbols_destroy(symbols, count); - return NULL; - } - for (size_t i = 0; i < count; i++) { - PyObject *pysym = Symbol_wrap(symbols[i], self); - if (!pysym) { - /* Free symbols which aren't yet added to list. */ - drgn_symbols_destroy(symbols, count); - return NULL; - } - symbols[i] = NULL; - PyList_SET_ITEM(list, i, pysym); - } - free(symbols); - return_ptr(list); + return Symbol_list_wrap(symbols, count, self); } static PyObject *Program_symbol(Program *self, PyObject *arg) @@ -936,7 +1020,7 @@ static PyObject *Program_symbol(Program *self, PyObject *arg) } if (err) return set_drgn_error(err); - ret = Symbol_wrap(sym, self); + ret = Symbol_wrap(sym, (PyObject *)self); if (!ret) { drgn_symbol_destroy(sym); return NULL; @@ -1120,6 +1204,8 @@ static PyMethodDef Program_methods[] = { METH_VARARGS | METH_KEYWORDS, drgn_Program_add_type_finder_DOC}, {"add_object_finder", (PyCFunction)Program_add_object_finder, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_object_finder_DOC}, + {"add_symbol_finder", (PyCFunction)Program_add_symbol_finder, + METH_VARARGS | METH_KEYWORDS, drgn_Program_add_symbol_finder_DOC}, {"set_core_dump", (PyCFunction)Program_set_core_dump, METH_VARARGS | METH_KEYWORDS, drgn_Program_set_core_dump_DOC}, {"set_kernel", (PyCFunction)Program_set_kernel, METH_NOARGS, diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 3e43b182a..1112a8092 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -209,7 +209,7 @@ static PyObject *StackFrame_symbol(StackFrame *self) err = drgn_stack_frame_symbol(self->trace->trace, self->i, &sym); if (err) return set_drgn_error(err); - PyObject *ret = Symbol_wrap(sym, prog); + PyObject *ret = Symbol_wrap(sym, (PyObject *)prog); if (!ret) { 
drgn_symbol_destroy(sym); return NULL; diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index ea0d98cb3..53946a5a1 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -3,23 +3,85 @@ #include +#include "drgn.h" #include "drgnpy.h" +#include "symbol.h" -PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog) +PyObject *Symbol_wrap(struct drgn_symbol *sym, PyObject *name_obj) { Symbol *ret = call_tp_alloc(Symbol); if (ret) { ret->sym = sym; - ret->prog = prog; - Py_INCREF(prog); + ret->name_obj = name_obj; + Py_XINCREF(name_obj); } return (PyObject *)ret; } +PyObject *Symbol_list_wrap(struct drgn_symbol **symbols, size_t count, + Program *prog) +{ + _cleanup_pydecref_ PyObject *list = PyList_New(count); + if (!list) { + drgn_symbols_destroy(symbols, count); + return NULL; + } + for (size_t i = 0; i < count; i++) { + PyObject *pysym = Symbol_wrap(symbols[i], (PyObject *)prog); + if (!pysym) { + /* Free symbols which aren't yet added to list. */ + drgn_symbols_destroy(symbols, count); + /* Free list and all symbols already added. 
*/ + return NULL; + } + symbols[i] = NULL; + PyList_SET_ITEM(list, i, pysym); + } + free(symbols); + return_ptr(list); +} + +static PyObject *Symbol_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) +{ + struct drgn_symbol *sym; + static char *keywords[] = {"name", "address", "size", "binding", "kind", NULL}; + PyObject *name_obj; + struct index_arg address = {}, size = {}; + struct enum_arg binding = { + .type = SymbolBinding_class, + }; + struct enum_arg kind = { + .type = SymbolKind_class, + }; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&O&O&O&:Symbol", keywords, + &PyUnicode_Type, &name_obj, + index_converter, &address, + index_converter, &size, + enum_converter, &binding, + enum_converter, &kind)) + return NULL; + + const char *name = PyUnicode_AsUTF8(name_obj); + if (!name) + return NULL; + + struct drgn_error *err = drgn_symbol_create( + name, address.uvalue,size.uvalue, binding.value, kind.value, + DRGN_LIFETIME_EXTERNAL, &sym); + if (err) + return set_drgn_error(err); + + PyObject *ret = Symbol_wrap(sym, name_obj); + if (!ret) + drgn_symbol_destroy(sym); + return ret; +} + static void Symbol_dealloc(Symbol *self) { drgn_symbol_destroy(self->sym); - Py_XDECREF(self->prog); + Py_XDECREF(self->name_obj); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -100,4 +162,5 @@ PyTypeObject Symbol_type = { .tp_doc = drgn_Symbol_DOC, .tp_richcompare = (richcmpfunc)Symbol_richcompare, .tp_getset = Symbol_getset, + .tp_new = Symbol_new, }; diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 75f85036f..7cb2032f6 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -109,6 +109,7 @@ drgn_stack_trace_num_frames(struct drgn_stack_trace *trace) LIBDRGN_PUBLIC struct drgn_error * drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) { + struct drgn_error *err; STRING_BUILDER(str); for (size_t frame = 0; frame < trace->num_frames; frame++) { if (!string_builder_appendf(&str, "#%-2zu ", frame)) @@ -121,19 +122,19 @@ 
drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) if (!string_builder_append(&str, name)) return &drgn_enomem; } else if ((pc = drgn_register_state_get_pc(regs)).has_value) { - Dwfl_Module *dwfl_module = - regs->module ? regs->module->dwfl_module : NULL; - struct drgn_symbol sym; - if (dwfl_module && - drgn_program_find_symbol_by_address_internal(trace->prog, - pc.value - !regs->interrupted, - dwfl_module, - &sym)) { + _cleanup_symbol_ struct drgn_symbol *sym = NULL; + err = drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + &sym); + if (err) + return err; + + if (sym) { if (!string_builder_appendf(&str, "%s+0x%" PRIx64 "/0x%" PRIx64, - sym.name, - pc.value - sym.address, - sym.size)) + sym->name, + pc.value - sym->address, + sym->size)) return &drgn_enomem; } else { if (!string_builder_appendf(&str, "0x%" PRIx64, @@ -173,6 +174,7 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret { STRING_BUILDER(str); struct drgn_register_state *regs = trace->frames[frame].regs; + struct drgn_error *err; if (!string_builder_appendf(&str, "#%zu at ", frame)) return &drgn_enomem; @@ -181,17 +183,15 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret if (!string_builder_appendf(&str, "%#" PRIx64, pc.value)) return &drgn_enomem; - Dwfl_Module *dwfl_module = - regs->module ? 
regs->module->dwfl_module : NULL; - struct drgn_symbol sym; - if (dwfl_module && - drgn_program_find_symbol_by_address_internal(trace->prog, - pc.value - !regs->interrupted, - dwfl_module, - &sym) && - !string_builder_appendf(&str, " (%s+0x%" PRIx64 "/0x%" PRIx64 ")", - sym.name, pc.value - sym.address, - sym.size)) + _cleanup_symbol_ struct drgn_symbol *sym = NULL; /* cleanup also runs when the lookup fails without setting sym */ + err = drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + &sym); + if (err) + return err; + if (sym && !string_builder_appendf(&str, " (%s+0x%" PRIx64 "/0x%" PRIx64 ")", + sym->name, pc.value - sym->address, + sym->size)) return &drgn_enomem; } else { if (!string_builder_append(&str, "???")) @@ -368,17 +368,15 @@ drgn_stack_frame_symbol(struct drgn_stack_trace *trace, size_t frame, "program counter is not known at stack frame"); } pc.value -= !regs->interrupted; - Dwfl_Module *dwfl_module = - regs->module ? regs->module->dwfl_module : NULL; - if (!dwfl_module) - return drgn_error_symbol_not_found(pc.value); - _cleanup_free_ struct drgn_symbol *sym = malloc(sizeof(*sym)); + struct drgn_symbol *sym = NULL; + struct drgn_error *err; + err = drgn_program_find_symbol_by_address_internal(trace->prog, pc.value, + &sym); + if (err) + return err; if (!sym) - return &drgn_enomem; - if (!drgn_program_find_symbol_by_address_internal(trace->prog, pc.value, - dwfl_module, sym)) return drgn_error_symbol_not_found(pc.value); - *ret = no_cleanup_ptr(sym); + *ret = sym; return NULL; } diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 89c92532b..581fdc265 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -11,6 +11,11 @@ LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { + if (sym && sym->name_lifetime == DRGN_LIFETIME_OWNED) + /* Cast here is necessary - we want symbol users to + * never modify sym->name, but when we own the name, + * we must modify it by freeing it.
*/ + free((char *)sym->name); free(sym); } @@ -26,6 +31,7 @@ void drgn_symbol_from_elf(const char *name, uint64_t address, const GElf_Sym *elf_sym, struct drgn_symbol *ret) { ret->name = name; + ret->name_lifetime = DRGN_LIFETIME_STATIC; ret->address = address; ret->size = elf_sym->st_size; int binding = GELF_ST_BIND(elf_sym->st_info); @@ -40,6 +46,43 @@ void drgn_symbol_from_elf(const char *name, uint64_t address, ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; } +struct drgn_error * +drgn_symbol_copy(struct drgn_symbol *dst, struct drgn_symbol *src) +{ + if (src->name_lifetime == DRGN_LIFETIME_STATIC) { + dst->name = src->name; + dst->name_lifetime = DRGN_LIFETIME_STATIC; + } else { + dst->name = strdup(src->name); + if (!dst->name) + return &drgn_enomem; + dst->name_lifetime = DRGN_LIFETIME_OWNED; + } + dst->address = src->address; + dst->size = src->size; + dst->kind = src->kind; + dst->binding = src->binding; + return NULL; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_symbol_create(const char *name, uint64_t address, uint64_t size, + enum drgn_symbol_binding binding, enum drgn_symbol_kind kind, + enum drgn_lifetime name_lifetime, struct drgn_symbol **ret) +{ + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return &drgn_enomem; + sym->name = name; + sym->address = address; + sym->size = size; + sym->binding = binding; + sym->kind = kind; + sym->name_lifetime = name_lifetime; + *ret = sym; + return NULL; +} + LIBDRGN_PUBLIC const char *drgn_symbol_name(struct drgn_symbol *sym) { return sym->name; @@ -73,3 +116,61 @@ LIBDRGN_PUBLIC bool drgn_symbol_eq(struct drgn_symbol *a, struct drgn_symbol *b) a->size == b->size && a->binding == b->binding && a->kind == b->kind); } + +DEFINE_VECTOR_FUNCTIONS(symbolp_vector); + +LIBDRGN_PUBLIC bool +drgn_symbol_result_builder_add(struct drgn_symbol_result_builder *builder, + struct drgn_symbol *symbol) +{ + if (builder->one) { + if (builder->single) + drgn_symbol_destroy(builder->single); + builder->single = symbol; + } 
else if (!symbolp_vector_append(&builder->vector, &symbol)) { + return false; + } + return true; +} + +LIBDRGN_PUBLIC size_t +drgn_symbol_result_builder_count(const struct drgn_symbol_result_builder *builder) +{ + if (builder->one) + return builder->single ? 1 : 0; + else + return symbolp_vector_size(&builder->vector); +} + +void drgn_symbol_result_builder_init(struct drgn_symbol_result_builder *builder, + bool one) +{ + memset(builder, 0, sizeof(*builder)); + builder->one = one; + if (!one) + symbolp_vector_init(&builder->vector); +} + +void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder) +{ + if (builder->one) { + drgn_symbol_destroy(builder->single); + } else { + vector_for_each(symbolp_vector, symbolp, &builder->vector) + drgn_symbol_destroy(*symbolp); + symbolp_vector_deinit(&builder->vector); + } +} + +struct drgn_symbol * +drgn_symbol_result_builder_single(struct drgn_symbol_result_builder *builder) +{ + return builder->single; +} + +void drgn_symbol_result_builder_array(struct drgn_symbol_result_builder *builder, + struct drgn_symbol ***syms_ret, size_t *count_ret) +{ + symbolp_vector_shrink_to_fit(&builder->vector); + symbolp_vector_steal(&builder->vector, syms_ret, count_ret); +} diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index e136e86ee..b2e880af4 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -6,7 +6,9 @@ #include +#include "cleanup.h" #include "drgn.h" +#include "vector.h" struct drgn_symbol { const char *name; @@ -14,10 +16,52 @@ struct drgn_symbol { uint64_t size; enum drgn_symbol_binding binding; enum drgn_symbol_kind kind; + enum drgn_lifetime name_lifetime; }; +struct drgn_symbol_finder { + drgn_symbol_find_fn fn; + void *arg; + struct drgn_symbol_finder *next; + bool free; +}; + +DEFINE_VECTOR_TYPE(symbolp_vector, struct drgn_symbol *); + +struct drgn_symbol_result_builder { + bool one; + union { + struct symbolp_vector vector; + struct drgn_symbol *single; + }; +}; + +#define _cleanup_symbol_ 
_cleanup_(drgn_symbol_cleanup) +static inline void drgn_symbol_cleanup(struct drgn_symbol **p) +{ + drgn_symbol_destroy(*p); +} + /** Initialize a @ref drgn_symbol from an ELF symbol. */ void drgn_symbol_from_elf(const char *name, uint64_t address, const GElf_Sym *elf_sym, struct drgn_symbol *ret); +/** Destroy the contents of the result builder */ +void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder); + +/** Initialize result builder */ +void drgn_symbol_result_builder_init(struct drgn_symbol_result_builder *builder, + bool one); + +/** Return single result */ +struct drgn_symbol * +drgn_symbol_result_builder_single(struct drgn_symbol_result_builder *builder); + +/** Return array result */ +void drgn_symbol_result_builder_array(struct drgn_symbol_result_builder *builder, + struct drgn_symbol ***syms_ret, size_t *count_ret); + +struct drgn_error * +drgn_symbol_copy(struct drgn_symbol *dst, struct drgn_symbol *src); + #endif /* DRGN_SYMBOL_H */ diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 3ff949325..72c35af1d 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -1,9 +1,8 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import tempfile -from typing import NamedTuple -from drgn import Program, SymbolBinding, SymbolKind +from drgn import Program, Symbol, SymbolBinding, SymbolKind from tests import TestCase from tests.dwarfwriter import dwarf_sections from tests.elf import ET, PT, SHT, STB, STT @@ -45,35 +44,13 @@ def elf_symbol_program(*modules): return prog -# We don't want to support creating drgn.Symbol instances yet, so use this dumb -# class for testing. 
-class Symbol(NamedTuple): - name: str - address: int - size: int - binding: SymbolBinding - kind: SymbolKind - - class TestElfSymbol(TestCase): - def assert_symbol_equal(self, drgn_symbol, symbol): - self.assertEqual( - Symbol( - drgn_symbol.name, - drgn_symbol.address, - drgn_symbol.size, - drgn_symbol.binding, - drgn_symbol.kind, - ), - symbol, - ) - def assert_symbols_equal_unordered(self, drgn_symbols, symbols): self.assertEqual(len(drgn_symbols), len(symbols)) drgn_symbols = sorted(drgn_symbols, key=lambda x: (x.address, x.name)) symbols = sorted(symbols, key=lambda x: (x.address, x.name)) for drgn_symbol, symbol in zip(drgn_symbols, symbols): - self.assert_symbol_equal(drgn_symbol, symbol) + self.assertEqual(drgn_symbol, symbol) def test_by_address(self): elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL) @@ -91,13 +68,13 @@ def test_by_address(self): prog = elf_symbol_program(*modules) self.assertRaises(LookupError, prog.symbol, 0xFFFEFFFF) self.assertEqual(prog.symbols(0xFFFEFFFF), []) - self.assert_symbol_equal(prog.symbol(0xFFFF0000), first) + self.assertEqual(prog.symbol(0xFFFF0000), first) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0000), [first]) - self.assert_symbol_equal(prog.symbol(0xFFFF0004), first) + self.assertEqual(prog.symbol(0xFFFF0004), first) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0004), [first]) - self.assert_symbol_equal(prog.symbol(0xFFFF0008), second) + self.assertEqual(prog.symbol(0xFFFF0008), second) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0008), [second]) - self.assert_symbol_equal(prog.symbol(0xFFFF000C), second) + self.assertEqual(prog.symbol(0xFFFF000C), second) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF000C), [second]) self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) @@ -171,8 +148,8 @@ def test_by_name(self): for modules in same_module, different_modules: with self.subTest(modules=len(modules)): prog = elf_symbol_program(*modules) - 
self.assert_symbol_equal(prog.symbol("first"), first) - self.assert_symbol_equal(prog.symbol("second"), second) + self.assertEqual(prog.symbol("first"), first) + self.assertEqual(prog.symbol("second"), second) self.assertRaises(LookupError, prog.symbol, "third") self.assert_symbols_equal_unordered(prog.symbols("first"), [first]) @@ -258,7 +235,7 @@ def test_kind(self): (ElfSymbol("foo", 0xFFFF0000, 1, elf_type, STB.GLOBAL),) ) symbol = Symbol("foo", 0xFFFF0000, 1, SymbolBinding.GLOBAL, drgn_kind) - self.assert_symbol_equal(prog.symbol("foo"), symbol) + self.assertEqual(prog.symbol("foo"), symbol) symbols = prog.symbols("foo") self.assert_symbols_equal_unordered(symbols, [symbol]) @@ -286,3 +263,83 @@ def test_all_symbols(self): ] prog = elf_symbol_program(*elf_syms) self.assert_symbols_equal_unordered(prog.symbols(), syms) + + +class TestSymbolFinder(TestCase): + TEST_SYMS = [ + Symbol("one", 0xFFFF1000, 16, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("two", 0xFFFF2000, 16, SymbolBinding.GLOBAL, SymbolKind.FUNC), + Symbol("three", 0xFFFF2008, 8, SymbolBinding.GLOBAL, SymbolKind.FUNC), + ] + + def finder(self, arg_name, arg_address, arg_one): + self.called = True + res = [] + self.assertEqual(self.expected_name, arg_name) + self.assertEqual(self.expected_address, arg_address) + self.assertEqual(self.expected_one, arg_one) + for sym in self.TEST_SYMS: + if arg_name and sym.name == arg_name: + res.append(sym) + elif arg_address and sym.address <= arg_address < sym.address + sym.size: + res.append(sym) + elif not arg_name and not arg_address: + res.append(sym) + + # This symbol finder intentionally has a bug: it does not respect the + # "arg_one" flag: it may return multiple symbols even when "arg_one" is + # true. 
+ return res + + def setUp(self): + self.prog = Program() + self.prog.add_symbol_finder(self.finder) + self.called = False + + def expect_args(self, name, address, one): + self.expected_name = name + self.expected_address = address + self.expected_one = one + + def test_args_single_string(self): + self.expect_args("search_symbol", None, True) + with self.assertRaises(LookupError): + self.prog.symbol("search_symbol") + self.assertTrue(self.called) + + def test_args_single_int(self): + self.expect_args(None, 0xFF00, True) + with self.assertRaises(LookupError): + self.prog.symbol(0xFF00) + self.assertTrue(self.called) + + def test_args_single_with_many_results(self): + self.expect_args(None, 0xFFFF2008, True) + with self.assertRaises(ValueError): + self.prog.symbol(0xFFFF2008) + self.assertTrue(self.called) + + def test_single_with_result(self): + self.expect_args("one", None, True) + self.assertEqual(self.prog.symbol("one"), self.TEST_SYMS[0]) + self.assertTrue(self.called) + + def test_args_many_string(self): + self.expect_args("search_symbol", None, False) + self.assertEqual(self.prog.symbols("search_symbol"), []) + self.assertTrue(self.called) + + def test_args_many_int(self): + self.expect_args(None, 0xFF00, False) + self.assertEqual(self.prog.symbols(0xFF00), []) + self.assertTrue(self.called) + + def test_many_with_result(self): + self.expect_args(None, 0xFFFF2004, False) + self.assertEqual(self.prog.symbols(0xFFFF2004), [self.TEST_SYMS[1]]) + self.assertTrue(self.called) + + def test_many_without_filter(self): + self.expect_args(None, None, False) + self.assertEqual(self.prog.symbols(), self.TEST_SYMS) + self.assertTrue(self.called)