Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Kallsyms Symbol Finder #351

Closed
wants to merge 10 commits into from
94 changes: 94 additions & 0 deletions _drgn.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,33 @@ class Program:
return an :class:`Object` or ``None`` if not found.
"""
...
def add_symbol_finder(
    self, fn: Callable[[Optional[str], Optional[int], bool], Sequence[Symbol]]
) -> None:
    """
    Register a callback for finding symbols in the program.

    The callback should take three arguments: a search name, a search
    address, and a boolean flag 'one' indicating whether to return only
    the single best match. When the 'one' flag is True, the callback should
    return a list containing at most one :class:`Symbol`. When the flag is
    False, the callback should return a list of all matching
    :class:`Symbol`\\ s. Both the name and address arguments are optional.
    If both are provided, then the result(s) should match both. If neither
    is provided, the finder should return all available symbols. If no
    result is found, the return should be an empty list.

    Callbacks are called in reverse order of the order they were added
    (i.e., the most recently added callback is called first). When the
    'one' flag is set, the search will short-circuit after the first
    finder which returns a result, and subsequent finders will not be
    called. Otherwise, all callbacks will be called, and all results will be
    returned.

    :param fn: Callable taking name, address, and 'one' flag, and
        returning a sequence of :class:`Symbol`\\ s.
    """
    ...
def set_core_dump(self, path: Union[Path, int]) -> None:
"""
Set the program to a core dump.
Expand Down Expand Up @@ -1585,6 +1612,73 @@ class Symbol:
kind: Final[SymbolKind]
"""Kind of entity represented by this symbol."""

class KallsymsFinder:
    """
    A symbol finder which uses vmlinux kallsyms data.
    """

    def __init__(
        self,
        prog: Program,
        kallsyms_names: int,
        kallsyms_token_table: int,
        kallsyms_token_index: int,
        kallsyms_num_syms: int,
        kallsyms_offsets: int,
        kallsyms_relative_base: int,
        kallsyms_addresses: int,
        _stext: int,
    ) -> None:
        """
        Manually construct a ``KallsymsFinder`` given all symbol addresses.

        .. note::

            This class should not normally be instantiated manually. See
            :func:`drgn.helpers.linux.kallsyms.make_kallsyms_vmlinux_finder`
            instead for a way of automatically creating the finder via
            information found in the ``VMCOREINFO``.

        The finder is capable of searching the compressed table of symbol names
        and addresses stored within kernel memory. It requires
        ``CONFIG_KALLSYMS=y`` and ``CONFIG_KALLSYMS_ALL=y`` in your kernel
        configuration -- this is common on desktop and server Linux
        distributions. However, the quality of symbol information is not
        excellent: the :meth:`Symbol.binding` and :meth:`Symbol.kind` values are
        inferred from type code information provided by kallsyms which was
        originally generated by ``nm(1)``. Further, the :meth:`Symbol.size` is
        computed using the offset of the next symbol after it in memory. This
        can create some unusual results.

        In order to create a ``KallsymsFinder``, drgn must know the location of
        several symbols, which creates a bit of a chicken-and-egg problem.
        Thankfully, starting with Linux 6.0, these symbol addresses are included
        in the VMCOREINFO note. The required symbols are addresses of variables
        in the vmcore:

        - ``kallsyms_names``: an array of compressed symbol name data.
        - ``kallsyms_token_table``, ``kallsyms_token_index``: tables used in
          decompressing symbol names.
        - ``kallsyms_num_syms``: the number of kallsyms symbols.
        - ``_stext``: the start of the kernel text segment. This symbol address
          is necessary for verifying decoded kallsyms data.

        Depending on the way that kallsyms is configured (see
        ``CONFIG_KALLSYMS_ABSOLUTE_PERCPU`` and
        ``CONFIG_KALLSYMS_BASE_RELATIVE``), the following symbols are needed. If
        the symbol names are not present, they should be given as zero.

        - ``kallsyms_offsets``
        - ``kallsyms_relative_base``
        - ``kallsyms_addresses``

        :param prog: Program to create a finder for
        :returns: A callable object suitable to provide to
          :meth:`Program.add_symbol_finder()`.
        """
    __call__: Callable[[Optional[str], Optional[int], bool], List[Symbol]]
    """Lookup symbol by name, address, or both."""

class SymbolBinding(enum.Enum):
"""
A ``SymbolBinding`` describes the linkage behavior and visibility of a
Expand Down
1 change: 1 addition & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ Symbols
.. drgndoc:: Symbol
.. drgndoc:: SymbolBinding
.. drgndoc:: SymbolKind
.. drgndoc:: KallsymsFinder

Stack Traces
------------
Expand Down
2 changes: 2 additions & 0 deletions drgn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
FaultError,
FindObjectFlags,
IntegerLike,
KallsymsFinder,
Language,
MissingDebugInfoError,
NoDefaultProgramError,
Expand Down Expand Up @@ -105,6 +106,7 @@
"FaultError",
"FindObjectFlags",
"IntegerLike",
"KallsymsFinder",
"Language",
"MissingDebugInfoError",
"NULL",
Expand Down
58 changes: 58 additions & 0 deletions drgn/helpers/linux/kallsyms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
# Copyright (c) 2023 Oracle and/or its affiliates
# SPDX-License-Identifier: LGPL-2.1-or-later
"""
Kallsyms
--------

The kallsyms module contains helpers which allow you to use the built-in
kallsyms symbol table for drgn object lookup. Combined with an alternative type
information source, this can enable debugging Linux kernel core dumps without
the corresponding DWARF debuginfo files.
"""
import re
from typing import Dict

from drgn import KallsymsFinder, Program

__all__ = ("make_kallsyms_vmlinux_finder",)


def _vmcoreinfo_symbols(prog: Program) -> Dict[str, int]:
    """
    Collect the ``SYMBOL(name)=hexaddr`` entries of the program's VMCOREINFO.

    The ``VMCOREINFO`` object is read as an ASCII string and examined line by
    line; lines that are not exactly of the ``SYMBOL(...)=...`` form are
    ignored.

    :param prog: Program whose ``VMCOREINFO`` object is parsed.
    :returns: Mapping of symbol name to address (parsed as hexadecimal).
    """
    symbol_line = re.compile(r"SYMBOL\(([^)]+)\)=([A-Fa-f0-9]+)")
    text = prog["VMCOREINFO"].string_().decode("ascii")
    # fullmatch() yields None for every line that is not a SYMBOL entry;
    # those are filtered out by the comprehension below.
    candidates = map(symbol_line.fullmatch, text.strip().split("\n"))
    return {m.group(1): int(m.group(2), 16) for m in candidates if m}


def make_kallsyms_vmlinux_finder(prog: Program) -> KallsymsFinder:
    """
    Build a vmlinux kallsyms finder suitable for
    :meth:`drgn.Program.add_symbol_finder`.

    The addresses needed to construct a ``KallsymsFinder`` are looked up in
    the program's VMCOREINFO data. This may fail if the information is not
    present. Please note that the debugged Linux kernel must be 6.0 or later
    to find this information.

    :param prog: Program to create the finder for.
    :returns: a callable symbol finder object
    """
    # The order here is the order in which the addresses are handed to
    # KallsymsFinder as positional arguments after ``prog``.
    ordered_names = (
        "kallsyms_names",
        "kallsyms_token_table",
        "kallsyms_token_index",
        "kallsyms_num_syms",
        "kallsyms_offsets",
        "kallsyms_relative_base",
        "kallsyms_addresses",
        "_stext",
    )
    known = _vmcoreinfo_symbols(prog)
    # A symbol that VMCOREINFO does not list is passed as address zero.
    return KallsymsFinder(prog, *(known.get(name, 0) for name in ordered_names))
3 changes: 3 additions & 0 deletions libdrgn/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \
helpers.h \
io.c \
io.h \
kallsyms.c \
kallsyms.h \
language.c \
language.h \
language_c.c \
Expand Down Expand Up @@ -157,6 +159,7 @@ _drgn_la_SOURCES = python/constants.c \
python/drgnpy.h \
python/error.c \
python/helpers.c \
python/kallsyms_finder.c \
python/language.c \
python/main.c \
python/object.c \
Expand Down
144 changes: 144 additions & 0 deletions libdrgn/debug_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -2024,6 +2024,148 @@ struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo,
goto out;
}

/*
 * State shared between elf_symbols_search() and the per-module callback
 * elf_symbols_search_cb().
 */
struct elf_symbols_search_arg {
	/* Name to compare against; only consulted if DRGN_FIND_SYMBOL_NAME is set. */
	const char *name;
	/* Address to look up; only consulted if DRGN_FIND_SYMBOL_ADDR is set. */
	uint64_t address;
	/* Which criteria apply, and whether only one result is wanted. */
	enum drgn_find_symbol_flags flags;
	/* Set when storing a match fails; NULL otherwise. */
	struct drgn_error *err;
	/* Destination for matching symbols. */
	struct drgn_symbol_result_builder *builder;
};

/*
 * Return whether an ELF symbol satisfies the search criteria in @arg.
 *
 * @arg: search criteria (only ->flags, ->name and ->address are read).
 * @addr: address of the symbol.
 * @sym: ELF symbol entry; only st_size is read.
 * @name: name of the symbol.
 *
 * If DRGN_FIND_SYMBOL_NAME is set, @name must equal arg->name. If
 * DRGN_FIND_SYMBOL_ADDR is set, arg->address must lie in
 * [addr, addr + st_size), so a symbol with st_size == 0 never matches an
 * address search. With neither flag set, every symbol matches.
 */
static bool elf_symbol_match(struct elf_symbols_search_arg *arg, GElf_Addr addr,
			     const GElf_Sym *sym, const char *name)
{
	if ((arg->flags & DRGN_FIND_SYMBOL_NAME) && strcmp(name, arg->name) != 0)
		return false;
	/*
	 * Compare the offset into the symbol against its size instead of
	 * computing addr + st_size: that sum wraps around for a symbol which
	 * ends at the very top of the address space, which would wrongly
	 * reject every address inside it.
	 */
	if ((arg->flags & DRGN_FIND_SYMBOL_ADDR) &&
	    (arg->address < addr || arg->address - addr >= sym->st_size))
		return false;
	return true;
}

/*
 * Record a matching ELF symbol in arg->builder.
 *
 * Returns true if the caller should stop iterating: either an error was
 * stored in arg->err, or enough has been found for this kind of search.
 * Returns false if the search should continue.
 */
static bool elf_symbol_store_match(struct elf_symbols_search_arg *arg,
				   GElf_Sym *elf_sym, GElf_Addr addr,
				   const char *name)
{
	/* Looking for the single best symbol with a given name? */
	const bool best_by_name =
		arg->flags == (DRGN_FIND_SYMBOL_ONE | DRGN_FIND_SYMBOL_NAME);

	if (best_by_name) {
		/*
		 * The order of precedence is
		 * GLOBAL = UNIQUE > WEAK > LOCAL = everything else
		 *
		 * A global or unique symbol is stored and ends the search. A
		 * weak symbol is stored, possibly on top of a previously
		 * stored weak or local one. Anything else is only stored if
		 * nothing has been stored yet.
		 */
		switch (GELF_ST_BIND(elf_sym->st_info)) {
		case STB_GLOBAL:
		case STB_GNU_UNIQUE:
		case STB_WEAK:
			break;
		default:
			if (drgn_symbol_result_builder_count(arg->builder) > 0)
				return false;
			break;
		}
	}

	struct drgn_symbol *sym = malloc(sizeof(*sym));
	if (!sym) {
		arg->err = &drgn_enomem;
		return true;
	}
	drgn_symbol_from_elf(name, addr, elf_sym, sym);
	if (!drgn_symbol_result_builder_add(arg->builder, sym)) {
		arg->err = &drgn_enomem;
		drgn_symbol_destroy(sym);
	}

	/* Abort on error; sym must not be inspected after a failed add. */
	if (arg->err)
		return true;

	/* Best-by-name lookups only stop early on a global or unique symbol. */
	if (best_by_name)
		return sym->binding == DRGN_SYMBOL_BINDING_GLOBAL
		       || sym->binding == DRGN_SYMBOL_BINDING_UNIQUE;

	/* Any other single lookup is satisfied by the first match. */
	return (arg->flags & DRGN_FIND_SYMBOL_ONE) != 0;
}

/*
 * Per-module callback: scan one module's symbol table and store every symbol
 * matching the search described by @cb_arg (a struct elf_symbols_search_arg).
 *
 * Has the callback signature expected by dwfl_getmodules(); @userdatap,
 * @module_name and @base are unused. Returns DWARF_CB_ABORT once
 * elf_symbol_store_match() asks to stop, otherwise DWARF_CB_OK.
 */
static int elf_symbols_search_cb(Dwfl_Module *dwfl_module, void **userdatap,
				 const char *module_name, Dwarf_Addr base,
				 void *cb_arg)
{
	struct elf_symbols_search_arg *search = cb_arg;
	const int num_syms = dwfl_module_getsymtab(dwfl_module);

	/* No usable symbol table for this module: nothing to contribute. */
	if (num_syms < 0)
		return DWARF_CB_OK;

	/* Index 0 is the null symbol, so start at 1. */
	for (int idx = 1; idx < num_syms; idx++) {
		GElf_Sym elf_sym;
		GElf_Addr sym_addr;
		const char *sym_name = dwfl_module_getsym_info(dwfl_module, idx,
							       &elf_sym,
							       &sym_addr, NULL,
							       NULL, NULL);

		if (sym_name &&
		    elf_symbol_match(search, sym_addr, &elf_sym, sym_name) &&
		    elf_symbol_store_match(search, &elf_sym, sym_addr, sym_name))
			return DWARF_CB_ABORT;
	}
	return DWARF_CB_OK;
}

static struct drgn_error *
elf_symbols_search(const char *name, uint64_t addr, enum drgn_find_symbol_flags flags,
void *data, struct drgn_symbol_result_builder *builder)
{
Dwfl_Module *dwfl_module = NULL;
struct drgn_program *prog = data;
struct elf_symbols_search_arg arg = {
.name = name,
.address = addr,
.flags = flags,
.err = NULL,
.builder = builder,
};

if (arg.flags & DRGN_FIND_SYMBOL_ADDR) {
dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, arg.address);
if (!dwfl_module)
return NULL;
}

if ((arg.flags & (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE))
== (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) {
GElf_Off offset;
GElf_Sym elf_sym;
const char *name = dwfl_module_addrinfo(
dwfl_module, addr, &offset,
&elf_sym, NULL, NULL, NULL);
if (!name)
return NULL;
struct drgn_symbol *sym = malloc(sizeof(*sym));
if (!sym)
return &drgn_enomem;
drgn_symbol_from_elf(name, addr - offset, &elf_sym, sym);
if (!drgn_symbol_result_builder_add(builder, sym)) {
arg.err = &drgn_enomem;
drgn_symbol_destroy(sym);
}
} else if (dwfl_module) {
elf_symbols_search_cb(dwfl_module, NULL, NULL, 0, &arg);
} else {
dwfl_getmodules(prog->dbinfo.dwfl, elf_symbols_search_cb, &arg, 0);
}
return arg.err;
}

bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo,
const char *name)
{
Expand All @@ -2044,6 +2186,8 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo,
drgn_program_add_object_finder_impl(prog, &dbinfo->object_finder,
drgn_debug_info_find_object,
dbinfo);
drgn_program_add_symbol_finder_impl(prog, &dbinfo->symbol_finder,
elf_symbols_search, prog);
drgn_module_table_init(&dbinfo->modules);
c_string_set_init(&dbinfo->module_names);
drgn_dwarf_info_init(dbinfo);
Expand Down
2 changes: 2 additions & 0 deletions libdrgn/debug_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "object_index.h"
#include "orc_info.h"
#include "string_builder.h"
#include "symbol.h"
#include "type.h"
#include "vector.h"

Expand Down Expand Up @@ -137,6 +138,7 @@ struct drgn_debug_info {

struct drgn_type_finder type_finder;
struct drgn_object_finder object_finder;
struct drgn_symbol_finder symbol_finder;

/** DWARF frontend library handle. */
Dwfl *dwfl;
Expand Down
Loading
Loading