Skip to content

Commit

Permalink
dbgutil+kernel: add core ID tracker service
Browse files Browse the repository at this point in the history
  • Loading branch information
Qix- committed Jan 20, 2025
1 parent d695296 commit 933c72b
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 4 deletions.
3 changes: 2 additions & 1 deletion dbgutil/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,14 @@ invocation, or try running the kernel with a single core. This will reduce the
likelihood that a breakpoint race condition occurs, assuming the symptom you're
trying to debug is still replicable with fewer cores.
The lock and PFA trackers are disabled by default and can cause overhead in the
The lock, PFA and core ID trackers are disabled by default and can cause overhead in the
kernel's execution when enabled. You can enable some or all of them with the
following commands in GDB:
```
set oro-pfa on
set oro-lock on
set oro-core-id on
```
## Problems
Expand Down
1 change: 1 addition & 0 deletions dbgutil/oro_debug_suite/service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .autosym import SYMBOLS
from .pfa_tracker import PFA_TRACKER
from .lock_tracker import LOCK_TRACKER
from .core_id_tracker import CORE_ID_TRACKER
6 changes: 6 additions & 0 deletions dbgutil/oro_debug_suite/service/autosym.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
SYM_LOCK_ACQUIRE = "oro_dbgutil::__oro_dbgutil_lock_acquire"
## All: Indicates that a lock has been released.
SYM_LOCK_RELEASE = "oro_dbgutil::__oro_dbgutil_lock_release"
## All: Indicates that a core ID (function) has been set.
SYM_CORE_ID_SET = "oro_dbgutil::__oro_dbgutil_core_id_fn_was_set"
## All: Indicates that the core ID getter function was called.
SYM_CORE_ID_CALL = "oro_dbgutil::__oro_dbgutil_core_id_fn_was_called"

TRACKED_SYMBOLS = frozenset(
set(
Expand All @@ -38,6 +42,8 @@
("f", SYM_PFA_MASS_FREE),
("f", SYM_LOCK_ACQUIRE),
("f", SYM_LOCK_RELEASE),
("f", SYM_CORE_ID_SET),
("f", SYM_CORE_ID_CALL),
]
)
)
Expand Down
194 changes: 194 additions & 0 deletions dbgutil/oro_debug_suite/service/core_id_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import gdb # type: ignore
from ..log import debug, warn, error, log
from . import SYMBOLS, QEMU
from .autosym import (
SYM_CORE_ID_SET,
SYM_CORE_ID_CALL,
)
from .backtrace import get_backtrace, warn_backtrace, log_backtrace, error_backtrace


class CoreIdTracker(object):
    """Tracks which Oro kernel core ID belongs to which GDB thread.

    Two internal breakpoints report into this object: one on the kernel's
    "core ID function was set" hook (see `_track_set`) and one on the
    "core ID function was called" hook (see `_track_call`). The tracker keeps
    a two-way map between kernel core IDs and GDB thread numbers and logs a
    warning or error whenever a later report disagrees with what was recorded.

    The tracker is disabled by default; assigning to `enabled` (re)installs
    or removes the breakpoints.
    """

    def __init__(self):
        # kernel ID => (GDB core (thread) ID, backtrace captured when it was set)
        self.__oro_to_gdb = dict()
        # GDB core (thread) ID => (kernel ID, backtrace captured when it was set)
        self.__gdb_to_oro = dict()
        # When true, every tracked call emits a debug line (see `_track_call`).
        self.verbose = False
        self.__enabled = False
        self._set_breakpoint = None
        self._call_breakpoint = None

        # Re-evaluate the breakpoints whenever symbols are (re)loaded, and
        # forget all recorded IDs whenever QEMU is (re)started.
        SYMBOLS.on_loaded(self.attach)
        QEMU.on_started(self.clear)

    def clear(self, reattach=True):
        """Forgets every recorded core ID.

        :param reattach: when true (the default), `attach()` is called
            afterwards so the breakpoints are rebuilt.
        """
        self.__oro_to_gdb.clear()
        self.__gdb_to_oro.clear()
        debug("core_id_tracker: cleared all known core IDs")
        if reattach:
            self.attach()

    @property
    def enabled(self):
        """Whether the tracker is currently switched on."""
        return self.__enabled

    @enabled.setter
    def enabled(self, value):
        self.__enabled = value
        # Install or remove the breakpoints to match the new state.
        self.attach()

    def get_by_id(self, id):
        """Looks up a kernel core ID.

        :returns: the recorded `(gdb_thread_id, backtrace)` tuple, or `None`
            if the kernel core ID has not been seen.
        """
        return self.__oro_to_gdb.get(id, None)

    def get_by_cpu(self, cpu):
        """Looks up a GDB thread number.

        :returns: the recorded `(kernel_core_id, backtrace)` tuple, or `None`
            if the GDB thread has not been seen.
        """
        return self.__gdb_to_oro.get(cpu, None)

    def _track_set(self, core_id, thread_id):
        """Records that `core_id` was set while GDB thread `thread_id` was selected.

        The new pairing always overwrites whatever was recorded before; if it
        differs from the previous pairing in either direction, a warning with
        the backtrace of the previous set is emitted.
        """
        bt = get_backtrace()

        # Snapshot the previous entries before overwriting them, so that the
        # "replaces existing" warnings below can still report the old values.
        current_gdb = self.__oro_to_gdb.get(core_id, None)
        current_oro = self.__gdb_to_oro.get(thread_id, None)

        self.__oro_to_gdb[core_id] = (thread_id, bt)
        self.__gdb_to_oro[thread_id] = (core_id, bt)

        log(f"core_id_tracker: set: oro {core_id} ({hex(core_id)}) => gdb {thread_id}")
        log_backtrace("core_id_tracker", bt)

        if current_gdb is not None and current_gdb[0] != thread_id:
            warn(
                f"core_id_tracker: ... above replaces existing known gdb core ID: oro {core_id} => WAS gdb {current_gdb[0]}, set at:"
            )
            warn_backtrace("core_id_tracker", current_gdb[1])
        if current_oro is not None and current_oro[0] != core_id:
            warn(
                f"core_id_tracker: ... above replaces existing known oro core ID: WAS oro {current_oro[0]} => gdb {thread_id}, set at:"
            )
            warn_backtrace("core_id_tracker", current_oro[1])

    def _track_call(self, core_id, thread_id):
        """Checks a core ID returned by the kernel against the recorded maps.

        Nothing is recorded here. An unknown ID (in either direction) produces
        a warning; a known ID that maps to something else produces an error.
        Both are accompanied by the backtrace of the current call.
        """
        bt = get_backtrace()

        current_gdb = self.__oro_to_gdb.get(core_id, None)
        current_oro = self.__gdb_to_oro.get(thread_id, None)

        if self.verbose:
            cgdb = None if current_gdb is None else current_gdb[0]
            coro = None if current_oro is None else current_oro[0]
            agree = (
                "AGREE" if cgdb == thread_id and coro == core_id else "!!! DISAGREE !!!"
            )
            debug(
                f"core_id_tracker: call: oro {core_id} (INTERNAL MAP => {cgdb}) ON gdb {thread_id} (INTERNAL MAP => {coro}) - {agree}"
            )

        # Direction 1: kernel core ID => GDB thread.
        if current_gdb is None:
            warn(
                f"core_id_tracker: call: unknown oro core ID: {core_id}, gdb {thread_id}, call at:"
            )
            warn_backtrace("core_id_tracker", bt)
        elif current_gdb[0] != thread_id:
            error(
                f"core_id_tracker: call: mismatched core IDs: oro {core_id} => gdb {current_gdb[0]}, but returned {thread_id}, call at:"
            )
            error_backtrace("core_id_tracker", bt)

        # Direction 2: GDB thread => kernel core ID.
        if current_oro is None:
            warn(
                f"core_id_tracker: call: unknown gdb core ID: gdb {thread_id}, oro {core_id}, call at:"
            )
            warn_backtrace("core_id_tracker", bt)
        elif current_oro[0] != core_id:
            error(
                f"core_id_tracker: call: mismatched core IDs: gdb {thread_id} => oro {current_oro[0]}, but returned {core_id}, call at:"
            )
            error_backtrace("core_id_tracker", bt)

    def attach(self):
        """Removes any existing tracker breakpoints and, if enabled, installs new ones.

        New breakpoints are only created when both hook symbols are available
        from `SYMBOLS`; otherwise the tracker stays detached.
        """
        has_cleared = False
        if self._set_breakpoint:
            self._set_breakpoint.delete()
            self._set_breakpoint = None
            has_cleared = True
        if self._call_breakpoint:
            self._call_breakpoint.delete()
            self._call_breakpoint = None
            has_cleared = True

        if has_cleared:
            debug("core_id_tracker: detached")

        if self.enabled:
            set_sym = SYMBOLS.get_if_tracked(SYM_CORE_ID_SET)
            call_sym = SYMBOLS.get_if_tracked(SYM_CORE_ID_CALL)
            if set_sym and call_sym:
                self._set_breakpoint = CoreIdTrackerSetBreakpoint(set_sym)
                self._call_breakpoint = CoreIdTrackerCallBreakpoint(call_sym)
                debug("core_id_tracker: attached")
            else:
                debug("core_id_tracker: not attached, missing symbols")


# Backward-compatible alias: this class was originally (mis)named after the
# lock tracker. Existing references to `LockTracker` in this module keep working.
LockTracker = CoreIdTracker


class CoreIdTrackerSetBreakpoint(gdb.Breakpoint):
    """Internal breakpoint placed on the kernel's "core ID was set" hook."""

    def __init__(self, at):
        super().__init__(at, internal=True, qualified=True)

    def stop(self):
        # The hook's argument is looked up by its exact name in the stopped frame.
        reported_id = int(gdb.parse_and_eval("core_id_do_not_change_this_parameter"))
        gdb_thread = gdb.selected_thread().num
        CORE_ID_TRACKER._track_set(reported_id, gdb_thread)
        # Report to the tracker only; never halt the inferior here.
        return False


class CoreIdTrackerCallBreakpoint(gdb.Breakpoint):
    """Internal breakpoint placed on the kernel's "core ID getter was called" hook."""

    def __init__(self, at):
        super().__init__(at, internal=True, qualified=True)

    def stop(self):
        # The hook's argument is looked up by its exact name in the stopped frame.
        reported_id = int(gdb.parse_and_eval("core_id_do_not_change_this_parameter"))
        gdb_thread = gdb.selected_thread().num
        CORE_ID_TRACKER._track_call(reported_id, gdb_thread)
        # Report to the tracker only; never halt the inferior here.
        return False


# GDB boolean parameter `oro-core-id`: switches the core ID tracker on or off.
# (Documented with comments rather than a class docstring so the class's
# `__doc__` stays exactly as it was.)
class CoreIdEnableParam(gdb.Parameter):
    set_doc = "Enables/disables the Oro kernel core ID tracker."
    show_doc = "Shows the current state of the Oro kernel core ID tracker."

    def __init__(self):
        super().__init__("oro-core-id", gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN)
        # Start out mirroring the tracker's current state.
        self.value = CORE_ID_TRACKER.enabled

    def get_set_string(self):
        # Push the newly assigned value into the tracker; assigning `enabled`
        # also re-attaches or detaches its breakpoints.
        requested_state = self.value
        CORE_ID_TRACKER.enabled = requested_state
        return ""


# GDB boolean parameter `oro-core-id-verbose`: toggles the tracker's per-call
# debug output. (Documented with comments rather than a class docstring so the
# class's `__doc__` stays exactly as it was.)
class CoreIdVerboseParam(gdb.Parameter):
    set_doc = "Enables/disables verbose output for the Oro kernel core ID tracker."
    show_doc = "Shows the current state of verbose output for the Oro kernel core ID tracker."

    def __init__(self):
        super().__init__("oro-core-id-verbose", gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN)
        # Start out mirroring the tracker's current verbosity.
        self.value = CORE_ID_TRACKER.verbose

    def get_set_string(self):
        # Push the newly assigned value into the tracker.
        requested_state = self.value
        CORE_ID_TRACKER.verbose = requested_state
        return ""


# Module-level tracker singleton. The breakpoint classes and the parameter
# classes in this file all refer to it by this global name, so it has to exist
# before any of them is instantiated.
CORE_ID_TRACKER = LockTracker()

# Instantiate the GDB parameters (`oro-core-id` and `oro-core-id-verbose`).
# Their constructors read the tracker's current state, hence the ordering.
CoreIdEnableParam()
CoreIdVerboseParam()
43 changes: 43 additions & 0 deletions oro-dbgutil/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,46 @@ pub extern "C" fn __oro_dbgutil_lock_release_writer(
);
}
}

/// Tells the core ID tracker that a core ID function was set. The tracker will
/// then track the ID from this point forward.
///
/// This is a debugger hook: the debug suite places a breakpoint on this symbol
/// and reads the argument by evaluating the name
/// `core_id_do_not_change_this_parameter`, so the parameter must keep that name.
///
/// When `debug_assertions` or the `force-hooks` feature is enabled, the function
/// is never inlined and is placed in the `.text.force_keep` section; otherwise
/// it is always inlined and its body is empty.
#[no_mangle]
#[cfg_attr(
any(debug_assertions, feature = "force-hooks"),
link_section = ".text.force_keep"
)]
#[cfg_attr(not(any(debug_assertions, feature = "force-hooks")), inline(always))]
#[cfg_attr(any(debug_assertions, feature = "force-hooks"), inline(never))]
pub extern "C" fn __oro_dbgutil_core_id_fn_was_set(core_id_do_not_change_this_parameter: u32) {
// SAFETY: The assembly is a single `nop`; the operand only appears inside an
// SAFETY: assembler comment, and the declared options state that the stack,
// SAFETY: memory and flags are left untouched.
#[cfg(any(debug_assertions, feature = "force-hooks"))]
unsafe {
asm!(
// The operand is referenced only within a comment in the emitted assembly.
// NOTE(review): presumably this keeps the argument materialized in a
// register for the debugger - confirm.
"/*{}*/",
"nop",
in(reg) u64::from(core_id_do_not_change_this_parameter),
options(nostack, nomem, preserves_flags)
);
}
}

/// Tells the core ID tracker that a core ID was retrieved. The tracker will
/// validate that the ID returned is the same as the one at time of
/// [`__oro_dbgutil_core_id_fn_was_set`].
///
/// This is a debugger hook: the debug suite places a breakpoint on this symbol
/// and reads the argument by evaluating the name
/// `core_id_do_not_change_this_parameter`, so the parameter must keep that name.
///
/// When `debug_assertions` or the `force-hooks` feature is enabled, the function
/// is never inlined and is placed in the `.text.force_keep` section; otherwise
/// it is always inlined and its body is empty.
#[no_mangle]
#[cfg_attr(
any(debug_assertions, feature = "force-hooks"),
link_section = ".text.force_keep"
)]
#[cfg_attr(not(any(debug_assertions, feature = "force-hooks")), inline(always))]
#[cfg_attr(any(debug_assertions, feature = "force-hooks"), inline(never))]
pub extern "C" fn __oro_dbgutil_core_id_fn_was_called(core_id_do_not_change_this_parameter: u32) {
// SAFETY: The assembly is a single `nop`; the operand only appears inside an
// SAFETY: assembler comment, and the declared options state that the stack,
// SAFETY: memory and flags are left untouched.
#[cfg(any(debug_assertions, feature = "force-hooks"))]
unsafe {
asm!(
// The operand is referenced only within a comment in the emitted assembly.
// NOTE(review): presumably this keeps the argument materialized in a
// register for the debugger - confirm.
"/*{}*/",
"nop",
in(reg) u64::from(core_id_do_not_change_this_parameter),
options(nostack, nomem, preserves_flags)
);
}
}
10 changes: 7 additions & 3 deletions oro-kernel/src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ pub(crate) unsafe extern "C" fn oro_sync_current_core_id() -> u32 {
);
}

KERNEL_ID_FN.assume_init()()
let id = KERNEL_ID_FN.assume_init()();
::oro_dbgutil::__oro_dbgutil_core_id_fn_was_called(id);
id
}

/// The generic kernel ID fetcher, based on the [`Arch`] type.
Expand All @@ -56,6 +58,7 @@ pub unsafe fn initialize_kernel_id_fn<A: Arch>() {
// SAFETY(qix-): We have offloaded safety considerations to the caller here.
#[expect(static_mut_refs)]
{
::oro_dbgutil::__oro_dbgutil_core_id_fn_was_set(get_arch_kernel_id::<A>());
KERNEL_ID_FN.write(get_arch_kernel_id::<A>);
}
}
Expand All @@ -72,12 +75,13 @@ pub unsafe fn initialize_kernel_id_fn<A: Arch>() {
pub unsafe fn install_dummy_kernel_id_fn() {
#[cfg(debug_assertions)]
{
HAS_SET_KERNEL_ID_FN.store(true, core::sync::atomic::Ordering::Relaxed);
HAS_SET_KERNEL_ID_FN.store(false, core::sync::atomic::Ordering::Relaxed);
}

// SAFETY(qix-): We have offloaded safety considerations to the caller here.
#[expect(static_mut_refs)]
{
KERNEL_ID_FN.write(|| 0);
::oro_dbgutil::__oro_dbgutil_core_id_fn_was_set(0xDEAD_DEAD);
KERNEL_ID_FN.write(|| 0xDEAD_DEAD);
}
}

0 comments on commit 933c72b

Please sign in to comment.