From 933c72bb2b810999c2c89a70ef707fc84e8f9ade Mon Sep 17 00:00:00 2001 From: Josh Junon Date: Mon, 20 Jan 2025 22:59:43 +0100 Subject: [PATCH] dbgutil+kernel: add core ID tracker service --- dbgutil/README.md | 3 +- dbgutil/oro_debug_suite/service/__init__.py | 1 + dbgutil/oro_debug_suite/service/autosym.py | 6 + .../service/core_id_tracker.py | 194 ++++++++++++++++++ oro-dbgutil/src/lib.rs | 43 ++++ oro-kernel/src/sync.rs | 10 +- 6 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 dbgutil/oro_debug_suite/service/core_id_tracker.py diff --git a/dbgutil/README.md b/dbgutil/README.md index a6fe8131..835f3640 100644 --- a/dbgutil/README.md +++ b/dbgutil/README.md @@ -67,13 +67,14 @@ invocation, or try running the kernel with a single core. This will reduce the likelihood that a breakpoint race condition occurs, assuming the symptom you're trying to debug is still replicable with fewer cores. -The lock and PFA trackers are disabled by default and can cause overhead in the +The lock, PFA and core ID trackers are disabled by default and can cause overhead in the kernel's execution when enabled. You can enable some or all of them with the following commands in GDB: ``` set oro-pfa on set oro-lock on +set oro-core-id on ``` ## Problems diff --git a/dbgutil/oro_debug_suite/service/__init__.py b/dbgutil/oro_debug_suite/service/__init__.py index d477701b..bc083311 100644 --- a/dbgutil/oro_debug_suite/service/__init__.py +++ b/dbgutil/oro_debug_suite/service/__init__.py @@ -2,3 +2,4 @@ from .autosym import SYMBOLS from .pfa_tracker import PFA_TRACKER from .lock_tracker import LOCK_TRACKER +from .core_id_tracker import CORE_ID_TRACKER diff --git a/dbgutil/oro_debug_suite/service/autosym.py b/dbgutil/oro_debug_suite/service/autosym.py index 66d4c7ed..26373a9c 100644 --- a/dbgutil/oro_debug_suite/service/autosym.py +++ b/dbgutil/oro_debug_suite/service/autosym.py @@ -25,6 +25,10 @@ SYM_LOCK_ACQUIRE = "oro_dbgutil::__oro_dbgutil_lock_acquire" ## All: Indicates that a lock has been released. SYM_LOCK_RELEASE = "oro_dbgutil::__oro_dbgutil_lock_release" +## All: Indicates that a core ID (function) has been set. +SYM_CORE_ID_SET = "oro_dbgutil::__oro_dbgutil_core_id_fn_was_set" +## All: Indicates that the core ID getter function was called. +SYM_CORE_ID_CALL = "oro_dbgutil::__oro_dbgutil_core_id_fn_was_called" TRACKED_SYMBOLS = frozenset( set( @@ -38,6 +42,8 @@ ("f", SYM_PFA_MASS_FREE), ("f", SYM_LOCK_ACQUIRE), ("f", SYM_LOCK_RELEASE), + ("f", SYM_CORE_ID_SET), + ("f", SYM_CORE_ID_CALL), ] ) ) diff --git a/dbgutil/oro_debug_suite/service/core_id_tracker.py b/dbgutil/oro_debug_suite/service/core_id_tracker.py new file mode 100644 index 00000000..7916aee4 --- /dev/null +++ b/dbgutil/oro_debug_suite/service/core_id_tracker.py @@ -0,0 +1,194 @@ +import gdb # type: ignore +from ..log import debug, warn, error, log +from . import SYMBOLS, QEMU +from .autosym import ( + SYM_CORE_ID_SET, + SYM_CORE_ID_CALL, +) +from .backtrace import get_backtrace, warn_backtrace, log_backtrace, error_backtrace + + +class LockTracker(object): + def __init__(self): + # kernel ID => GDB core (thread) ID + self.__oro_to_gdb = dict() + # GDB core (thread) ID => kernel ID + self.__gdb_to_oro = dict() + self.verbose = False + self.__enabled = False + self._set_breakpoint = None + self._call_breakpoint = None + + SYMBOLS.on_loaded(self.attach) + QEMU.on_started(self.clear) + + def clear(self, reattach=True): + self.__oro_to_gdb.clear() + self.__gdb_to_oro.clear() + debug("core_id_tracker: cleared all known core IDs") + if reattach: + self.attach() + + @property + def enabled(self): + return self.__enabled + + @enabled.setter + def enabled(self, value): + self.__enabled = value + self.attach() + + def get_by_id(self, id): + return self.__oro_to_gdb.get(id, None) + + def get_by_cpu(self, cpu): + return self.__gdb_to_oro.get(cpu, None) + + def _track_set(self, core_id, thread_id): + bt = get_backtrace() + + current_gdb = self.__oro_to_gdb.get(core_id, None) + current_oro = self.__gdb_to_oro.get(thread_id, None) + + self.__oro_to_gdb[core_id] = (thread_id, bt) + self.__gdb_to_oro[thread_id] = (core_id, bt) + + log(f"core_id_tracker: set: oro {core_id} ({hex(core_id)}) => gdb {thread_id}") + log_backtrace("core_id_tracker", bt) + + if current_gdb is not None and current_gdb[0] != thread_id: + warn( + f"core_id_tracker: ... above replaces existing known gdb core ID: oro {core_id} => WAS gdb {current_gdb[0]}, set at:" + ) + warn_backtrace("core_id_tracker", current_gdb[1]) + if current_oro is not None and current_oro[0] != core_id: + warn( + f"core_id_tracker: ... above replaces existing known oro core ID: WAS oro {current_oro[0]} => gdb {thread_id}, set at:" + ) + warn_backtrace("core_id_tracker", current_oro[1]) + + def _track_call(self, core_id, thread_id): + bt = get_backtrace() + + current_gdb = self.__oro_to_gdb.get(core_id, None) + current_oro = self.__gdb_to_oro.get(thread_id, None) + + if self.verbose: + cgdb = None if current_gdb is None else current_gdb[0] + coro = None if current_oro is None else current_oro[0] + agree = ( + "AGREE" if cgdb == thread_id and coro == core_id else "!!! DISAGREE !!!" + ) + debug( + f"core_id_tracker: call: oro {core_id} (INTERNAL MAP => {cgdb}) ON gdb {thread_id} (INTERNAL MAP => {coro}) - {agree}" + ) + + if current_gdb is None: + warn( + f"core_id_tracker: call: unknown oro core ID: {core_id}, gdb {thread_id}, call at:" + ) + warn_backtrace("core_id_tracker", bt) + elif current_gdb[0] != thread_id: + error( + f"core_id_tracker: call: mismatched core IDs: oro {core_id} => gdb {current_gdb[0]}, but returned {thread_id}, call at:" + ) + error_backtrace("core_id_tracker", bt) + + if current_oro is None: + warn( + f"core_id_tracker: call: unknown gdb core ID: gdb {thread_id}, oro {core_id}, call at:" + ) + warn_backtrace("core_id_tracker", bt) + elif current_oro[0] != core_id: + error( + f"core_id_tracker: call: mismatched core IDs: gdb {thread_id} => oro {current_oro[0]}, but returned {core_id}, call at:" + ) + error_backtrace("core_id_tracker", bt) + + def attach(self): + has_cleared = False + if self._set_breakpoint: + self._set_breakpoint.delete() + self._set_breakpoint = None + has_cleared = True + if self._call_breakpoint: + self._call_breakpoint.delete() + self._call_breakpoint = None + has_cleared = True + + if has_cleared: + debug("core_id_tracker: detached") + + if self.enabled: + set_sym = SYMBOLS.get_if_tracked(SYM_CORE_ID_SET) + call_sym = SYMBOLS.get_if_tracked(SYM_CORE_ID_CALL) + if set_sym and call_sym: + self._set_breakpoint = CoreIdTrackerSetBreakpoint(set_sym) + self._call_breakpoint = CoreIdTrackerCallBreakpoint(call_sym) + debug("core_id_tracker: attached") + else: + debug("core_id_tracker: not attached, missing symbols") + + +class CoreIdTrackerSetBreakpoint(gdb.Breakpoint): + def __init__(self, at): + super(CoreIdTrackerSetBreakpoint, self).__init__( + at, internal=True, qualified=True + ) + + def stop(self): + core_id = int(gdb.parse_and_eval("core_id_do_not_change_this_parameter")) + thread_id = gdb.selected_thread().num + CORE_ID_TRACKER._track_set(core_id, thread_id) + return False # don't stop + + +class CoreIdTrackerCallBreakpoint(gdb.Breakpoint): + def __init__(self, at): + super(CoreIdTrackerCallBreakpoint, self).__init__( + at, internal=True, qualified=True + ) + + def stop(self): + core_id = int(gdb.parse_and_eval("core_id_do_not_change_this_parameter")) + thread_id = gdb.selected_thread().num + CORE_ID_TRACKER._track_call(core_id, thread_id) + return False # don't stop + + +class CoreIdEnableParam(gdb.Parameter): + set_doc = "Enables/disables the Oro kernel core ID tracker." + show_doc = "Shows the current state of the Oro kernel core ID tracker." + + def __init__(self): + super(CoreIdEnableParam, self).__init__( + "oro-core-id", gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN + ) + self.value = CORE_ID_TRACKER.enabled + + def get_set_string(self): + CORE_ID_TRACKER.enabled = self.value + return "" + + +class CoreIdVerboseParam(gdb.Parameter): + set_doc = "Enables/disables verbose output for the Oro kernel core ID tracker." + show_doc = ( + "Shows the current state of verbose output for the Oro kernel core ID tracker." + ) + + def __init__(self): + super(CoreIdVerboseParam, self).__init__( + "oro-core-id-verbose", gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN + ) + self.value = CORE_ID_TRACKER.verbose + + def get_set_string(self): + CORE_ID_TRACKER.verbose = self.value + return "" + + +CORE_ID_TRACKER = LockTracker() + +CoreIdEnableParam() +CoreIdVerboseParam() diff --git a/oro-dbgutil/src/lib.rs b/oro-dbgutil/src/lib.rs index 146b7d64..0e18d677 100644 --- a/oro-dbgutil/src/lib.rs +++ b/oro-dbgutil/src/lib.rs @@ -315,3 +315,46 @@ pub extern "C" fn __oro_dbgutil_lock_release_writer( ); } } + +/// Tells the core ID tracker that a core ID function was set. The tracker will +/// then track the ID from this point forward. +#[no_mangle] +#[cfg_attr( + any(debug_assertions, feature = "force-hooks"), + link_section = ".text.force_keep" +)] +#[cfg_attr(not(any(debug_assertions, feature = "force-hooks")), inline(always))] +#[cfg_attr(any(debug_assertions, feature = "force-hooks"), inline(never))] +pub extern "C" fn __oro_dbgutil_core_id_fn_was_set(core_id_do_not_change_this_parameter: u32) { + #[cfg(any(debug_assertions, feature = "force-hooks"))] + unsafe { + asm!( + "/*{}*/", + "nop", + in(reg) u64::from(core_id_do_not_change_this_parameter), + options(nostack, nomem, preserves_flags) + ); + } +} + +/// Tells the core ID tracker that a core ID was retrieved. The tracker will +/// validate that the ID returned is the same as the one at time of +/// [`__oro_dbgutil_core_id_fn_was_set`]. +#[no_mangle] +#[cfg_attr( + any(debug_assertions, feature = "force-hooks"), + link_section = ".text.force_keep" +)] +#[cfg_attr(not(any(debug_assertions, feature = "force-hooks")), inline(always))] +#[cfg_attr(any(debug_assertions, feature = "force-hooks"), inline(never))] +pub extern "C" fn __oro_dbgutil_core_id_fn_was_called(core_id_do_not_change_this_parameter: u32) { + #[cfg(any(debug_assertions, feature = "force-hooks"))] + unsafe { + asm!( + "/*{}*/", + "nop", + in(reg) u64::from(core_id_do_not_change_this_parameter), + options(nostack, nomem, preserves_flags) + ); + } +} diff --git a/oro-kernel/src/sync.rs b/oro-kernel/src/sync.rs index af8acec7..0db748c6 100644 --- a/oro-kernel/src/sync.rs +++ b/oro-kernel/src/sync.rs @@ -31,7 +31,9 @@ pub(crate) unsafe extern "C" fn oro_sync_current_core_id() -> u32 { ); } - KERNEL_ID_FN.assume_init()() + let id = KERNEL_ID_FN.assume_init()(); + ::oro_dbgutil::__oro_dbgutil_core_id_fn_was_called(id); + id } /// The generic kernel ID fetcher, based on the [`Arch`] type. @@ -56,6 +58,7 @@ pub unsafe fn initialize_kernel_id_fn() { // SAFETY(qix-): We have offloaded safety considerations to the caller here. #[expect(static_mut_refs)] { + ::oro_dbgutil::__oro_dbgutil_core_id_fn_was_set(get_arch_kernel_id::()); KERNEL_ID_FN.write(get_arch_kernel_id::); } } @@ -72,12 +75,13 @@ pub unsafe fn initialize_kernel_id_fn() { pub unsafe fn install_dummy_kernel_id_fn() { #[cfg(debug_assertions)] { - HAS_SET_KERNEL_ID_FN.store(true, core::sync::atomic::Ordering::Relaxed); + HAS_SET_KERNEL_ID_FN.store(false, core::sync::atomic::Ordering::Relaxed); } // SAFETY(qix-): We have offloaded safety considerations to the caller here. #[expect(static_mut_refs)] { - KERNEL_ID_FN.write(|| 0); + ::oro_dbgutil::__oro_dbgutil_core_id_fn_was_set(0xDEAD_DEAD); + KERNEL_ID_FN.write(|| 0xDEAD_DEAD); } }