From 46dca8afaac1287c2c2b83d39f1e57a348e1a3a8 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 21 Jan 2025 19:22:54 -0800 Subject: [PATCH] Wine: Implements support for profile stats This is a little trickier: we open the `/dev/shm/fex-<pid>-stats` file directly using Windows APIs so that Mangohud (which is going to be on the Linux side, or potentially even embedded into Gamescope) can safely pick up the stats. This is a little quirky, and the file can't be expanded because WINE doesn't support NtExtendSection, but that's fine. --- Source/Windows/ARM64EC/Module.cpp | 20 ++++++++- Source/Windows/Common/CMakeLists.txt | 2 +- Source/Windows/Common/Profiler.cpp | 65 ++++++++++++++++++++++++++++ Source/Windows/Common/Profiler.h | 28 ++++++++++++ Source/Windows/WOW64/Module.cpp | 21 ++++++++- 5 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 Source/Windows/Common/Profiler.cpp create mode 100644 Source/Windows/Common/Profiler.h diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index 52a2e55ea6..5d09600899 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -37,6 +37,7 @@ desc: Implements the ARM64EC BT module API using FEXCore #include "Common/CRT/CRT.h" #include "DummyHandlers.h" #include "BTInterface.h" +#include "Windows/Common/Profiler.h" #include #include @@ -122,6 +123,7 @@ namespace { fextl::unique_ptr CTX; fextl::unique_ptr SignalDelegator; fextl::unique_ptr SyscallHandler; +fextl::unique_ptr StatAllocHandler; std::optional InvalidationTracker; std::optional CPUFeatures; std::optional OvercommitTracker; @@ -569,10 +571,17 @@ NTSTATUS ProcessInit() { const uintptr_t KiUserExceptionDispatcherFFS = reinterpret_cast(GetProcAddress(NtDll, "KiUserExceptionDispatcher")); Exception::KiUserExceptionDispatcher = NtDllRedirectionLUT[KiUserExceptionDispatcherFFS - NtDllBase] + NtDllBase; + FEX_CONFIG_OPT(ProfileStats, PROFILESTATS); + + if (IsWine && ProfileStats()) { +
StatAllocHandler = fextl::make_unique(FEXCore::Profiler::AppType::WIN_ARM64EC); + } return STATUS_SUCCESS; } -void ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status) {} +void ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status) { + StatAllocHandler.reset(); +} class ScopedCallbackDisable { private: @@ -808,7 +817,11 @@ NTSTATUS ThreadInit() { { std::scoped_lock Lock(ThreadCreationMutex); - Threads.emplace(GetCurrentThreadId(), Thread); + auto ThreadTID = GetCurrentThreadId(); + Threads.emplace(ThreadTID, Thread); + if (StatAllocHandler) { + Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID); + } } CPUArea.ThreadState() = Thread; @@ -833,6 +846,9 @@ NTSTATUS ThreadTerm(HANDLE Thread, LONG ExitCode) { { std::scoped_lock Lock(ThreadCreationMutex); Threads.erase(ThreadTID); + if (StatAllocHandler) { + StatAllocHandler->DeallocateSlot(OldThreadState->ThreadStats); + } } CTX->DestroyThread(OldThreadState); diff --git a/Source/Windows/Common/CMakeLists.txt b/Source/Windows/Common/CMakeLists.txt index 8d92d8f18a..70a0e46c84 100644 --- a/Source/Windows/Common/CMakeLists.txt +++ b/Source/Windows/Common/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(CommonWindows STATIC CPUFeatures.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S) +add_library(CommonWindows STATIC CPUFeatures.cpp Profiler.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S) add_subdirectory(CRT) add_subdirectory(WinAPI) target_link_libraries(CommonWindows FEXCore_Base JemallocLibs) diff --git a/Source/Windows/Common/Profiler.cpp b/Source/Windows/Common/Profiler.cpp new file mode 100644 index 0000000000..2dedde586e --- /dev/null +++ b/Source/Windows/Common/Profiler.cpp @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: MIT +#include "Windows/Common/Profiler.h" + +#include +#include + +#include +#include +#include +#include +#include + +namespace FEX::Windows { +__attribute__((naked)) uint64_t linux_getpid() { + asm volatile(R"( + mov x8, 172; + svc #0; + ret; + )" :: + : "r0", "r8"); +} + 
+uint64_t StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
+  LogMan::Msg::DFmt("Ran out of slots. Can't allocate more");
+  return CurrentSize; // Reported unchanged: the section can't be grown (see the NtExtendSection note below).
+}
+
+StatAlloc::StatAlloc(FEXCore::Profiler::AppType AppType) {
+  CurrentSize = MAX_STATS_SIZE;
+  Base = nullptr; // Stays null on every failure path so AllocateSlot() can tell that no mapping exists.
+  auto handle = CreateFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str(), GENERIC_READ | GENERIC_WRITE,
+                           FILE_SHARE_READ, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
+  if (handle == INVALID_HANDLE_VALUE) {
+    return;
+  }
+
+  // Create the section mapping for the file handle for the full size.
+  HANDLE SectionMapping;
+  LARGE_INTEGER SectionSize {{MAX_STATS_SIZE}};
+  auto Result = NtCreateSection(&SectionMapping, SECTION_EXTEND_SIZE | SECTION_MAP_READ | SECTION_MAP_WRITE, nullptr, &SectionSize,
+                                PAGE_READWRITE, SEC_COMMIT, handle);
+  // Section mapping is used from now on, so the file handle is closed whether or not the section was created.
+  CloseHandle(handle);
+  if (Result != 0) {
+    return;
+  }
+  // Now actually map the view of the section.
+  size_t FullSize = MAX_STATS_SIZE;
+  Result = NtMapViewOfSection(SectionMapping, NtCurrentProcess(), &Base, 0, 0, nullptr, &FullSize, ViewUnmap, MEM_RESERVE | MEM_TOP_DOWN,
+                              PAGE_READWRITE);
+  CloseHandle(SectionMapping); // A mapped view keeps its section alive, so the handle isn't leaked on success either.
+  if (Result != 0) {
+    Base = nullptr;
+    return;
+  }
+
+  // Once WINE supports NtExtendSection and SECTION_EXTEND_SIZE correctly then we can map/commit a single page, map the full MAX_STATS_SIZE
+  // view as reserved, and extend the view using NtExtendSection.
+  SaveHeader(AppType);
+}
+StatAlloc::~StatAlloc() {
+  DeleteFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str());
+}
+
+} // namespace FEX::Windows
diff --git a/Source/Windows/Common/Profiler.h b/Source/Windows/Common/Profiler.h
new file mode 100644
index 0000000000..b12ad9631a
--- /dev/null
+++ b/Source/Windows/Common/Profiler.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: MIT
+#pragma once
+
+#include "Common/Profiler.h"
+
+namespace FEX::Windows {
+class StatAlloc final : public FEX::Profiler::StatAllocBase {
+public:
+  StatAlloc(FEXCore::Profiler::AppType AppType);
+  virtual ~StatAlloc();
+
+  // Hands out a stats slot for TID, or nullptr when the constructor could not map the stats file.
+  FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID) {
+    return Base ? AllocateBaseSlot(TID) : nullptr;
+  }
+
+  // Releases a slot obtained from AllocateSlot(); a null slot is ignored.
+  void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
+    if (AllocatedSlot) {
+      DeallocateBaseSlot(AllocatedSlot);
+    }
+  }
+
+private:
+  uint64_t AllocateMoreSlots(uint64_t NewSize) override;
+};
+
+} // namespace FEX::Windows
diff --git a/Source/Windows/WOW64/Module.cpp b/Source/Windows/WOW64/Module.cpp
index a971107539..bb55985c40 100644
--- a/Source/Windows/WOW64/Module.cpp
+++ b/Source/Windows/WOW64/Module.cpp
@@ -38,6 +38,7 @@ desc: Implements the WOW64 BT module API using FEXCore
 #include "Common/CRT/CRT.h"
 #include "DummyHandlers.h"
 #include "BTInterface.h"
+#include "Windows/Common/Profiler.h"
 
 #include
 #include
@@ -105,6 +106,7 @@ namespace BridgeInstrs {
 fextl::unique_ptr CTX;
 fextl::unique_ptr SignalDelegator;
 fextl::unique_ptr SyscallHandler;
+fextl::unique_ptr<FEX::Windows::StatAlloc> StatAllocHandler;
 std::optional InvalidationTracker;
 std::optional CPUFeatures;
 
@@ -499,9 +501,17 @@ void BTCpuProcessInit() {
 
   // wow64.dll will only initialise the cross-process queue if this is set
   GetTLS().Wow64Info().CpuFlags = WOW64_CPUFLAGS_SOFTWARE;
+
+  FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
+
+  if (IsWine && ProfileStats()) {
+    StatAllocHandler = fextl::make_unique<FEX::Windows::StatAlloc>(FEXCore::Profiler::AppType::WIN_WOW64);
+  }
 }
 
-void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {}
+void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {
+  StatAllocHandler.reset();
+}
 
 void BTCpuThreadInit() {
   FEX::Windows::InitCRTThread();
@@ -510,7 +520,11 @@ void BTCpuThreadInit() {
   GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed);
 
   std::scoped_lock Lock(ThreadCreationMutex);
-  Threads.emplace(GetCurrentThreadId(), Thread);
+  auto ThreadTID = GetCurrentThreadId();
+  Threads.emplace(ThreadTID, Thread);
+  if (StatAllocHandler) {
+    Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID);
+  }
 }
 
 void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) {
@@ -530,6 +544,9 @@ void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) {
   {
     std::scoped_lock Lock(ThreadCreationMutex);
     Threads.erase(ThreadTID);
+    if (StatAllocHandler) {
+      StatAllocHandler->DeallocateSlot(OldThreadState->ThreadStats);
+    }
   }
 
   CTX->DestroyThread(OldThreadState);