Skip to content

Commit

Permalink
Review
Browse files Browse the repository at this point in the history
  • Loading branch information
Sonicadvance1 committed Jan 22, 2025
1 parent 46dca8a commit 2e5457b
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 32 deletions.
1 change: 1 addition & 0 deletions FEXCore/include/FEXCore/Debug/InternalThreadState.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ struct InternalThreadState : public FEXCore::Allocator::FEXAllocOperators {

std::shared_mutex ObjectCacheRefCounter {};

// This pointer is owned by the frontend.
FEXCore::Profiler::ThreadStats* ThreadStats {};

///< Data pointer for exclusive use by the frontend
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/include/FEXCore/Utils/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct ThreadStats {

#ifdef _M_ARM_64
/**
* @brief Get the raw cycle counter which is synchronizing.
* @brief Get the raw cycle counter with a synchronizing ISB.
*
* `CNTVCTSS_EL0` also does the same thing, but requires the FEAT_ECV feature.
*/
Expand Down
8 changes: 4 additions & 4 deletions Source/Common/Profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) {
Head->Size.store(CurrentSize, std::memory_order_relaxed);
Head->Version = FEXCore::Profiler::STATS_VERSION;

constexpr std::array<char, std::char_traits<char>::length(GIT_DESCRIBE_STRING) + 1> GitString = {GIT_DESCRIBE_STRING};
std::string_view GitString = GIT_DESCRIBE_STRING;
strncpy(Head->fex_version, GitString.data(), std::min(GitString.size(), sizeof(Head->fex_version)));
Head->app_type = AppType;

Expand All @@ -26,7 +26,7 @@ void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) {
bool StatAllocBase::AllocateMoreSlots() {
const auto OriginalSlotCount = TotalSlotsFromSize();

uint64_t NewSize = AllocateMoreSlots(CurrentSize * 2);
uint64_t NewSize = FrontendAllocateSlots(CurrentSize * 2);

if (NewSize == CurrentSize) {
return false;
Expand All @@ -39,7 +39,7 @@ bool StatAllocBase::AllocateMoreSlots() {
return true;
}

FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateBaseSlot(uint32_t TID) {
FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateSlot(uint32_t TID) {
if (!RemainingSlots) {
if (!AllocateMoreSlots()) {
return nullptr;
Expand Down Expand Up @@ -76,7 +76,7 @@ FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateBaseSlot(uint32_t TID) {
return AllocatedSlot;
}

void StatAllocBase::DeallocateBaseSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
void StatAllocBase::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
if (!AllocatedSlot) {
return;
}
Expand Down
14 changes: 8 additions & 6 deletions Source/Common/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,26 @@ static inline void memory_barrier() {
#else
static inline void memory_barrier() {
// Intentionally empty: this is the `#else` variant of memory_barrier().
// x86 is strongly memory-ordered for regular loads and stores, so no explicit barrier is needed.
}
#endif

namespace FEX::Profiler {
class StatAllocBase {
public:
virtual ~StatAllocBase() = default;

protected:
FEXCore::Profiler::ThreadStats* AllocateBaseSlot(uint32_t TID);
void DeallocateBaseSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot);
FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID);
void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot);

// Returns the byte distance between `Stat` and `Base`, narrowed to 32 bits on return.
uint32_t OffsetFromStat(FEXCore::Profiler::ThreadStats* Stat) const {
  const auto StatAddress = reinterpret_cast<uint64_t>(Stat);
  const auto BaseAddress = reinterpret_cast<uint64_t>(Base);
  return StatAddress - BaseAddress;
}
// Number of slots for the current allocation: the count of whole ThreadStats entries
// that fit in `CurrentSize` after the ThreadStatsHeader, minus one.
size_t TotalSlotsFromSize() const {
  const auto PayloadBytes = CurrentSize - sizeof(FEXCore::Profiler::ThreadStatsHeader);
  const auto WholeEntries = PayloadBytes / sizeof(FEXCore::Profiler::ThreadStats);
  return WholeEntries - 1;
}
// Number of slots for an allocation of `Size` bytes: the count of whole ThreadStats
// entries that fit after the ThreadStatsHeader, minus one.
size_t TotalSlotsFromSize(uint64_t Size) const {
  const auto PayloadBytes = Size - sizeof(FEXCore::Profiler::ThreadStatsHeader);
  const auto WholeEntries = PayloadBytes / sizeof(FEXCore::Profiler::ThreadStats);
  return WholeEntries - 1;
}

// Converts a byte offset into a slot index: subtract the ThreadStatsHeader size,
// then divide by the size of one ThreadStats entry.
size_t SlotIndexFromOffset(uint32_t Offset) {
  const auto BytesPastHeader = Offset - sizeof(FEXCore::Profiler::ThreadStatsHeader);
  return BytesPastHeader / sizeof(FEXCore::Profiler::ThreadStats);
}
Expand All @@ -60,7 +62,7 @@ class StatAllocBase {
constexpr static size_t MAX_STATS_SIZE = 4 * 1024 * 1024;

private:
virtual uint64_t AllocateMoreSlots(uint64_t NewSize) = 0;
virtual uint64_t FrontendAllocateSlots(uint64_t NewSize) = 0;
bool AllocateMoreSlots();
};

Expand Down
32 changes: 16 additions & 16 deletions Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,15 @@ void ThreadManager::StatAlloc::Initialize() {
LogMan::Msg::EFmt("[StatAlloc] ftruncate failed");
goto err;
}
for (size_t i = 4096; i <= (128 * 1024 * 1024); i *= 2) {
LogMan::Msg::DFmt("{}: {} slots", i, TotalSlotsFromSize(i));
}

// 128MB ought to be enough for anyone.
// Reserve a region of MAX_STATS_SIZE so we can grow the allocation buffer.
// Number of thread slots when ThreadStatsHeader == 64bytes and ThreadStats == 40bytes:
// 1 page: 99 slots
// 1 MB: 26211 slots
// 128 MB: 3355440 slots
Base = ::mmap(nullptr, MAX_STATS_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
if (Base == MAP_FAILED) {
LogMan::Msg::EFmt("[StatAlloc] mmap base failed");
Expand All @@ -61,9 +68,10 @@ void ThreadManager::StatAlloc::Initialize() {
close(fd);
}

uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
uint64_t ThreadManager::StatAlloc::FrontendAllocateSlots(uint64_t NewSize) {
if (CurrentSize == MAX_STATS_SIZE) {
// Nope.
// Allocator has reached the maximum number of slots, so we can't allocate any more.
// New threads won't get stats.
return CurrentSize;
}
NewSize = std::max(MAX_STATS_SIZE, NewSize);
Expand All @@ -86,14 +94,6 @@ uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
LogMan::Msg::EFmt("[StatAlloc] allocate more mmap shm failed");
goto err;
}

// TODO: Just a sanity check.
const char* SharedTest = (const char*)Base;
for (size_t i = CurrentSize; i < NewSize; ++i) {
if (SharedTest[i] != 0) {
LogMan::Msg::EFmt("truncate and map shared resulted in not zero'd memory!");
}
}
}

err:
Expand All @@ -103,7 +103,7 @@ uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {

FEXCore::Profiler::ThreadStats* ThreadManager::StatAlloc::AllocateSlot(uint32_t TID) {
std::scoped_lock lk(StatMutex);
return AllocateBaseSlot(TID);
return StatAllocBase::AllocateSlot(TID);
}

void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
Expand All @@ -112,7 +112,7 @@ void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::Profiler::ThreadStats* Al
}

std::scoped_lock lk(StatMutex);
DeallocateBaseSlot(AllocatedSlot);
StatAllocBase::DeallocateSlot(AllocatedSlot);
}

void ThreadManager::StatAlloc::CleanupForExit() {
Expand All @@ -138,8 +138,8 @@ void ThreadManager::StatAlloc::UnlockAfterFork(FEXCore::Core::InternalThreadStat

StatMutex.StealAndDropActiveLocks();

// shm_memory tied to this process is now not owned by this process.
// Replace the shm region! Otherwise this process will keep reporting time in the original parent thread's stats region!
// shm_memory ownership is retained by the parent process, so the child must replace it with its own region.
// Otherwise this process will keep reporting into the original parent thread's stats region.
munmap(Base, MAX_STATS_SIZE);
Base = nullptr;
CurrentSize = 0;
Expand Down Expand Up @@ -380,7 +380,7 @@ void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThre
// This function is called after fork
// We need to cleanup some of the thread data that is dead
for (auto& DeadThread : Threads) {
// This is not owned by the child after fork.
// The fork parent retains ownership of ThreadStats
DeadThread->Thread->ThreadStats = nullptr;

if (DeadThread->Thread == LiveThread) {
Expand Down
2 changes: 1 addition & 1 deletion Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class ThreadManager final {
private:
void Initialize();

uint64_t AllocateMoreSlots(uint64_t NewSize) override;
uint64_t FrontendAllocateSlots(uint64_t NewSize) override;
FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);

Expand Down
2 changes: 1 addition & 1 deletion Source/Windows/Common/Profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ __attribute__((naked)) uint64_t linux_getpid() {
: "r0", "r8");
}

uint64_t StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
uint64_t StatAlloc::FrontendAllocateSlots(uint64_t NewSize) {
LogMan::Msg::DFmt("Ran out of slots. Can't allocate more");
return CurrentSize;
}
Expand Down
6 changes: 3 additions & 3 deletions Source/Windows/Common/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,19 @@ class StatAlloc final : public FEX::Profiler::StatAllocBase {
virtual ~StatAlloc();

FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID) {
return AllocateBaseSlot(TID);
return StatAllocBase::AllocateSlot(TID);
}

void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
if (!AllocatedSlot) {
return;
}

DeallocateBaseSlot(AllocatedSlot);
StatAllocBase::DeallocateSlot(AllocatedSlot);
}

private:
uint64_t AllocateMoreSlots(uint64_t NewSize) override;
uint64_t FrontendAllocateSlots(uint64_t NewSize) override;
};

} // namespace FEX::Windows

0 comments on commit 2e5457b

Please sign in to comment.