diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index 69828638b5..541006c8ad 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -399,6 +399,21 @@ "Highly likely to break any multithreaded application if disabled." ] }, + "VectorTSOEnabled": { + "Type": "bool", + "Default": "true", + "Desc": [ + "When TSO emulation is enabled, controls if vector loadstores should also be atomic." + ] + }, + "MemcpySetTSOEnabled": { + "Type": "bool", + "Default": "true", + "Desc": [ + "When TSO emulation is enabled, controls if memcpy and memset should also be atomic.", + "Only affects REP MOVS and REP STOS instructions" + ] + }, "TSOAutoMigration": { "Type": "bool", "Default": "true", diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h b/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h index 0661760542..6d0838ac62 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h @@ -56,6 +56,8 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter { private: FEX_CONFIG_OPT(ParanoidTSO, PARANOIDTSO); + FEX_CONFIG_OPT(VectorTSOEnabled, VECTORTSOENABLED); + FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED); const bool HostSupportsSVE128{}; const bool HostSupportsSVE256{}; diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index 1453d931ad..d051d20f14 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -1174,8 +1174,10 @@ DEF_OP(LoadMemTSO) { LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break; } - // Half-barrier. - dmb(FEXCore::ARMEmitter::BarrierScope::ISHLD); + if (VectorTSOEnabled()) { + // Half-barrier. + dmb(FEXCore::ARMEmitter::BarrierScope::ISHLD); + } } } @@ -1323,7 +1325,7 @@ DEF_OP(VLoadVectorElement) { } // Emit a half-barrier if TSO is enabled. - if (CTX->IsAtomicTSOEnabled()) { + if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) { dmb(ARMEmitter::BarrierScope::ISHLD); } } @@ -1343,7 +1345,7 @@ DEF_OP(VStoreVectorElement) { ElementSize == 16, "Invalid element size"); // Emit a half-barrier if TSO is enabled. - if (CTX->IsAtomicTSOEnabled()) { + if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) { dmb(FEXCore::ARMEmitter::BarrierScope::ISH); } @@ -1443,7 +1445,7 @@ DEF_OP(VBroadcastFromMem) { } // Emit a half-barrier if TSO is enabled. - if (CTX->IsAtomicTSOEnabled()) { + if (CTX->IsAtomicTSOEnabled() && VectorTSOEnabled()) { dmb(ARMEmitter::BarrierScope::ISHLD); } } @@ -1661,8 +1663,10 @@ DEF_OP(StoreMemTSO) { } } else { - // Half-Barrier. - dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + if (VectorTSOEnabled()) { + // Half-Barrier. + dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + } const auto Src = GetVReg(Op->Value.ID()); const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); switch (OpSize) { @@ -1703,6 +1707,7 @@ DEF_OP(MemSet) { // that the value is zero, we can optimize any operation larger than 8-bit down to 8-bit to use the MOPS implementation. const auto Op = IROp->C(); + const bool IsAtomic = Op->IsAtomic && MemcpySetTSOEnabled(); const int32_t Size = Op->Size; const auto MemReg = GetReg(Op->Addr.ID()); const auto Value = GetReg(Op->Value.ID()); @@ -1808,7 +1813,7 @@ DEF_OP(MemSet) { // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); - if (!Op->IsAtomic) { + if (!IsAtomic) { ARMEmitter::ForwardLabel AgainInternal256Exit{}; ARMEmitter::BackwardLabel AgainInternal256{}; ARMEmitter::ForwardLabel AgainInternal128Exit{}; @@ -1858,7 +1863,7 @@ DEF_OP(MemSet) { } Bind(&AgainInternal); - if (Op->IsAtomic) { + if (IsAtomic) { MemStoreTSO(Value, OpSize, SizeDirection); } else { @@ -1936,6 +1941,7 @@ DEF_OP(MemCpy) { // Assuming non-atomicity and non-faulting behaviour, this can accelerate this implementation. const auto Op = IROp->C(); + const bool IsAtomic = Op->IsAtomic && MemcpySetTSOEnabled(); const int32_t Size = Op->Size; const auto MemRegDest = GetReg(Op->AddrDest.ID()); const auto MemRegSrc = GetReg(Op->AddrSrc.ID()); @@ -2122,7 +2128,7 @@ DEF_OP(MemCpy) { // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); - if (!Op->IsAtomic) { + if (!IsAtomic) { ARMEmitter::ForwardLabel AbsPos{}; ARMEmitter::ForwardLabel AgainInternal256Exit{}; ARMEmitter::ForwardLabel AgainInternal128Exit{}; @@ -2178,7 +2184,7 @@ DEF_OP(MemCpy) { } Bind(&AgainInternal); - if (Op->IsAtomic) { + if (IsAtomic) { MemCpyTSO(OpSize, SizeDirection); } else { diff --git a/Source/Tools/FEXConfig/Main.cpp b/Source/Tools/FEXConfig/Main.cpp index 70be9dc836..1952721a67 100644 --- a/Source/Tools/FEXConfig/Main.cpp +++ b/Source/Tools/FEXConfig/Main.cpp @@ -547,12 +547,44 @@ namespace { void FillHackConfig() { if (ImGui::BeginTabItem("Hacks")) { auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED); + auto VectorTSO = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED); + auto MemcpyTSO = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED); + bool TSOEnabled = Value.has_value() && **Value == "1"; + bool VectorTSOEnabled = VectorTSO.has_value() && **VectorTSO == "1"; + bool MemcpyTSOEnabled = MemcpyTSO.has_value() && **MemcpyTSO == "1"; + if (ImGui::Checkbox("TSO Enabled", &TSOEnabled)) { LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, TSOEnabled ? "1" : "0"); ConfigChanged = true; } + if (TSOEnabled) { + if (ImGui::TreeNodeEx("TSO sub-options", ImGuiTreeNodeFlags_Leaf)) { + if (ImGui::Checkbox("Vector TSO Enabled", &VectorTSOEnabled)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, VectorTSOEnabled ? "1" : "0"); + ConfigChanged = true; + } + if (ImGui::IsItemHovered()) { + ImGui::BeginTooltip(); + ImGui::Text("Disables TSO emulation on vector load/store instructions"); + ImGui::EndTooltip(); + } + + if (ImGui::Checkbox("Memcpy TSO Enabled", &MemcpyTSOEnabled)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, MemcpyTSOEnabled ? "1" : "0"); + ConfigChanged = true; + } + if (ImGui::IsItemHovered()) { + ImGui::BeginTooltip(); + ImGui::Text("Disables TSO emulation on memcpy/memset instructions"); + ImGui::EndTooltip(); + } + + ImGui::TreePop(); + } + } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_PARANOIDTSO); bool ParanoidTSOEnabled = Value.has_value() && **Value == "1"; if (ImGui::Checkbox("Paranoid TSO Enabled", &ParanoidTSOEnabled)) {