From f8b68d8b5a1786a9ae3651c362fd2bf6b1698660 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 13:47:43 -0400 Subject: [PATCH 1/3] OpcodeDispatcher: drop ZeroMultipleFlags lot of complexity for only a single interesting case. we can massively simplify. Signed-off-by: Alyssa Rosenzweig --- .../Source/Interface/Core/OpcodeDispatcher.h | 2 +- .../Interface/Core/OpcodeDispatcher/Flags.cpp | 72 ++----------------- .../Core/OpcodeDispatcher/Vector.cpp | 15 +--- 3 files changed, 11 insertions(+), 78 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 552e7b94e0..c880b7435e 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1422,7 +1422,7 @@ friend class FEXCore::IR::PassManager; SetRFLAG(_Constant(Constant << 4)); } - void ZeroMultipleFlags(uint32_t BitMask); + void ZeroPF_AF(); CondClassType CondForNZCVBit(unsigned BitOffset, bool Invert) { switch (BitOffset) { diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 5c8ebbd6bf..888164a1ff 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -38,61 +38,10 @@ constexpr std::array FlagOffsets = { FEXCore::X86State::RFLAG_ID_LOC, }; -void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) { - auto ZeroConst = _Constant(0); - - if (ContainsNZCV(FlagsMask)) { - // NZCV is stored packed together. - // It's more optimal to zero NZCV with move+bic instead of multiple bics. - auto NZCVFlagsMask = FlagsMask & FullNZCVMask; - if (NZCVFlagsMask == FullNZCVMask) { - ZeroNZCV(); - } - else { - const auto IndexMask = NZCVIndexMask(FlagsMask); - - if (std::popcount(NZCVFlagsMask) == 1) { - // It's more optimal to store only one here. - - for (size_t i = 0; NZCVFlagsMask && i < FlagOffsets.size(); ++i) { - const auto FlagOffset = FlagOffsets[i]; - const auto FlagMask = 1U << FlagOffset; - if (!(FlagMask & NZCVFlagsMask)) { - continue; - } - SetRFLAG(ZeroConst, FlagOffset); - NZCVFlagsMask &= ~(FlagMask); - } - } - else { - auto IndexMaskConstant = _Constant(IndexMask); - auto NewNZCV = _Andn(OpSize::i64Bit, GetNZCV(), IndexMaskConstant); - SetNZCV(NewNZCV); - } - // Unset the possibly set bits. - PossiblySetNZCVBits &= ~IndexMask; - } - - // Handled NZCV, so remove it from the mask. - FlagsMask &= ~FullNZCVMask; - } - +void OpDispatchBuilder::ZeroPF_AF() { // PF is stored inverted, so invert it when we zero. - if (FlagsMask & (1u << X86State::RFLAG_PF_RAW_LOC)) { - SetRFLAG(_Constant(1)); - FlagsMask &= ~(1u << X86State::RFLAG_PF_RAW_LOC); - } - - // Handle remaining masks. - for (size_t i = 0; FlagsMask && i < FlagOffsets.size(); ++i) { - const auto FlagOffset = FlagOffsets[i]; - const auto FlagMask = 1U << FlagOffset; - if (!(FlagMask & FlagsMask)) { - continue; - } - SetRFLAG(ZeroConst, FlagOffset); - FlagsMask &= ~(FlagMask); - } + SetRFLAG(_Constant(1)); + SetAF(0); } void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { @@ -994,9 +943,7 @@ void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Result) { // is in the range [0, 63]. In particular, it is always positive. So a // combined NZ test will correctly zero SF/CF/OF while setting ZF. SetNZ_ZeroCV(OpSize::i32Bit, Result); - - ZeroMultipleFlags((1U << X86State::RFLAG_AF_RAW_LOC) | - (1U << X86State::RFLAG_PF_RAW_LOC)); + ZeroPF_AF(); } void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) { @@ -1022,15 +969,10 @@ void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode *Result void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode *Src) { // OF, SF, ZF, AF, PF all zero - // CF is set to the incoming source - - uint32_t FlagsMaskToZero = - FullNZCVMask | - (1U << X86State::RFLAG_AF_RAW_LOC) | - (1U << X86State::RFLAG_PF_RAW_LOC); - - ZeroMultipleFlags(FlagsMaskToZero); + ZeroNZCV(); + ZeroPF_AF(); + // CF is set to the incoming source SetRFLAG(Src); } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index b7200b5dd6..58e28ee121 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -4594,11 +4594,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) { SetNZ_ZeroCV(32, Test1); SetRFLAG(Test2); - uint32_t FlagsMaskToZero = - (1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC); - - ZeroMultipleFlags(FlagsMaskToZero); + ZeroPF_AF(); } void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) { @@ -4635,8 +4631,7 @@ void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) { SetNZ_ZeroCV(32, AndGPR); SetRFLAG(CFResult); - ZeroMultipleFlags((1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC)); + ZeroPF_AF(); } template @@ -5568,11 +5563,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask SetRFLAG(GetFlagBit(18)); SetRFLAG(GetFlagBit(19)); - uint32_t FlagsMaskToZero = - (1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC); - - ZeroMultipleFlags(FlagsMaskToZero); + ZeroPF_AF(); } void OpDispatchBuilder::VPCMPESTRIOp(OpcodeArgs) { From fad243d3f6382590e8ba8d33a544231f758aa916 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 13:48:27 -0400 Subject: [PATCH 2/3] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/SecondaryGroup.json | 12 ++++++------ .../InstructionCountCI/FlagM/Secondary_REP.json | 12 ++++++------ unittests/InstructionCountCI/SecondaryGroup.json | 12 ++++++------ unittests/InstructionCountCI/Secondary_REP.json | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index de0bf43684..0eaf853d60 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -704,8 +704,8 @@ "mov x22, x20", "mov x20, x21", "bfxil x4, x22, #0, #16", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -719,8 +719,8 @@ "mov x22, x20", "mov x20, x21", "mov w4, w22", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -733,8 +733,8 @@ "cset x21, ne", "mov x4, x20", "mov x20, x21", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -748,8 +748,8 @@ "mov x22, x20", "mov x20, x21", "bfxil x4, x22, #0, #16", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -763,8 +763,8 @@ "mov x22, x20", "mov x20, x21", "mov w4, w22", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -777,8 +777,8 @@ "cset x21, ne", "mov x4, x20", "mov x20, x21", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] diff --git a/unittests/InstructionCountCI/FlagM/Secondary_REP.json b/unittests/InstructionCountCI/FlagM/Secondary_REP.json index 6710e8472e..d31556cba0 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_REP.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_REP.json @@ -24,8 +24,8 @@ "umov w20, v0.b[0]", "bfxil x4, x20, #0, #16", "tst w20, w20", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "popcnt eax, ebx": { @@ -37,8 +37,8 @@ "addv b0, v0.8b", "umov w4, v0.b[0]", "tst w4, w4", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "popcnt rax, rbx": { @@ -50,8 +50,8 @@ "addv b0, v0.8b", "umov w4, v0.b[0]", "tst w4, w4", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "tzcnt ax, bx": { diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 3f8802609f..4ad5d8012d 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -840,8 +840,8 @@ "mov x22, x20", "mov x20, x21", "bfxil x4, x22, #0, #16", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -855,8 +855,8 @@ "mov x22, x20", "mov x20, x21", "mov w4, w22", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -869,8 +869,8 @@ "cset x21, ne", "mov x4, x20", "mov x20, x21", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -884,8 +884,8 @@ "mov x22, x20", "mov x20, x21", "bfxil x4, x22, #0, #16", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -899,8 +899,8 @@ "mov x22, x20", "mov x20, x21", "mov w4, w22", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] @@ -913,8 +913,8 @@ "cset x21, ne", "mov x4, x20", "mov x20, x21", - "mov w27, #0x0", "mov w26, #0x1", + "mov w27, #0x0", "lsl x20, x20, #29", "msr nzcv, x20" ] diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json index bc1f11e7fb..2926456232 100644 --- a/unittests/InstructionCountCI/Secondary_REP.json +++ b/unittests/InstructionCountCI/Secondary_REP.json @@ -409,8 +409,8 @@ "umov w20, v0.b[0]", "bfxil x4, x20, #0, #16", "tst w20, w20", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "popcnt eax, ebx": { @@ -422,8 +422,8 @@ "addv b0, v0.8b", "umov w4, v0.b[0]", "tst w4, w4", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "popcnt rax, rbx": { @@ -435,8 +435,8 @@ "addv b0, v0.8b", "umov w4, v0.b[0]", "tst w4, w4", - "mov w27, #0x0", - "mov w26, #0x1" + "mov w26, #0x1", + "mov w27, #0x0" ] }, "tzcnt ax, bx": { From 7852909cc47b468195a060ff7fbaa85f0e93b3af Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 13:50:00 -0400 Subject: [PATCH 3/3] OpcodeDispatcher: simplify IsNZCV Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index c880b7435e..84cdacb5cf 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -981,16 +981,7 @@ friend class FEXCore::IR::PassManager; } static bool IsNZCV(unsigned BitOffset) { - switch (BitOffset) { - case FEXCore::X86State::RFLAG_CF_RAW_LOC: - case FEXCore::X86State::RFLAG_ZF_RAW_LOC: - case FEXCore::X86State::RFLAG_SF_RAW_LOC: - case FEXCore::X86State::RFLAG_OF_RAW_LOC: - return true; - - default: - return false; - } + return ContainsNZCV(1U << BitOffset); } OrderedNode* CachedNZCV{};