From 61758ea47dcfcb49372ccda78342d65dfd5464bf Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 27 Mar 2024 12:25:32 -0400 Subject: [PATCH] OpcodeDispatcher: eliminate branch in cmpxchg pair In the old case: * if we take the branch, 1 instruction * if we don't take the branch, 3 instructions * branch predictor fun * 3 instructions of icache pressure In the new case: * unconditionally 2 instructions * no branch predictor dependence * 2 instructions of icache pressure This should not be non-negligibly worse, and it simplifies things for RA. Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index afff1afb65..12091359d6 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4270,23 +4270,15 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) { SetRFLAG(ZFResult); CalculateDeferredFlags(); - auto CondJump_ = CondJump(ZFResult); - - // Make sure to start a new block after ending this one - auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock()); - SetFalseJumpTarget(CondJump_, JumpTarget); - SetCurrentCodeBlock(JumpTarget); - StartNewBlock(); - - StoreGPRRegister(X86State::REG_RAX, Result_Lower); - StoreGPRRegister(X86State::REG_RDX, Result_Upper); + auto UpdateIfNotZF = [this](auto Reg, auto Value) { + // Always use 64-bit csel to preserve existing upper bits. If we have a + // 32-bit cmpxchg in a 64-bit context, Value will be zeroed in upper bits. 
+ StoreGPRRegister(Reg, _NZCVSelect(OpSize::i64Bit, CondClassType{COND_NEQ}, + Value, LoadGPRRegister(Reg))); + }; - auto Jump_ = Jump(); - auto NextJumpTarget = CreateNewCodeBlockAfter(JumpTarget); - SetJumpTarget(Jump_, NextJumpTarget); - SetTrueJumpTarget(CondJump_, NextJumpTarget); - SetCurrentCodeBlock(NextJumpTarget); - StartNewBlock(); + UpdateIfNotZF(X86State::REG_RAX, Result_Lower); + UpdateIfNotZF(X86State::REG_RDX, Result_Upper); } void OpDispatchBuilder::CreateJumpBlocks(fextl::vector const *Blocks) {