Skip to content

Commit

Permalink
OpcodeDispatcher: rework rep cmp
Browse files Browse the repository at this point in the history
1. Pull the flag calculation out of the loop body for performance.
2. Fix a count bug noticed while doing the above.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
  • Loading branch information
alyssarosenzweig committed Mar 31, 2024
1 parent a70ea30 commit 6885e23
Showing 1 changed file with 25 additions and 8 deletions.
33 changes: 25 additions & 8 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3638,6 +3638,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
SetCurrentCodeBlock(LoopTail);
StartNewBlock();

InvalidateDeferredFlags();

// Working loop
{
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
Expand All @@ -3651,15 +3653,14 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);

GenerateFlags_SUB(Op, Src2, Src1);

// Calculate flags early.
CalculateDeferredFlags();
// We'll calculate PF/AF after the loop, so use them as temporaries here.
_StoreRegister(Src1, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Src2, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());

OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);

// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
TailCounter = _SubWithFlags(OpSize::i64Bit, TailCounter, _Constant(1));

// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
Expand All @@ -3672,7 +3673,11 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir);
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);

CalculateDeferredFlags();
// If TailCounter != 0, compare sources.
// If TailCounter == 0, set ZF iff that would break.
_CondSubNZCV(OpSize::i64Bit, Src2, Src1, {COND_NEQ}, REPE ? 0 : (1 << 2) /* Z */);
CachedNZCV = nullptr;
NZCVDirty = false;
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});

// Jump back to the start if we have more work to do
Expand All @@ -3681,12 +3686,24 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {

// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);

SetFalseJumpTarget(InternalCondJump, LoopEnd);

SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
{
// Grab the sources from the last iteration so we can set flags.
auto Src1 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
auto Src2 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
GenerateFlags_SUB(Op, Src2, Src1);
CalculateDeferredFlags();
}
auto Jump_ = Jump();

auto Exit = CreateNewCodeBlockAfter(LoopEnd);
SetJumpTarget(Jump_, Exit);
SetTrueJumpTarget(CondJump_, Exit);
SetCurrentCodeBlock(Exit);
StartNewBlock();
}
}

Expand Down

0 comments on commit 6885e23

Please sign in to comment.