From a70ea30c0239c168e460dbf96a192dd0e2cdc529 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 31 Mar 2024 17:50:57 -0400 Subject: [PATCH 1/3] IR: add CondSubNZCV (ccmp) instruction Signed-off-by: Alyssa Rosenzweig --- .../Source/Interface/Core/JIT/Arm64/ALUOps.cpp | 18 ++++++++++++++++++ FEXCore/Source/Interface/IR/IR.json | 8 ++++++++ .../Source/Interface/IR/Passes/ConstProp.cpp | 1 + .../RedundantFlagCalculationElimination.cpp | 1 + 4 files changed, 28 insertions(+) diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 129d05a4f4..8466533166 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -402,6 +402,24 @@ DEF_OP(CondAddNZCV) { } } +DEF_OP(CondSubNZCV) { + auto Op = IROp->C(); + const auto OpSize = IROp->Size; + + LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize); + const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; + + ARMEmitter::StatusFlags Flags = (ARMEmitter::StatusFlags)Op->FalseNZCV; + uint64_t Const = 0; + auto Src1 = GetZeroableReg(Op->Src1); + + if (IsInlineConstant(Op->Src2, &Const)) { + ccmp(EmitSize, Src1, Const, Flags, MapSelectCC(Op->Cond)); + } else { + ccmp(EmitSize, Src1, GetReg(Op->Src2.ID()), Flags, MapSelectCC(Op->Cond)); + } +} + DEF_OP(Neg) { auto Op = IROp->C(); const uint8_t OpSize = IROp->Size; diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index abf9b8586d..0f40cdb12f 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -1035,6 +1035,14 @@ "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" ] }, + "CondSubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2, CondClass:$Cond, u8:$FalseNZCV": { + "Desc": ["If condition is true, set NZCV per difference of GPRs, else force NZCV to a constant."], + "HasSideEffects": true, + "DestSize": "Size", + "EmitValidation": [ + "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" + ] + }, "GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": { "Desc": ["Adds and set NZCV for the sum of two GPRs and carry-in given as NZCV"], "HasSideEffects": true, diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index 192ae6520c..60464625da 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -1139,6 +1139,7 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR) break; } case OP_CONDADDNZCV: + case OP_CONDSUBNZCV: { auto Op = IROp->C(); diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp index 2df6270d8d..ac57749dfa 100644 --- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -228,6 +228,7 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp) return {.Read = FlagsForCondClassType(Op->Cond)}; } + case OP_CONDSUBNZCV: case OP_CONDADDNZCV: { auto Op = IROp->CW(); return { From 1a1545da0f19eb24b2a4b2af2ab7daab549add0f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 31 Mar 2024 17:51:13 -0400 Subject: [PATCH 2/3] OpcodeDispatcher: rework rep cmp 1. pull flag calculation out of the loop body for perf 2. fully rotate the inner loop to save an instruction per iteration 3. hoist the rcx=0 jump to avoid computing df when rcx=0 Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 69 +++++++++++-------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 2214f4c923..f3ad7d1dbc 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3617,26 +3617,24 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX; - // read DF once - auto PtrDir = LoadDir(Size); - - auto JumpStart = Jump(); - // Make sure to start a new block after ending this one - auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock()); - SetJumpTarget(JumpStart, LoopStart); - SetCurrentCodeBlock(LoopStart); - StartNewBlock(); - + // If rcx = 0, skip the whole loop. OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX); + auto OuterJump = CondJump(Counter, {COND_EQ}); + IRPair InnerJump; - // Can we end the block? - auto CondJump_ = CondJump(Counter, {COND_EQ}); - IRPair InternalCondJump; + // read DF once, outside the loop + auto BeforeLoop = CreateNewCodeBlockAfter(GetCurrentBlock()); + SetFalseJumpTarget(OuterJump, BeforeLoop); + SetCurrentCodeBlock(BeforeLoop); + StartNewBlock(); + auto PtrDir = LoadDir(Size); + auto JumpIntoLoop = Jump(); - auto LoopTail = CreateNewCodeBlockAfter(LoopStart); - SetFalseJumpTarget(CondJump_, LoopTail); - SetCurrentCodeBlock(LoopTail); + // Setup for the loop + auto LoopHeader = CreateNewCodeBlockAfter(GetCurrentBlock()); + SetCurrentCodeBlock(LoopHeader); StartNewBlock(); + SetJumpTarget(JumpIntoLoop, LoopHeader); // Working loop { @@ -3651,15 +3649,14 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size); auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size); - GenerateFlags_SUB(Op, Src2, Src1); - - // Calculate flags early. - CalculateDeferredFlags(); + // We'll calculate PF/AF after the loop, so use them as temporaries here. + _StoreRegister(Src1, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize()); + _StoreRegister(Src2, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize()); OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX); // Decrement counter - TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1)); + TailCounter = _SubWithFlags(OpSize::i64Bit, TailCounter, _Constant(1)); // Store the counter since we don't have phis StoreGPRRegister(X86State::REG_RCX, TailCounter); @@ -3672,21 +3669,37 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir); StoreGPRRegister(X86State::REG_RSI, Dest_RSI); - CalculateDeferredFlags(); - InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ}); + // If TailCounter != 0, compare sources. + // If TailCounter == 0, set ZF iff that would break. + _CondSubNZCV(OpSize::i64Bit, Src2, Src1, {COND_NEQ}, REPE ? 0 : (1 << 2) /* Z */); + CachedNZCV = nullptr; + NZCVDirty = false; + InnerJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ}); // Jump back to the start if we have more work to do - SetTrueJumpTarget(InternalCondJump, LoopStart); + SetTrueJumpTarget(InnerJump, LoopHeader); } // Make sure to start a new block after ending this one - auto LoopEnd = CreateNewCodeBlockAfter(LoopTail); - SetTrueJumpTarget(CondJump_, LoopEnd); - - SetFalseJumpTarget(InternalCondJump, LoopEnd); + auto LoopEnd = CreateNewCodeBlockAfter(GetCurrentBlock()); + SetFalseJumpTarget(InnerJump, LoopEnd); SetCurrentCodeBlock(LoopEnd); StartNewBlock(); + { + // Grab the sources from the last iteration so we can set flags. + auto Src1 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize()); + auto Src2 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize()); + GenerateFlags_SUB(Op, Src2, Src1); + CalculateDeferredFlags(); + } + auto Jump_ = Jump(); + + auto Exit = CreateNewCodeBlockAfter(LoopEnd); + SetJumpTarget(Jump_, Exit); + SetTrueJumpTarget(OuterJump, Exit); + SetCurrentCodeBlock(Exit); + StartNewBlock(); } } From 784cdd7b6b055cfa25a93caff5d1eb7a53749e09 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 31 Mar 2024 17:51:47 -0400 Subject: [PATCH 3/3] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/Primary.json | 176 ++++++++------- unittests/InstructionCountCI/Primary.json | 208 ++++++++++-------- 2 files changed, 208 insertions(+), 176 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/Primary.json b/unittests/InstructionCountCI/FlagM/Primary.json index 743376c771..b187d5b094 100644 --- a/unittests/InstructionCountCI/FlagM/Primary.json +++ b/unittests/InstructionCountCI/FlagM/Primary.json @@ -1950,153 +1950,169 @@ ] }, "repz cmpsb": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 15, "Comment": "0xa6", "ExpectedArm64ASM": [ + "cbz x5, #+0x3c", "ldrsb x20, [x28, #714]", - "cbz x5, #+0x30", - "ldrb w21, [x11]", - "ldrb w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldrb w26, [x11]", + "ldrb w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x2c" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #24", + "cmp w0, w26, lsl #24", + "sub w26, w20, w26", + "cfinv" ] }, "repz cmpsw": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #1", - "cbz x5, #+0x30", - "ldrh w21, [x11]", - "ldrh w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldrh w26, [x11]", + "ldrh w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x2c" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #16", + "cmp w0, w26, lsl #16", + "sub w26, w20, w26", + "cfinv" ] }, "repz cmpsd": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x38", "ldrsb x20, [x28, #714]", "lsl x20, x20, #2", - "cbz x5, #+0x28", - "ldr w21, [x11]", - "ldr w22, [x10]", - "eor w27, w22, w21", - "subs w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldr w26, [x11]", + "ldr w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x24" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs w26, w20, w26", + "cfinv" ] }, "repz cmpsq": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x38", "ldrsb x20, [x28, #714]", "lsl x20, x20, #3", - "cbz x5, #+0x28", - "ldr x21, [x11]", - "ldr x22, [x10]", - "eor w27, w22, w21", - "subs x26, x22, x21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldr x26, [x11]", + "ldr x27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x24" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs x26, x20, x26", + "cfinv" ] }, "repnz cmpsb": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 15, "Comment": "0xa6", "ExpectedArm64ASM": [ + "cbz x5, #+0x3c", "ldrsb x20, [x28, #714]", - "cbz x5, #+0x30", - "ldrb w21, [x11]", - "ldrb w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldrb w26, [x11]", + "ldrb w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x2c" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #24", + "cmp w0, w26, lsl #24", + "sub w26, w20, w26", + "cfinv" ] }, "repnz cmpsw": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #1", - "cbz x5, #+0x30", - "ldrh w21, [x11]", - "ldrh w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldrh w26, [x11]", + "ldrh w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x2c" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #16", + "cmp w0, w26, lsl #16", + "sub w26, w20, w26", + "cfinv" ] }, "repnz cmpsd": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x38", "ldrsb x20, [x28, #714]", "lsl x20, x20, #2", - "cbz x5, #+0x28", - "ldr w21, [x11]", - "ldr w22, [x10]", - "eor w27, w22, w21", - "subs w26, w22, w21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldr w26, [x11]", + "ldr w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x24" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs w26, w20, w26", + "cfinv" ] }, "repnz cmpsq": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x38", "ldrsb x20, [x28, #714]", "lsl x20, x20, #3", - "cbz x5, #+0x28", - "ldr x21, [x11]", - "ldr x22, [x10]", - "eor w27, w22, w21", - "subs x26, x22, x21", - "cfinv", - "sub x5, x5, #0x1 (1)", + "ldr x26, [x11]", + "ldr x27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x24" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs x26, x20, x26", + "cfinv" ] }, "test al, 1": { diff --git a/unittests/InstructionCountCI/Primary.json b/unittests/InstructionCountCI/Primary.json index 5ee00256f5..26e1fac028 100644 --- a/unittests/InstructionCountCI/Primary.json +++ b/unittests/InstructionCountCI/Primary.json @@ -3295,169 +3295,185 @@ ] }, "repz cmpsb": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 17, "Comment": "0xa6", "ExpectedArm64ASM": [ + "cbz x5, #+0x44", "ldrsb x20, [x28, #714]", - "cbz x5, #+0x38", - "ldrb w21, [x11]", - "ldrb w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldrb w26, [x11]", + "ldrb w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x34" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #24", + "cmp w0, w26, lsl #24", + "sub w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repz cmpsw": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 18, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x48", "ldrsb x20, [x28, #714]", "lsl x20, x20, #1", - "cbz x5, #+0x38", - "ldrh w21, [x11]", - "ldrh w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldrh w26, [x11]", + "ldrh w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x34" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #16", + "cmp w0, w26, lsl #16", + "sub w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repz cmpsd": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #2", - "cbz x5, #+0x30", - "ldr w21, [x11]", - "ldr w22, [x10]", - "eor w27, w22, w21", - "subs w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldr w26, [x11]", + "ldr w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x2c" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repz cmpsq": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #3", - "cbz x5, #+0x30", - "ldr x21, [x11]", - "ldr x22, [x10]", - "eor w27, w22, w21", - "subs x26, x22, x21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldr x26, [x11]", + "ldr x27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.eq #-0x2c" + "ccmp x27, x26, #nzcv, ne", + "b.eq #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs x26, x20, x26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repnz cmpsb": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 17, "Comment": "0xa6", "ExpectedArm64ASM": [ + "cbz x5, #+0x44", "ldrsb x20, [x28, #714]", - "cbz x5, #+0x38", - "ldrb w21, [x11]", - "ldrb w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldrb w26, [x11]", + "ldrb w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x34" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #24", + "cmp w0, w26, lsl #24", + "sub w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repnz cmpsw": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 18, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x48", "ldrsb x20, [x28, #714]", "lsl x20, x20, #1", - "cbz x5, #+0x38", - "ldrh w21, [x11]", - "ldrh w22, [x10]", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldrh w26, [x11]", + "ldrh w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x34" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "lsl w0, w20, #16", + "cmp w0, w26, lsl #16", + "sub w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repnz cmpsd": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #2", - "cbz x5, #+0x30", - "ldr w21, [x11]", - "ldr w22, [x10]", - "eor w27, w22, w21", - "subs w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldr w26, [x11]", + "ldr w27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x2c" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs w26, w20, w26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "repnz cmpsq": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": "0xa7", "ExpectedArm64ASM": [ + "cbz x5, #+0x40", "ldrsb x20, [x28, #714]", "lsl x20, x20, #3", - "cbz x5, #+0x30", - "ldr x21, [x11]", - "ldr x22, [x10]", - "eor w27, w22, w21", - "subs x26, x22, x21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sub x5, x5, #0x1 (1)", + "ldr x26, [x11]", + "ldr x27, [x10]", + "subs x5, x5, #0x1 (1)", "add x11, x11, x20", "add x10, x10, x20", - "b.ne #-0x2c" + "ccmp x27, x26, #nZcv, ne", + "b.ne #-0x18", + "mov x20, x27", + "eor w27, w20, w26", + "subs x26, x20, x26", + "mrs x20, nzcv", + "eor w20, w20, #0x20000000", + "msr nzcv, x20" ] }, "test al, 1": {