From ea4fce7a43ebeb54aa570bab01527726dac84443 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 12:47:17 -0400 Subject: [PATCH 01/10] InstcountCI: add flagm primary 32-bit track the demon opcodes Signed-off-by: Alyssa Rosenzweig --- .../FlagM/Primary_32Bit.json | 466 ++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 unittests/InstructionCountCI/FlagM/Primary_32Bit.json diff --git a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json new file mode 100644 index 0000000000..3f7306ad09 --- /dev/null +++ b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json @@ -0,0 +1,466 @@ +{ + "Features": { + "Bitness": 32, + "EnabledHostFeatures": [ + "FlagM", + "FlagM2" + ], + "DisabledHostFeatures": [ + "SVE128", + "SVE256" + ] + }, + "Instructions": { + "push es": { + "ExpectedInstructionCount": 2, + "Comment": "0x06", + "ExpectedArm64ASM": [ + "ldrh w20, [x28, #136]", + "str w20, [x8, #-4]!" + ] + }, + "pop es": { + "ExpectedInstructionCount": 7, + "Comment": "0x07", + "ExpectedArm64ASM": [ + "ldr w20, [x8]", + "add x8, x8, #0x4 (4)", + "strh w20, [x28, #136]", + "ubfx w20, w20, #3, #13", + "add x0, x28, x20, lsl #2", + "ldr w20, [x0, #896]", + "str w20, [x28, #152]" + ] + }, + "push cs": { + "ExpectedInstructionCount": 2, + "Comment": "0x0e", + "ExpectedArm64ASM": [ + "ldrh w20, [x28, #138]", + "str w20, [x8, #-4]!" + ] + }, + "push ss": { + "ExpectedInstructionCount": 2, + "Comment": "0x16", + "ExpectedArm64ASM": [ + "ldrh w20, [x28, #140]", + "str w20, [x8, #-4]!" 
+ ] + }, + "pop ss": { + "ExpectedInstructionCount": 7, + "Comment": "0x17", + "ExpectedArm64ASM": [ + "ldr w20, [x8]", + "add x8, x8, #0x4 (4)", + "strh w20, [x28, #140]", + "ubfx w20, w20, #3, #13", + "add x0, x28, x20, lsl #2", + "ldr w20, [x0, #896]", + "str w20, [x28, #160]" + ] + }, + "push ds": { + "ExpectedInstructionCount": 2, + "Comment": "0x1e", + "ExpectedArm64ASM": [ + "ldrh w20, [x28, #142]", + "str w20, [x8, #-4]!" + ] + }, + "pop ds": { + "ExpectedInstructionCount": 7, + "Comment": "0x1f", + "ExpectedArm64ASM": [ + "ldr w20, [x8]", + "add x8, x8, #0x4 (4)", + "strh w20, [x28, #142]", + "ubfx w20, w20, #3, #13", + "add x0, x28, x20, lsl #2", + "ldr w20, [x0, #896]", + "str w20, [x28, #164]" + ] + }, + "daa": { + "ExpectedInstructionCount": 49, + "Comment": "0x27", + "ExpectedArm64ASM": [ + "mov w20, #0x0", + "cset w21, hs", + "eor w22, w27, w26", + "ubfx w22, w22, #4, #1", + "uxtb w23, w4", + "rmif x20, #63, #nzCv", + "and x20, x23, #0xf", + "mrs x24, nzcv", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "orr x20, x22, x20", + "msr nzcv, x24", + "cbnz x20, #+0xc", + "mov w27, #0x0", + "b #+0x1c", + "add x20, x23, #0x6 (6)", + "bfxil w4, w20, #0, #8", + "cset w20, hs", + "orr x20, x21, x20", + "rmif x20, #63, #nzCv", + "mov w27, #0x10", + "mrs x20, nzcv", + "cmp x23, #0x99 (153)", + "cset x22, hi", + "orr x21, x21, x22", + "msr nzcv, x20", + "cbnz x21, #+0x10", + "mov w20, #0x0", + "rmif x20, #63, #nzCv", + "b #+0x18", + "uxtb w20, w4", + "add x20, x20, #0x60 (96)", + "bfxil w4, w20, #0, #8", + "mov w20, #0x1", + "rmif x20, #63, #nzCv", + "uxtb w26, w4", + "and x20, x26, #0x80", + "mrs x21, nzcv", + "cmp x20, #0x0 (0)", + "cset x20, hs", + "msr nzcv, x21", + "rmif x20, #61, #Nzcv", + "and x20, x26, #0xff", + "mrs x21, nzcv", + "cmp x20, #0x0 (0)", + "cset x20, eq", + "msr nzcv, x21", + "rmif x20, #62, #nZcv", + "eor w27, w27, w26" + ] + }, + "das": { + "ExpectedInstructionCount": 49, + "Comment": "0x2f", + "ExpectedArm64ASM": [ + "mov w20, #0x0", + "cset 
w21, hs", + "eor w22, w27, w26", + "ubfx w22, w22, #4, #1", + "uxtb w23, w4", + "rmif x20, #63, #nzCv", + "and x20, x23, #0xf", + "mrs x24, nzcv", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "orr x20, x22, x20", + "msr nzcv, x24", + "cbnz x20, #+0xc", + "mov w27, #0x0", + "b #+0x1c", + "sub x20, x23, #0x6 (6)", + "bfxil w4, w20, #0, #8", + "cset w20, hs", + "orr x20, x21, x20", + "rmif x20, #63, #nzCv", + "mov w27, #0x10", + "mrs x20, nzcv", + "cmp x23, #0x99 (153)", + "cset x22, hi", + "orr x21, x21, x22", + "msr nzcv, x20", + "cbnz x21, #+0x10", + "mov w20, #0x0", + "rmif x20, #63, #nzCv", + "b #+0x18", + "uxtb w20, w4", + "sub x20, x20, #0x60 (96)", + "bfxil w4, w20, #0, #8", + "mov w20, #0x1", + "rmif x20, #63, #nzCv", + "uxtb w26, w4", + "and x20, x26, #0x80", + "mrs x21, nzcv", + "cmp x20, #0x0 (0)", + "cset x20, hs", + "msr nzcv, x21", + "rmif x20, #61, #Nzcv", + "and x20, x26, #0xff", + "mrs x21, nzcv", + "cmp x20, #0x0 (0)", + "cset x20, eq", + "msr nzcv, x21", + "rmif x20, #62, #nZcv", + "eor w27, w27, w26" + ] + }, + "aaa": { + "ExpectedInstructionCount": 24, + "Comment": "0x37", + "ExpectedArm64ASM": [ + "eor w20, w27, w26", + "ubfx w20, w20, #4, #1", + "uxtb w21, w4", + "uxth w22, w4", + "and x21, x21, #0xf", + "mrs x23, nzcv", + "cmp x21, #0x9 (9)", + "cset x21, hi", + "orr x20, x20, x21", + "msr nzcv, x23", + "cbnz x20, #+0x1c", + "mov w20, #0xff0f", + "and x20, x22, x20", + "bfxil w4, w20, #0, #16", + "mov w27, #0x0", + "msr nzcv, x27", + "b #+0x20", + "add x20, x22, #0x106 (262)", + "mov w21, #0xff0f", + "and x20, x20, x21", + "bfxil w4, w20, #0, #16", + "mov w20, #0x20000000", + "mov w27, #0x10", + "msr nzcv, x20" + ] + }, + "aas": { + "ExpectedInstructionCount": 25, + "Comment": "0x3f", + "ExpectedArm64ASM": [ + "eor w20, w27, w26", + "ubfx w20, w20, #4, #1", + "uxtb w21, w4", + "uxth w22, w4", + "and x21, x21, #0xf", + "mrs x23, nzcv", + "cmp x21, #0x9 (9)", + "cset x21, hi", + "orr x20, x20, x21", + "msr nzcv, x23", + "cbnz x20, #+0x1c", + "mov 
w20, #0xff0f", + "and x20, x22, x20", + "bfxil w4, w20, #0, #16", + "mov w27, #0x0", + "msr nzcv, x27", + "b #+0x24", + "sub x20, x22, #0x6 (6)", + "sub x20, x20, #0x100 (256)", + "mov w21, #0xff0f", + "and x20, x20, x21", + "bfxil w4, w20, #0, #16", + "mov w20, #0x20000000", + "mov w27, #0x10", + "msr nzcv, x20" + ] + }, + "inc ax": { + "ExpectedInstructionCount": 6, + "Comment": "0x40", + "ExpectedArm64ASM": [ + "uxth w27, w4", + "add w26, w27, #0x1 (1)", + "setf16 w26", + "bic w20, w26, w27", + "rmif x20, #15, #nzcV", + "bfxil w4, w26, #0, #16" + ] + }, + "inc eax": { + "ExpectedInstructionCount": 5, + "Comment": "0x40", + "ExpectedArm64ASM": [ + "mov w27, w4", + "cset w20, hs", + "adds w26, w27, #0x1 (1)", + "rmif x20, #63, #nzCv", + "mov w4, w26" + ] + }, + "dec ax": { + "ExpectedInstructionCount": 6, + "Comment": "0x48", + "ExpectedArm64ASM": [ + "uxth w27, w4", + "sub w26, w27, #0x1 (1)", + "setf16 w26", + "bic w20, w27, w26", + "rmif x20, #15, #nzcV", + "bfxil w4, w26, #0, #16" + ] + }, + "push ax": { + "ExpectedInstructionCount": 1, + "Comment": "0x50", + "ExpectedArm64ASM": [ + "strh w4, [x8, #-2]!" + ] + }, + "push eax": { + "ExpectedInstructionCount": 1, + "Comment": "0x50", + "ExpectedArm64ASM": [ + "str w4, [x8, #-4]!" + ] + }, + "dec eax": { + "ExpectedInstructionCount": 5, + "Comment": "0x48", + "ExpectedArm64ASM": [ + "mov w27, w4", + "cset w20, hs", + "subs w26, w27, #0x1 (1)", + "rmif x20, #63, #nzCv", + "mov w4, w26" + ] + }, + "pusha": { + "ExpectedInstructionCount": 10, + "Comment": "0x60", + "ExpectedArm64ASM": [ + "mov w20, w8", + "str w4, [x20, #-4]!", + "str w5, [x20, #-4]!", + "str w6, [x20, #-4]!", + "str w7, [x20, #-4]!", + "str w8, [x20, #-4]!", + "str w9, [x20, #-4]!", + "str w10, [x20, #-4]!", + "mov w8, w20", + "str w11, [x8, #-4]!" 
+ ] + }, + "pushad": { + "ExpectedInstructionCount": 10, + "Comment": "0x60", + "ExpectedArm64ASM": [ + "mov w20, w8", + "str w4, [x20, #-4]!", + "str w5, [x20, #-4]!", + "str w6, [x20, #-4]!", + "str w7, [x20, #-4]!", + "str w8, [x20, #-4]!", + "str w9, [x20, #-4]!", + "str w10, [x20, #-4]!", + "mov w8, w20", + "str w11, [x8, #-4]!" + ] + }, + "popa": { + "ExpectedInstructionCount": 14, + "Comment": "0x61", + "ExpectedArm64ASM": [ + "ldr w11, [x8]", + "add x20, x8, #0x4 (4)", + "ldr w10, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w9, [x20]", + "add x20, x20, #0x8 (8)", + "ldr w7, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w6, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w5, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w4, [x20]", + "add x8, x20, #0x4 (4)" + ] + }, + "popad": { + "ExpectedInstructionCount": 14, + "Comment": "0x61", + "ExpectedArm64ASM": [ + "ldr w11, [x8]", + "add x20, x8, #0x4 (4)", + "ldr w10, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w9, [x20]", + "add x20, x20, #0x8 (8)", + "ldr w7, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w6, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w5, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w4, [x20]", + "add x8, x20, #0x4 (4)" + ] + }, + "aam": { + "ExpectedInstructionCount": 10, + "Comment": "0xd4", + "ExpectedArm64ASM": [ + "uxtb w20, w4", + "mov w21, #0xa", + "udiv x22, x20, x21", + "udiv x2, x20, x21", + "msub x20, x2, x21, x20", + "lsl x21, x22, #8", + "add x20, x21, x20", + "bfxil w4, w20, #0, #16", + "uxtb w26, w4", + "cmn wzr, w26, lsl #24" + ] + }, + "aad": { + "ExpectedInstructionCount": 10, + "Comment": "0xd5", + "ExpectedArm64ASM": [ + "uxtb w20, w4", + "uxth w21, w4", + "lsr w21, w21, #8", + "mov w22, #0xa", + "mul x21, x21, x22", + "add x20, x20, x21", + "and x20, x20, #0xff", + "bfxil w4, w20, #0, #16", + "uxtb w26, w4", + "cmn wzr, w26, lsl #24" + ] + }, + "db 0xd4, 0x40": { + "ExpectedInstructionCount": 10, + "Comment": [ + "aam with a different immediate byte base", + "0xd4" + ], + "ExpectedArm64ASM": 
[ + "uxtb w20, w4", + "mov w21, #0x40", + "udiv x22, x20, x21", + "udiv x2, x20, x21", + "msub x20, x2, x21, x20", + "lsl x21, x22, #8", + "add x20, x21, x20", + "bfxil w4, w20, #0, #16", + "uxtb w26, w4", + "cmn wzr, w26, lsl #24" + ] + }, + "db 0xd5, 0x40": { + "ExpectedInstructionCount": 9, + "Comment": [ + "aad with a different immediate byte base", + "0xd5" + ], + "ExpectedArm64ASM": [ + "uxtb w20, w4", + "uxth w21, w4", + "lsr w21, w21, #8", + "lsl x21, x21, #6", + "add x20, x20, x21", + "and x20, x20, #0xff", + "bfxil w4, w20, #0, #16", + "uxtb w26, w4", + "cmn wzr, w26, lsl #24" + ] + }, + "salc": { + "ExpectedInstructionCount": 2, + "Comment": "0xd6", + "ExpectedArm64ASM": [ + "csetm w20, hs", + "bfxil w4, w20, #0, #8" + ] + } + } +} From 693d86dd675aab51a9a55a58b57afffdb111032c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 12:59:19 -0400 Subject: [PATCH 02/10] OpcodeDispatcher: add SetAFAndFixup helper Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 1 + .../Interface/Core/OpcodeDispatcher/Flags.cpp | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 167ecb2a21..360bdf323b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1764,6 +1764,7 @@ friend class FEXCore::IR::PassManager; OrderedNode *LoadPFRaw(); OrderedNode *LoadAF(); void FixupAF(); + void SetAFAndFixup(OrderedNode *AF); void CalculatePF(OrderedNode *Res); void CalculateAF(OrderedNode *Src1, OrderedNode *Src2); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 67e51966d5..b8aa8371fa 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -277,6 +277,19 @@ void 
OpDispatchBuilder::FixupAF() { SetRFLAG(XorRes); } +void OpDispatchBuilder::SetAFAndFixup(OrderedNode *AF) { + // We have a value of AF, we shift into AF[4]. We need to fixup AF[4] so that + // we get the right value when we XOR in PF[4] later. The easiest solution is + // to XOR by PF[4], since: + // + // (AF[4] ^ PF[4]) ^ PF[4] = AF[4] + + auto PFRaw = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC); + + OrderedNode *XorRes = _XorShift(OpSize::i32Bit, PFRaw, AF, ShiftType::LSL, 4); + SetRFLAG(XorRes); +} + void OpDispatchBuilder::CalculatePF(OrderedNode *Res) { // Calculation is entirely deferred until load, just store the 8-bit result. SetRFLAG(Res); From 949717a95f0b23a906c8ef2aa2735700c156070f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 13:00:02 -0400 Subject: [PATCH 03/10] OpcodeDispatcher: rewrite DAA implementation Based on https://www.righto.com/2023/01/ New implementation is branchless, which is theoretically easier to RA. It's also massively simpler which is good for a demon opcode. 
Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 81 +++++-------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 0a7cca0d28..cc8c364b23 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3119,77 +3119,32 @@ void OpDispatchBuilder::PopcountOp(OpcodeArgs) { void OpDispatchBuilder::DAAOp(OpcodeArgs) { CalculateDeferredFlags(); + auto AL = LoadGPRRegister(X86State::REG_RAX, 1); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); auto AF = LoadAF(); - auto AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetRFLAG(_Constant(0)); - CalculateDeferredFlags(); + // AF |= ((AL & 0x0F) > 9); + AF = _Or(OpSize::i64Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), + _Constant(1), _Constant(0))); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); - auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); + // CF |= (AL > 0x99); + CF = _Or(OpSize::i64Bit, CF, _Select(FEXCore::IR::COND_UGT, AL, _Constant(0x99), _Constant(1), _Constant(0))); - CalculateDeferredFlags(); - CondJump(Cond, TrueBlock, FalseBlock); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - SetAF(0); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); - { - auto NewAL = _Add(OpSize::i64Bit, AL, _Constant(0x6)); - StoreGPRRegister(X86State::REG_RAX, NewAL, 1); - CalculateDeferredFlags(); - auto NewCF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - // XXX: I don't think this is correct. Needs Investigation. 
- // The `CF` variable is the original CF from the start of the operation - // The `NewCF` will be _Constant(0) stored aboved. - // So Or(CF, _Constant(0)) ill mean CF gets updated to the old value in the true case? - SetRFLAG(_Or(OpSize::i64Bit, CF, NewCF)); - SetAF(1); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); + // AL = AF ? (AL + 0x6) : AL; + AL = _Select(FEXCore::IR::COND_NEQ, AF, _Constant(0), + _Add(OpSize::i64Bit, AL, _Constant(0x6)), AL); - Cond = _Or(OpSize::i64Bit, CF, _Select(FEXCore::IR::COND_UGT, AL, _Constant(0x99), _Constant(1), _Constant(0))); - FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - EndBlock = CreateNewCodeBlockAfter(TrueBlock); - CondJump(Cond, TrueBlock, FalseBlock); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - SetRFLAG(_Constant(0)); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); - { - AL = LoadGPRRegister(X86State::REG_RAX, 1); + // AL = CF ? (AL + 0x60) : AL; + AL = _Select(FEXCore::IR::COND_NEQ, CF, _Constant(0), + _Add(OpSize::i64Bit, AL, _Constant(0x60)), AL); - auto NewAL = _Add(OpSize::i64Bit, AL, _Constant(0x60)); - StoreGPRRegister(X86State::REG_RAX, NewAL, 1); - SetRFLAG(_Constant(1)); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); - // Update Flags - AL = LoadGPRRegister(X86State::REG_RAX, 1); - - SetRFLAG(_Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); - SetRFLAG(_Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, _Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); + // SF, ZF, PF set according to result. CF set per above. OF undefined. 
+ StoreGPRRegister(X86State::REG_RAX, AL, 1); + SetNZ_ZeroCV(1, AL); + SetRFLAG(CF); CalculatePF(AL); - FixupAF(); + SetAFAndFixup(AF); } void OpDispatchBuilder::DASOp(OpcodeArgs) { From 3ca2c4377f97a84eaf30565e227268a7c1ad3058 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 13:37:13 -0400 Subject: [PATCH 04/10] OpcodeDispatcher: rewrite AAA Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 48 +++++++------------ 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index cc8c364b23..ca5de861e8 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3224,40 +3224,26 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) { InvalidateDeferredFlags(); auto AF = LoadAF(); - auto AL = LoadGPRRegister(X86State::REG_RAX, 1); - auto AX = LoadGPRRegister(X86State::REG_RAX, 2); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); + auto A = LoadGPRRegister(X86State::REG_RAX); - auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); - CondJump(Cond, TrueBlock, FalseBlock); + // AF |= ((AL & 0x0F) > 9); + AF = _Or(OpSize::i32Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i32Bit, A, _Constant(0xF)), _Constant(9), + _Constant(1), _Constant(0))); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - auto NewAX = _And(OpSize::i64Bit, AX, _Constant(0xFF0F)); - StoreGPRRegister(X86State::REG_RAX, NewAX, 2); - ZeroNZCV(); - SetAF(0); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); + // CF = AF, OF/SF/ZF/PF undefined + ZeroNZCV(); + SetRFLAG(AF); + SetAFAndFixup(AF); + 
CalculateDeferredFlags(); - { - auto NewAX = _Add(OpSize::i64Bit, AX, _Constant(0x106)); - auto Result = _And(OpSize::i64Bit, NewAX, _Constant(0xFF0F)); - StoreGPRRegister(X86State::REG_RAX, Result, 2); - ZeroNZCV(); - SetRFLAG(_Constant(1)); - SetAF(1); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); + // AX = CF ? (AX + 0x106) : AX + A = _NZCVSelect(OpSize::i32Bit, CondClassType{COND_UGE} /* CF = 1 */, + _Add(OpSize::i32Bit, A, _Constant(0x106)), A); + + // AL = AL & 0x0F + A = _And(OpSize::i32Bit, A, _Constant(0xFF0F)); + StoreGPRRegister(X86State::REG_RAX, A, 2); } void OpDispatchBuilder::AASOp(OpcodeArgs) { From 583d4f8f941a824d94aa6c932822eb650f0efdf3 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 14:07:53 -0400 Subject: [PATCH 05/10] OpcodeDispatcher: factor out CalculateAFForDecimal Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 22 +++++++++---------- .../Source/Interface/Core/OpcodeDispatcher.h | 1 + 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index ca5de861e8..a670c7404b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3117,16 +3117,19 @@ void OpDispatchBuilder::PopcountOp(OpcodeArgs) { GenerateFlags_POPCOUNT(Op, Src); } +OrderedNode *OpDispatchBuilder::CalculateAFForDecimal(OrderedNode *A) { + auto Nibble = _And(OpSize::i64Bit, A, _Constant(0xF)); + auto Greater = _Select(FEXCore::IR::COND_UGT, Nibble, _Constant(9), + _Constant(1), _Constant(0)); + + return _Or(OpSize::i64Bit, LoadAF(), Greater); +} + void OpDispatchBuilder::DAAOp(OpcodeArgs) { CalculateDeferredFlags(); auto AL = LoadGPRRegister(X86State::REG_RAX, 1); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - auto AF = LoadAF(); - - // AF |= ((AL & 0x0F) > 9); - AF = 
_Or(OpSize::i64Bit, AF, - _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), - _Constant(1), _Constant(0))); + auto AF = CalculateAFForDecimal(AL); // CF |= (AL > 0x99); CF = _Or(OpSize::i64Bit, CF, _Select(FEXCore::IR::COND_UGT, AL, _Constant(0x99), _Constant(1), _Constant(0))); @@ -3223,13 +3226,8 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) { void OpDispatchBuilder::AAAOp(OpcodeArgs) { InvalidateDeferredFlags(); - auto AF = LoadAF(); auto A = LoadGPRRegister(X86State::REG_RAX); - - // AF |= ((AL & 0x0F) > 9); - AF = _Or(OpSize::i32Bit, AF, - _Select(FEXCore::IR::COND_UGT, _And(OpSize::i32Bit, A, _Constant(0xF)), _Constant(9), - _Constant(1), _Constant(0))); + auto AF = CalculateAFForDecimal(A); // CF = AF, OF/SF/ZF/PF undefined ZeroNZCV(); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 360bdf323b..d9f8fb08b4 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1765,6 +1765,7 @@ friend class FEXCore::IR::PassManager; OrderedNode *LoadAF(); void FixupAF(); void SetAFAndFixup(OrderedNode *AF); + OrderedNode *CalculateAFForDecimal(OrderedNode *A); void CalculatePF(OrderedNode *Res); void CalculateAF(OrderedNode *Src1, OrderedNode *Src2); From 2bf880c43ae2e59f3728b589e3babd7d0cbd14e2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 14:12:48 -0400 Subject: [PATCH 06/10] OpcodeDispatcher: rewrite AAS Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 47 ++++++------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index a670c7404b..3639b30c4f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3247,41 +3247,22 @@ void 
OpDispatchBuilder::AAAOp(OpcodeArgs) { void OpDispatchBuilder::AASOp(OpcodeArgs) { InvalidateDeferredFlags(); - auto AF = LoadAF(); - auto AL = LoadGPRRegister(X86State::REG_RAX, 1); - auto AX = LoadGPRRegister(X86State::REG_RAX, 2); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); + auto A = LoadGPRRegister(X86State::REG_RAX); + auto AF = CalculateAFForDecimal(A); - auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); - CondJump(Cond, TrueBlock, FalseBlock); + // CF = AF, OF/SF/ZF/PF undefined + ZeroNZCV(); + SetRFLAG(AF); + SetAFAndFixup(AF); + CalculateDeferredFlags(); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - auto NewAX = _And(OpSize::i64Bit, AX, _Constant(0xFF0F)); - StoreGPRRegister(X86State::REG_RAX, NewAX, 2); - ZeroNZCV(); - SetAF(0); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); - { - auto NewAX = _Sub(OpSize::i64Bit, AX, _Constant(6)); - NewAX = _Sub(OpSize::i64Bit, NewAX, _Constant(0x100)); - auto Result = _And(OpSize::i64Bit, NewAX, _Constant(0xFF0F)); - StoreGPRRegister(X86State::REG_RAX, Result, 2); - ZeroNZCV(); - SetRFLAG(_Constant(1)); - SetAF(1); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); + // AX = CF ? 
(AX - 0x106) : AX + A = _NZCVSelect(OpSize::i32Bit, CondClassType{COND_UGE} /* CF = 1 */, + _Sub(OpSize::i32Bit, A, _Constant(0x106)), A); + + // AL = AL & 0x0F + A = _And(OpSize::i32Bit, A, _Constant(0xFF0F)); + StoreGPRRegister(X86State::REG_RAX, A, 2); } void OpDispatchBuilder::AAMOp(OpcodeArgs) { From 86b5a2f35208a7835c0505ada9d12886ac2f94e4 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 17:02:45 -0400 Subject: [PATCH 07/10] OpcodeDispatcher: simplify AAD noticed in the area. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 3639b30c4f..2aba081c70 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3286,17 +3286,15 @@ void OpDispatchBuilder::AAMOp(OpcodeArgs) { void OpDispatchBuilder::AADOp(OpcodeArgs) { InvalidateDeferredFlags(); - auto AL = LoadGPRRegister(X86State::REG_RAX, 1); - auto AH = _Lshr(OpSize::i32Bit, LoadGPRRegister(X86State::REG_RAX, 2), _Constant(8)); + auto A = LoadGPRRegister(X86State::REG_RAX); + auto AH = _Lshr(OpSize::i32Bit, A, _Constant(8)); auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF); - auto NewAL = _Add(OpSize::i64Bit, AL, _Mul(OpSize::i64Bit, AH, Imm8)); + auto NewAL = _Add(OpSize::i64Bit, A, _Mul(OpSize::i64Bit, AH, Imm8)); auto Result = _And(OpSize::i64Bit, NewAL, _Constant(0xFF)); StoreGPRRegister(X86State::REG_RAX, Result, 2); - // Update Flags - AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetNZ_ZeroCV(1, AL); - CalculatePF(AL); + SetNZ_ZeroCV(1, Result); + CalculatePF(Result); _InvalidateFlags(1u << X86State::RFLAG_AF_RAW_LOC); } From e26481e3ccbaf3d6340c88f36a2b2b0b8541c2b8 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 17:10:34 -0400 Subject: [PATCH 08/10] 
OpcodeDispatcher: simplify AAM in the area. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 2aba081c70..183cd5121d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3272,14 +3272,11 @@ void OpDispatchBuilder::AAMOp(OpcodeArgs) { auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF); auto UDivOp = _UDiv(OpSize::i64Bit, AL, Imm8); auto URemOp = _URem(OpSize::i64Bit, AL, Imm8); - auto AH = _Lshl(OpSize::i64Bit, UDivOp, _Constant(8)); - auto AX = _Add(OpSize::i64Bit, AH, URemOp); - StoreGPRRegister(X86State::REG_RAX, AX, 2); + auto Res = _AddShift(OpSize::i64Bit, URemOp, UDivOp, ShiftType::LSL, 8); + StoreGPRRegister(X86State::REG_RAX, Res, 2); - // Update Flags - AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetNZ_ZeroCV(1, AL); - CalculatePF(AL); + SetNZ_ZeroCV(1, Res); + CalculatePF(Res); _InvalidateFlags(1u << X86State::RFLAG_AF_RAW_LOC); } From dfe0bdd7f2da46f80047bb2011c0270c92513505 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 19:14:08 -0400 Subject: [PATCH 09/10] OpcodeDispatcher: rewrite DAS exhaustively checked against the Intel pseudocode since this is tricky: def intel(AL, CF, AF): old_AL = AL old_CF = CF CF = False if (AL & 0x0F) > 9 or AF: Borrow = AL < 6 AL = (AL - 6) & 0xff CF = old_CF or Borrow AF = True else: AF = False if (old_AL > 0x99) or old_CF: AL = (AL - 0x60) & 0xff CF = True return (AL & 0xff, CF, AF) def fex(AL, CF, AF): AF = AF | ((AL & 0xf) > 9) CF = CF | (AL > 0x99) NewCF = CF | (AF if (AL < 6) else CF) AL = (AL - 6) if AF else AL AL = (AL - 0x60) if CF else AL return (AL & 0xff, NewCF, AF) for AL in range(256): for CF in [False, True]: for AF in [False, True]: ref = intel(AL, CF, AF) test = fex(AL, CF, AF) 
print(AL, "CF" if CF else "", "AF" if AF else "", ref, test) assert(ref == test) Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 81 +++++-------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 183cd5121d..bcd40404ab 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -3152,75 +3152,30 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) { void OpDispatchBuilder::DASOp(OpcodeArgs) { CalculateDeferredFlags(); - auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - auto AF = LoadAF(); auto AL = LoadGPRRegister(X86State::REG_RAX, 1); + auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); + auto AF = CalculateAFForDecimal(AL); - SetRFLAG(_Constant(0)); - CalculateDeferredFlags(); + // CF |= (AL > 0x99); + CF = _Or(OpSize::i64Bit, CF, _Select(FEXCore::IR::COND_UGT, AL, _Constant(0x99), _Constant(1), _Constant(0))); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xf)), _Constant(9), _Constant(1), _Constant(0))); - auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); + // NewCF = CF | (AF && (Borrow from AL - 6)) + auto NewCF = _Or(OpSize::i32Bit, CF, _Select(FEXCore::IR::COND_ULT, AL, _Constant(6), AF, CF)); - CalculateDeferredFlags(); - CondJump(Cond, TrueBlock, FalseBlock); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - SetAF(0); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); - { - auto NewAL = _Sub(OpSize::i64Bit, AL, _Constant(0x6)); - StoreGPRRegister(X86State::REG_RAX, NewAL, 1); - CalculateDeferredFlags(); - auto NewCF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - // XXX: I don't think this is correct. 
Needs Investigation. - // The `CF` variable is the original CF from the start of the operation - // The `NewCF` will be _Constant(0) stored aboved. - // So Or(CF, _Constant(0)) ill mean CF gets updated to the old value in the true case? - SetRFLAG(_Or(OpSize::i64Bit, CF, NewCF)); - SetAF(1); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); + // AL = AF ? (AL - 0x6) : AL; + AL = _Select(FEXCore::IR::COND_NEQ, AF, _Constant(0), + _Sub(OpSize::i64Bit, AL, _Constant(0x6)), AL); - Cond = _Or(OpSize::i64Bit, CF, _Select(FEXCore::IR::COND_UGT, AL, _Constant(0x99), _Constant(1), _Constant(0))); - FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - TrueBlock = CreateNewCodeBlockAfter(FalseBlock); - EndBlock = CreateNewCodeBlockAfter(TrueBlock); - CondJump(Cond, TrueBlock, FalseBlock); - SetCurrentCodeBlock(FalseBlock); - StartNewBlock(); - { - SetRFLAG(_Constant(0)); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(TrueBlock); - StartNewBlock(); - { - AL = LoadGPRRegister(X86State::REG_RAX, 1); - auto NewAL = _Sub(OpSize::i64Bit, AL, _Constant(0x60)); - StoreGPRRegister(X86State::REG_RAX, NewAL, 1); - SetRFLAG(_Constant(1)); - CalculateDeferredFlags(); - Jump(EndBlock); - } - SetCurrentCodeBlock(EndBlock); - StartNewBlock(); - // Update Flags - AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetRFLAG(_Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); - SetRFLAG(_Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, _Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); + // AL = CF ? (AL - 0x60) : AL; + AL = _Select(FEXCore::IR::COND_NEQ, CF, _Constant(0), + _Sub(OpSize::i64Bit, AL, _Constant(0x60)), AL); + + // SF, ZF, PF set according to result. CF set per above. OF undefined. 
+ StoreGPRRegister(X86State::REG_RAX, AL, 1); + SetNZ_ZeroCV(1, AL); + SetRFLAG(NewCF); CalculatePF(AL); - FixupAF(); + SetAFAndFixup(AF); } void OpDispatchBuilder::AAAOp(OpcodeArgs) { From bbc232741b39a442c24b65531501f2c32802b9da Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 25 Mar 2024 19:43:20 -0400 Subject: [PATCH 10/10] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../FlagM/Primary_32Bit.json | 264 ++++++----------- .../InstructionCountCI/Primary_32Bit.json | 274 ++++++------------ 2 files changed, 180 insertions(+), 358 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json index 3f7306ad09..4c0f7dc6ce 100644 --- a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json @@ -83,174 +83,100 @@ ] }, "daa": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 21, "Comment": "0x27", "ExpectedArm64ASM": [ - "mov w20, #0x0", + "uxtb w20, w4", "cset w21, hs", - "eor w22, w27, w26", - "ubfx w22, w22, #4, #1", - "uxtb w23, w4", - "rmif x20, #63, #nzCv", - "and x20, x23, #0xf", - "mrs x24, nzcv", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "orr x20, x22, x20", - "msr nzcv, x24", - "cbnz x20, #+0xc", - "mov w27, #0x0", - "b #+0x1c", - "add x20, x23, #0x6 (6)", - "bfxil w4, w20, #0, #8", - "cset w20, hs", - "orr x20, x21, x20", - "rmif x20, #63, #nzCv", - "mov w27, #0x10", - "mrs x20, nzcv", - "cmp x23, #0x99 (153)", + "and x22, x20, #0xf", + "cmp x22, #0x9 (9)", "cset x22, hi", - "orr x21, x21, x22", - "msr nzcv, x20", - "cbnz x21, #+0x10", - "mov w20, #0x0", - "rmif x20, #63, #nzCv", - "b #+0x18", - "uxtb w20, w4", - "add x20, x20, #0x60 (96)", - "bfxil w4, w20, #0, #8", - "mov w20, #0x1", - "rmif x20, #63, #nzCv", - "uxtb w26, w4", - "and x20, x26, #0x80", - "mrs x21, nzcv", - "cmp x20, #0x0 (0)", - "cset x20, hs", - "msr nzcv, x21", - "rmif x20, #61, #Nzcv", - "and x20, x26, #0xff", - "mrs x21, 
nzcv", - "cmp x20, #0x0 (0)", - "cset x20, eq", - "msr nzcv, x21", - "rmif x20, #62, #nZcv", - "eor w27, w27, w26" + "eor w23, w27, w26", + "ubfx w23, w23, #4, #1", + "orr x22, x23, x22", + "cmp x20, #0x99 (153)", + "cset x23, hi", + "orr x21, x21, x23", + "add x23, x20, #0x6 (6)", + "cmp x22, #0x0 (0)", + "csel x20, x23, x20, ne", + "add x23, x20, #0x60 (96)", + "cmp x21, #0x0 (0)", + "csel x26, x23, x20, ne", + "bfxil w4, w26, #0, #8", + "cmn wzr, w26, lsl #24", + "rmif x21, #63, #nzCv", + "eor w27, w26, w22, lsl #4" ] }, "das": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 24, "Comment": "0x2f", "ExpectedArm64ASM": [ - "mov w20, #0x0", + "uxtb w20, w4", "cset w21, hs", - "eor w22, w27, w26", - "ubfx w22, w22, #4, #1", - "uxtb w23, w4", - "rmif x20, #63, #nzCv", - "and x20, x23, #0xf", - "mrs x24, nzcv", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "orr x20, x22, x20", - "msr nzcv, x24", - "cbnz x20, #+0xc", - "mov w27, #0x0", - "b #+0x1c", - "sub x20, x23, #0x6 (6)", - "bfxil w4, w20, #0, #8", - "cset w20, hs", - "orr x20, x21, x20", - "rmif x20, #63, #nzCv", - "mov w27, #0x10", - "mrs x20, nzcv", - "cmp x23, #0x99 (153)", + "and x22, x20, #0xf", + "cmp x22, #0x9 (9)", "cset x22, hi", - "orr x21, x21, x22", - "msr nzcv, x20", - "cbnz x21, #+0x10", - "mov w20, #0x0", - "rmif x20, #63, #nzCv", - "b #+0x18", - "uxtb w20, w4", - "sub x20, x20, #0x60 (96)", - "bfxil w4, w20, #0, #8", - "mov w20, #0x1", - "rmif x20, #63, #nzCv", - "uxtb w26, w4", - "and x20, x26, #0x80", - "mrs x21, nzcv", - "cmp x20, #0x0 (0)", - "cset x20, hs", - "msr nzcv, x21", - "rmif x20, #61, #Nzcv", - "and x20, x26, #0xff", - "mrs x21, nzcv", - "cmp x20, #0x0 (0)", - "cset x20, eq", - "msr nzcv, x21", - "rmif x20, #62, #nZcv", - "eor w27, w27, w26" + "eor w23, w27, w26", + "ubfx w23, w23, #4, #1", + "orr x22, x23, x22", + "cmp x20, #0x99 (153)", + "cset x23, hi", + "orr x21, x21, x23", + "cmp x20, #0x6 (6)", + "csel x23, x22, x21, lo", + "orr w23, w21, w23", + "sub x24, x20, 
#0x6 (6)", + "cmp x22, #0x0 (0)", + "csel x20, x24, x20, ne", + "sub x24, x20, #0x60 (96)", + "cmp x21, #0x0 (0)", + "csel x26, x24, x20, ne", + "bfxil w4, w26, #0, #8", + "cmn wzr, w26, lsl #24", + "rmif x23, #63, #nzCv", + "eor w27, w26, w22, lsl #4" ] }, "aaa": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 14, "Comment": "0x37", "ExpectedArm64ASM": [ - "eor w20, w27, w26", - "ubfx w20, w20, #4, #1", - "uxtb w21, w4", - "uxth w22, w4", - "and x21, x21, #0xf", - "mrs x23, nzcv", - "cmp x21, #0x9 (9)", - "cset x21, hi", - "orr x20, x20, x21", - "msr nzcv, x23", - "cbnz x20, #+0x1c", - "mov w20, #0xff0f", - "and x20, x22, x20", - "bfxil w4, w20, #0, #16", - "mov w27, #0x0", - "msr nzcv, x27", - "b #+0x20", - "add x20, x22, #0x106 (262)", + "and x20, x4, #0xf", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "eor w21, w27, w26", + "ubfx w21, w21, #4, #1", + "orr x20, x21, x20", + "lsl x21, x20, #29", + "eor w27, w26, w20, lsl #4", + "msr nzcv, x21", + "add w20, w4, #0x106 (262)", + "csel w20, w20, w4, hs", "mov w21, #0xff0f", - "and x20, x20, x21", - "bfxil w4, w20, #0, #16", - "mov w20, #0x20000000", - "mov w27, #0x10", - "msr nzcv, x20" + "and w20, w20, w21", + "bfxil w4, w20, #0, #16" ] }, "aas": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 14, "Comment": "0x3f", "ExpectedArm64ASM": [ - "eor w20, w27, w26", - "ubfx w20, w20, #4, #1", - "uxtb w21, w4", - "uxth w22, w4", - "and x21, x21, #0xf", - "mrs x23, nzcv", - "cmp x21, #0x9 (9)", - "cset x21, hi", - "orr x20, x20, x21", - "msr nzcv, x23", - "cbnz x20, #+0x1c", - "mov w20, #0xff0f", - "and x20, x22, x20", - "bfxil w4, w20, #0, #16", - "mov w27, #0x0", - "msr nzcv, x27", - "b #+0x24", - "sub x20, x22, #0x6 (6)", - "sub x20, x20, #0x100 (256)", + "and x20, x4, #0xf", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "eor w21, w27, w26", + "ubfx w21, w21, #4, #1", + "orr x20, x21, x20", + "lsl x21, x20, #29", + "eor w27, w26, w20, lsl #4", + "msr nzcv, x21", + "sub w20, w4, #0x106 
(262)", + "csel w20, w20, w4, hs", "mov w21, #0xff0f", - "and x20, x20, x21", - "bfxil w4, w20, #0, #16", - "mov w20, #0x20000000", - "mov w27, #0x10", - "msr nzcv, x20" + "and w20, w20, w21", + "bfxil w4, w20, #0, #16" ] }, "inc ax": { @@ -386,7 +312,7 @@ ] }, "aam": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 8, "Comment": "0xd4", "ExpectedArm64ASM": [ "uxtb w20, w4", @@ -394,31 +320,26 @@ "udiv x22, x20, x21", "udiv x2, x20, x21", "msub x20, x2, x21, x20", - "lsl x21, x22, #8", - "add x20, x21, x20", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "add x26, x20, x22, lsl #8", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "aad": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 7, "Comment": "0xd5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxth w21, w4", - "lsr w21, w21, #8", - "mov w22, #0xa", - "mul x21, x21, x22", - "add x20, x20, x21", - "and x20, x20, #0xff", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "lsr w20, w4, #8", + "mov w21, #0xa", + "mul x20, x20, x21", + "add x20, x4, x20", + "and x26, x20, #0xff", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "db 0xd4, 0x40": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 8, "Comment": [ "aam with a different immediate byte base", "0xd4" @@ -429,28 +350,23 @@ "udiv x22, x20, x21", "udiv x2, x20, x21", "msub x20, x2, x21, x20", - "lsl x21, x22, #8", - "add x20, x21, x20", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "add x26, x20, x22, lsl #8", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "db 0xd5, 0x40": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 6, "Comment": [ "aad with a different immediate byte base", "0xd5" ], "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxth w21, w4", - "lsr w21, w21, #8", - "lsl x21, x21, #6", - "add x20, x20, x21", - "and x20, x20, #0xff", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "lsr w20, w4, #8", + "lsl x20, x20, #6", + "add x20, x4, x20", + "and x26, x20, 
#0xff", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, diff --git a/unittests/InstructionCountCI/Primary_32Bit.json b/unittests/InstructionCountCI/Primary_32Bit.json index eaeb0f592a..cf8b6df87f 100644 --- a/unittests/InstructionCountCI/Primary_32Bit.json +++ b/unittests/InstructionCountCI/Primary_32Bit.json @@ -82,188 +82,104 @@ ] }, "daa": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 23, "Comment": "0x27", "ExpectedArm64ASM": [ - "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "uxtb w22, w4", - "mrs x23, nzcv", - "and w23, w23, #0xdfffffff", - "msr nzcv, x23", - "and x23, x22, #0xf", - "mrs x24, nzcv", - "cmp x23, #0x9 (9)", - "cset x23, hi", - "orr x21, x21, x23", - "msr nzcv, x24", - "cbnz x21, #+0xc", - "mov w27, #0x0", - "b #+0x2c", - "add x21, x22, #0x6 (6)", - "bfxil w4, w21, #0, #8", + "uxtb w20, w4", "cset w21, hs", - "orr x21, x20, x21", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "mov w27, #0x10", - "msr nzcv, x21", - "mrs x21, nzcv", - "cmp x22, #0x99 (153)", + "and x22, x20, #0xf", + "cmp x22, #0x9 (9)", "cset x22, hi", - "orr x20, x20, x22", - "msr nzcv, x21", - "cbnz x20, #+0x14", - "mrs x20, nzcv", - "and w20, w20, #0xdfffffff", - "msr nzcv, x20", - "b #+0x1c", - "uxtb w20, w4", - "add x20, x20, #0x60 (96)", - "bfxil w4, w20, #0, #8", - "mrs x20, nzcv", - "orr w20, w20, #0x20000000", - "msr nzcv, x20", - "uxtb w26, w4", - "and x20, x26, #0x80", - "mrs x21, nzcv", - "cmp x20, #0x0 (0)", - "cset x20, hs", - "mov w0, w21", - "bfi w0, w20, #31, #1", - "mov w20, w0", - "and x21, x26, #0xff", + "eor w23, w27, w26", + "ubfx w23, w23, #4, #1", + "orr x22, x23, x22", + "cmp x20, #0x99 (153)", + "cset x23, hi", + "orr x21, x21, x23", + "add x23, x20, #0x6 (6)", + "cmp x22, #0x0 (0)", + "csel x20, x23, x20, ne", + "add x23, x20, #0x60 (96)", "cmp x21, #0x0 (0)", - "cset x21, eq", - "bfi w20, w21, #30, #1", - "eor w27, w27, w26", + "csel x26, x23, x20, ne", + "bfxil w4, 
w26, #0, #8", + "cmn wzr, w26, lsl #24", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "eor w27, w26, w22, lsl #4", "msr nzcv, x20" ] }, "das": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 26, "Comment": "0x2f", "ExpectedArm64ASM": [ - "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "uxtb w22, w4", - "mrs x23, nzcv", - "and w23, w23, #0xdfffffff", - "msr nzcv, x23", - "and x23, x22, #0xf", - "mrs x24, nzcv", - "cmp x23, #0x9 (9)", - "cset x23, hi", - "orr x21, x21, x23", - "msr nzcv, x24", - "cbnz x21, #+0xc", - "mov w27, #0x0", - "b #+0x2c", - "sub x21, x22, #0x6 (6)", - "bfxil w4, w21, #0, #8", + "uxtb w20, w4", "cset w21, hs", - "orr x21, x20, x21", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "mov w27, #0x10", - "msr nzcv, x21", - "mrs x21, nzcv", - "cmp x22, #0x99 (153)", + "and x22, x20, #0xf", + "cmp x22, #0x9 (9)", "cset x22, hi", - "orr x20, x20, x22", - "msr nzcv, x21", - "cbnz x20, #+0x14", - "mrs x20, nzcv", - "and w20, w20, #0xdfffffff", - "msr nzcv, x20", - "b #+0x1c", - "uxtb w20, w4", - "sub x20, x20, #0x60 (96)", - "bfxil w4, w20, #0, #8", - "mrs x20, nzcv", - "orr w20, w20, #0x20000000", - "msr nzcv, x20", - "uxtb w26, w4", - "and x20, x26, #0x80", - "mrs x21, nzcv", - "cmp x20, #0x0 (0)", - "cset x20, hs", - "mov w0, w21", - "bfi w0, w20, #31, #1", - "mov w20, w0", - "and x21, x26, #0xff", + "eor w23, w27, w26", + "ubfx w23, w23, #4, #1", + "orr x22, x23, x22", + "cmp x20, #0x99 (153)", + "cset x23, hi", + "orr x21, x21, x23", + "cmp x20, #0x6 (6)", + "csel x23, x22, x21, lo", + "orr w23, w21, w23", + "sub x24, x20, #0x6 (6)", + "cmp x22, #0x0 (0)", + "csel x20, x24, x20, ne", + "sub x24, x20, #0x60 (96)", "cmp x21, #0x0 (0)", - "cset x21, eq", - "bfi w20, w21, #30, #1", - "eor w27, w27, w26", + "csel x26, x24, x20, ne", + "bfxil w4, w26, #0, #8", + "cmn wzr, w26, lsl #24", + "mrs x20, nzcv", + "orr w20, w20, w23, lsl #29", + "eor w27, w26, w22, lsl #4", "msr 
nzcv, x20" ] }, "aaa": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 14, "Comment": "0x37", "ExpectedArm64ASM": [ - "eor w20, w27, w26", - "ubfx w20, w20, #4, #1", - "uxtb w21, w4", - "uxth w22, w4", - "and x21, x21, #0xf", - "mrs x23, nzcv", - "cmp x21, #0x9 (9)", - "cset x21, hi", - "orr x20, x20, x21", - "msr nzcv, x23", - "cbnz x20, #+0x1c", - "mov w20, #0xff0f", - "and x20, x22, x20", - "bfxil w4, w20, #0, #16", - "mov w27, #0x0", - "msr nzcv, x27", - "b #+0x20", - "add x20, x22, #0x106 (262)", + "and x20, x4, #0xf", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "eor w21, w27, w26", + "ubfx w21, w21, #4, #1", + "orr x20, x21, x20", + "lsl x21, x20, #29", + "eor w27, w26, w20, lsl #4", + "msr nzcv, x21", + "add w20, w4, #0x106 (262)", + "csel w20, w20, w4, hs", "mov w21, #0xff0f", - "and x20, x20, x21", - "bfxil w4, w20, #0, #16", - "mov w20, #0x20000000", - "mov w27, #0x10", - "msr nzcv, x20" + "and w20, w20, w21", + "bfxil w4, w20, #0, #16" ] }, "aas": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 14, "Comment": "0x3f", "ExpectedArm64ASM": [ - "eor w20, w27, w26", - "ubfx w20, w20, #4, #1", - "uxtb w21, w4", - "uxth w22, w4", - "and x21, x21, #0xf", - "mrs x23, nzcv", - "cmp x21, #0x9 (9)", - "cset x21, hi", - "orr x20, x20, x21", - "msr nzcv, x23", - "cbnz x20, #+0x1c", - "mov w20, #0xff0f", - "and x20, x22, x20", - "bfxil w4, w20, #0, #16", - "mov w27, #0x0", - "msr nzcv, x27", - "b #+0x24", - "sub x20, x22, #0x6 (6)", - "sub x20, x20, #0x100 (256)", + "and x20, x4, #0xf", + "cmp x20, #0x9 (9)", + "cset x20, hi", + "eor w21, w27, w26", + "ubfx w21, w21, #4, #1", + "orr x20, x21, x20", + "lsl x21, x20, #29", + "eor w27, w26, w20, lsl #4", + "msr nzcv, x21", + "sub w20, w4, #0x106 (262)", + "csel w20, w20, w4, hs", "mov w21, #0xff0f", - "and x20, x20, x21", - "bfxil w4, w20, #0, #16", - "mov w20, #0x20000000", - "mov w27, #0x10", - "msr nzcv, x20" + "and w20, w20, w21", + "bfxil w4, w20, #0, #16" ] }, "inc ax": { @@ -415,7 
+331,7 @@ ] }, "aam": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 8, "Comment": "0xd4", "ExpectedArm64ASM": [ "uxtb w20, w4", @@ -423,31 +339,26 @@ "udiv x22, x20, x21", "udiv x2, x20, x21", "msub x20, x2, x21, x20", - "lsl x21, x22, #8", - "add x20, x21, x20", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "add x26, x20, x22, lsl #8", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "aad": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 7, "Comment": "0xd5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxth w21, w4", - "lsr w21, w21, #8", - "mov w22, #0xa", - "mul x21, x21, x22", - "add x20, x20, x21", - "and x20, x20, #0xff", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "lsr w20, w4, #8", + "mov w21, #0xa", + "mul x20, x20, x21", + "add x20, x4, x20", + "and x26, x20, #0xff", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "db 0xd4, 0x40": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 8, "Comment": [ "aam with a different immediate byte base", "0xd4" @@ -458,28 +369,23 @@ "udiv x22, x20, x21", "udiv x2, x20, x21", "msub x20, x2, x21, x20", - "lsl x21, x22, #8", - "add x20, x21, x20", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "add x26, x20, x22, lsl #8", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] }, "db 0xd5, 0x40": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 6, "Comment": [ "aad with a different immediate byte base", "0xd5" ], "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxth w21, w4", - "lsr w21, w21, #8", - "lsl x21, x21, #6", - "add x20, x20, x21", - "and x20, x20, #0xff", - "bfxil w4, w20, #0, #16", - "uxtb w26, w4", + "lsr w20, w4, #8", + "lsl x20, x20, #6", + "add x20, x4, x20", + "and x26, x20, #0xff", + "bfxil w4, w26, #0, #16", "cmn wzr, w26, lsl #24" ] },