From 65ec191dc1f8b78b625cfe0eb33222d55ed91f88 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 14:08:43 -0400 Subject: [PATCH 1/4] IR: add XornShift Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp | 10 ++++++++++ FEXCore/Source/Interface/IR/IR.json | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 218342ce65..a177442958 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -769,6 +769,16 @@ DEF_OP(XorShift) { eor(EmitSize, GetReg(Node), GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID()), ConvertIRShiftType(Op->Shift), Op->ShiftAmount); } +DEF_OP(XornShift) { + auto Op = IROp->C(); + const uint8_t OpSize = IROp->Size; + + LOGMAN_THROW_AA_FMT(OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize); + const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; + + eon(EmitSize, GetReg(Node), GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID()), ConvertIRShiftType(Op->Shift), Op->ShiftAmount); +} + DEF_OP(Lshl) { auto Op = IROp->C(); const uint8_t OpSize = IROp->Size; diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index abf9b8586d..a58cb05e04 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -1153,6 +1153,13 @@ "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" ] }, + "GPR = XornShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": { + "Desc": [ "Integer binary exclusive or not with shifted register"], + "DestSize": "Size", + "EmitValidation": [ + "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" + ] + }, "GPR = And OpSize:#Size, GPR:$Src1, GPR:$Src2": { "Desc": ["Integer binary and" ], From 3b052e826f20659357aeda2a9688e2a19a7db682 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 13:58:07 -0400 Subject: [PATCH 2/4] OpcodeDispatcher: calculate PF with integer ops based on clang's __builtin_parity Signed-off-by: Alyssa Rosenzweig --- .../Source/Interface/Core/OpcodeDispatcher/Flags.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 5c8ebbd6bf..a6b8636844 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -242,13 +242,12 @@ OrderedNode *OpDispatchBuilder::LoadPFRaw() { // parity calculated. auto Result = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC); - // Cast the input to a 32-bit FPR. Logically we only need 8-bit, but that would - // generate unwanted an ubfx instruction. VPopcount will ignore the upper bits anyway. - auto InputFPR = _VCastFromGPR(4, 4, Result); + // Cascade to calculate parity of bottom 8-bits to bottom bit. + Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 4); + Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 2); + Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 1); - // Calculate the popcount. - auto Count = _VPopcount(1, 1, InputFPR); - return _VExtractToGPR(8, 1, Count, 0); + return Result; } OrderedNode *OpDispatchBuilder::LoadAF() { From eb4bb5875e8e9a26a15c6dc31d3d34935edab8b4 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 14:08:58 -0400 Subject: [PATCH 3/4] OpcodeDispatcher: absorb invert into PF calculation with xorn Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 14 +++++--------- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 4 ++-- .../Interface/Core/OpcodeDispatcher/Flags.cpp | 10 +++++++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 3c42d45fa8..2d6a7ff586 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -775,24 +775,20 @@ void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) { _ExitFunction(JMPPCOffset); // If we get here then leave the function now } -OrderedNode *OpDispatchBuilder::SelectBit(OrderedNode *Cmp, bool TrueIsNonzero, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue) { +OrderedNode *OpDispatchBuilder::SelectBit(OrderedNode *Cmp, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue) { uint64_t TrueConst, FalseConst; if (IsValueConstant(WrapNode(TrueValue), &TrueConst) && IsValueConstant(WrapNode(FalseValue), &FalseConst) && TrueConst == 1 && FalseConst == 0) { - if (!TrueIsNonzero) - Cmp = _Not(OpSize::i32Bit, Cmp); - return _And(ResultSize, Cmp, _Constant(1)); } SaveNZCV(); _TestNZ(OpSize::i32Bit, Cmp, _Constant(1)); - return _NZCVSelect(ResultSize, - TrueIsNonzero ? CondClassType{COND_NEQ} : CondClassType{COND_EQ}, - TrueValue, FalseValue); + return _NZCVSelect(ResultSize, CondClassType{COND_NEQ}, + TrueValue, FalseValue); } std::pair OpDispatchBuilder::DecodeNZCVCondition(uint8_t OP) const { @@ -857,10 +853,10 @@ OrderedNode *OpDispatchBuilder::SelectCC(uint8_t OP, IR::OpSize ResultSize, Orde } case 0xA: { // JP - Jump if PF == 1 // Raw value contains inverted PF in bottom bit - return SelectBit(LoadPFRaw(), false, ResultSize, TrueValue, FalseValue); + return SelectBit(LoadPFRaw(true), ResultSize, TrueValue, FalseValue); } case 0xB: { // JNP - Jump if PF == 0 - return SelectBit(LoadPFRaw(), true, ResultSize, TrueValue, FalseValue); + return SelectBit(LoadPFRaw(false), ResultSize, TrueValue, FalseValue); } default: LOGMAN_MSG_A_FMT("Unknown CC Op: 0x{:x}\n", OP); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 552e7b94e0..da14ffe1c1 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1627,7 +1627,7 @@ friend class FEXCore::IR::PassManager; } std::pair DecodeNZCVCondition(uint8_t OP) const; - OrderedNode *SelectBit(OrderedNode *Cmp, bool Invert, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue); + OrderedNode *SelectBit(OrderedNode *Cmp, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue); OrderedNode *SelectCC(uint8_t OP, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue); /** @@ -1761,7 +1761,7 @@ friend class FEXCore::IR::PassManager; /** * @name These functions are used by the deferred flag handling while it is calculating and storing flags in to RFLAGs. * @{ */ - OrderedNode *LoadPFRaw(); + OrderedNode *LoadPFRaw(bool Invert); OrderedNode *LoadAF(); void FixupAF(); void SetAFAndFixup(OrderedNode *AF); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index a6b8636844..80f6bd1e80 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -179,7 +179,7 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) { // instead. if (FlagsMask & (1 << FEXCore::X86State::RFLAG_PF_RAW_LOC)) { // Set every bit except the bottommost. - auto OnesInvPF = _Or(OpSize::i64Bit, LoadPFRaw(), _Constant(~1ull)); + auto OnesInvPF = _Or(OpSize::i64Bit, LoadPFRaw(false), _Constant(~1ull)); // Rotate the bottom bit to the appropriate location for PF, so we get // something like 111P1111. Then invert that to get 000p0000. Then OR that @@ -237,7 +237,7 @@ void OpDispatchBuilder::CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNo SetRFLAG(Anded, SrcSize * 8 - 1, true); } -OrderedNode *OpDispatchBuilder::LoadPFRaw() { +OrderedNode *OpDispatchBuilder::LoadPFRaw(bool Invert) { // Read the stored byte. This is the original result (up to 64-bits), it needs // parity calculated. auto Result = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC); @@ -245,7 +245,11 @@ OrderedNode *OpDispatchBuilder::LoadPFRaw() { // Cascade to calculate parity of bottom 8-bits to bottom bit. Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 4); Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 2); - Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 1); + + if (Invert) + Result = _XornShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 1); + else + Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 1); return Result; } From 5c590b9a50eab11c32a5aa1b02d203fef01592ad Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Apr 2024 14:13:09 -0400 Subject: [PATCH 4/4] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/FlagOpts.json | 9 +- .../InstructionCountCI/FlagM/Primary.json | 18 +-- .../InstructionCountCI/FlagM/Secondary.json | 57 +++++---- unittests/InstructionCountCI/FlagM/x87.json | 112 +++++++++--------- .../InstructionCountCI/FlagM/x87_f64.json | 112 +++++++++--------- unittests/InstructionCountCI/Primary.json | 18 +-- unittests/InstructionCountCI/Secondary.json | 57 +++++---- unittests/InstructionCountCI/x87.json | 112 +++++++++--------- unittests/InstructionCountCI/x87_f64.json | 112 +++++++++--------- 9 files changed, 302 insertions(+), 305 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/FlagOpts.json b/unittests/InstructionCountCI/FlagM/FlagOpts.json index d2c0c60de4..25b414f4b4 100644 --- a/unittests/InstructionCountCI/FlagM/FlagOpts.json +++ b/unittests/InstructionCountCI/FlagM/FlagOpts.json @@ -267,7 +267,7 @@ ] }, "AND use only PF": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 8, "x86Insts": [ "and eax, ebx", "setp cl", @@ -275,10 +275,9 @@ ], "ExpectedArm64ASM": [ "and w4, w4, w7", - "fmov s2, w4", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", - "mvn w20, w20", + "eor w20, w4, w4, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x5, x20, #0, #8", "mov x26, x5", diff --git a/unittests/InstructionCountCI/FlagM/Primary.json b/unittests/InstructionCountCI/FlagM/Primary.json index 631f337dfc..84e5cc0cb8 100644 --- a/unittests/InstructionCountCI/FlagM/Primary.json +++ b/unittests/InstructionCountCI/FlagM/Primary.json @@ -1746,9 +1746,9 @@ "orr x20, x20, x21, lsl #20", "ldrb w21, [x28, #725]", "orr x20, x20, x21, lsl #21", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", @@ -1791,9 +1791,9 @@ "orr x20, x20, x21, lsl #20", "ldrb w21, [x28, #725]", "orr x20, x20, x21, lsl #21", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", @@ -1866,9 +1866,9 @@ "eor w21, w27, w26", "ubfx w21, w21, #4, #1", "orr x20, x20, x21, lsl #4", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 1982756f11..616b060106 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -265,12 +265,12 @@ "ExpectedInstructionCount": 8, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel w20, w7, w4, eq", + "csel w20, w7, w4, ne", "bfxil x4, x20, #0, #16", "msr nzcv, x21" ] @@ -279,12 +279,12 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel w4, w7, w4, eq", + "csel w4, w7, w4, ne", "msr nzcv, x21" ] }, @@ -292,12 +292,12 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel x4, x7, x4, eq", + "csel x4, x7, x4, ne", "msr nzcv, x21" ] }, @@ -305,9 +305,9 @@ "ExpectedInstructionCount": 8, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel w20, w7, w4, ne", @@ -319,9 +319,9 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel w4, w7, w4, ne", @@ -332,9 +332,9 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel x4, x7, x4, ne", @@ -513,13 +513,12 @@ ] }, "setpe al": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", - "mvn w20, w20", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x4, x20, #0, #8" ] @@ -528,9 +527,9 @@ "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x4, x20, #0, #8" ] diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index 5bcbafa0b0..d342b9beb6 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -6554,12 +6554,12 @@ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x0 (0)", @@ -6580,12 +6580,12 @@ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x1 (1)", @@ -6606,12 +6606,12 @@ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x2 (2)", @@ -6632,12 +6632,12 @@ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x3 (3)", @@ -6658,12 +6658,12 @@ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x4 (4)", @@ -6684,12 +6684,12 @@ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x5 (5)", @@ -6710,12 +6710,12 @@ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x6 (6)", @@ -6736,12 +6736,12 @@ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x7 (7)", @@ -7530,9 +7530,9 @@ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7556,9 +7556,9 @@ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7582,9 +7582,9 @@ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7608,9 +7608,9 @@ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7634,9 +7634,9 @@ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7660,9 +7660,9 @@ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7686,9 +7686,9 @@ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7712,9 +7712,9 @@ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json index f0257b6dad..5fc98d3472 100644 --- a/unittests/InstructionCountCI/FlagM/x87_f64.json +++ b/unittests/InstructionCountCI/FlagM/x87_f64.json @@ -3868,12 +3868,12 @@ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x0 (0)", @@ -3894,12 +3894,12 @@ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x1 (1)", @@ -3920,12 +3920,12 @@ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x2 (2)", @@ -3946,12 +3946,12 @@ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x3 (3)", @@ -3972,12 +3972,12 @@ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x4 (4)", @@ -3998,12 +3998,12 @@ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x5 (5)", @@ -4024,12 +4024,12 @@ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x6 (6)", @@ -4050,12 +4050,12 @@ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x7 (7)", @@ -4774,9 +4774,9 @@ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4800,9 +4800,9 @@ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4826,9 +4826,9 @@ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4852,9 +4852,9 @@ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4878,9 +4878,9 @@ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4904,9 +4904,9 @@ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4930,9 +4930,9 @@ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4956,9 +4956,9 @@ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", diff --git a/unittests/InstructionCountCI/Primary.json b/unittests/InstructionCountCI/Primary.json index 5ee00256f5..28163392bb 100644 --- a/unittests/InstructionCountCI/Primary.json +++ b/unittests/InstructionCountCI/Primary.json @@ -2607,9 +2607,9 @@ "orr x20, x20, x21, lsl #20", "ldrb w21, [x28, #725]", "orr x20, x20, x21, lsl #21", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", @@ -2652,9 +2652,9 @@ "orr x20, x20, x21, lsl #20", "ldrb w21, [x28, #725]", "orr x20, x20, x21, lsl #21", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", @@ -2739,9 +2739,9 @@ "eor w21, w27, w26", "ubfx w21, w21, #4, #1", "orr x20, x20, x21, lsl #4", - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w21, v2.b[0]", + "eor w21, w26, w26, lsr #4", + "eor w21, w21, w21, lsr #2", + "eor w21, w21, w21, lsr #1", "orr x21, x21, #0xfffffffffffffffe", "orn x20, x20, x21, ror #62", "mrs x21, nzcv", diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index a43162e428..fb9761743f 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -477,12 +477,12 @@ "ExpectedInstructionCount": 8, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel w20, w7, w4, eq", + "csel w20, w7, w4, ne", "bfxil x4, x20, #0, #16", "msr nzcv, x21" ] @@ -491,12 +491,12 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel w4, w7, w4, eq", + "csel w4, w7, w4, ne", "msr nzcv, x21" ] }, @@ -504,12 +504,12 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csel x4, x7, x4, eq", + "csel x4, x7, x4, ne", "msr nzcv, x21" ] }, @@ -517,9 +517,9 @@ "ExpectedInstructionCount": 8, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel w20, w7, w4, ne", @@ -531,9 +531,9 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel w4, w7, w4, ne", @@ -544,9 +544,9 @@ "ExpectedInstructionCount": 7, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csel x4, x7, x4, ne", @@ -1234,13 +1234,12 @@ ] }, "setpe al": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9a", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", - "mvn w20, w20", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x4, x20, #0, #8" ] @@ -1249,9 +1248,9 @@ "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9b", "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x4, x20, #0, #8" ] diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index c6f1e8f31b..a5a69a3ef4 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -6553,12 +6553,12 @@ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x0 (0)", @@ -6579,12 +6579,12 @@ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x1 (1)", @@ -6605,12 +6605,12 @@ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x2 (2)", @@ -6631,12 +6631,12 @@ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x3 (3)", @@ -6657,12 +6657,12 @@ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x4 (4)", @@ -6683,12 +6683,12 @@ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x5 (5)", @@ -6709,12 +6709,12 @@ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x6 (6)", @@ -6735,12 +6735,12 @@ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x7 (7)", @@ -7529,9 +7529,9 @@ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7555,9 +7555,9 @@ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7581,9 +7581,9 @@ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7607,9 +7607,9 @@ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7633,9 +7633,9 @@ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7659,9 +7659,9 @@ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7685,9 +7685,9 @@ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -7711,9 +7711,9 @@ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json index ea97056ab9..d2fe133ac2 100644 --- a/unittests/InstructionCountCI/x87_f64.json +++ b/unittests/InstructionCountCI/x87_f64.json @@ -3888,12 +3888,12 @@ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x0 (0)", @@ -3914,12 +3914,12 @@ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x1 (1)", @@ -3940,12 +3940,12 @@ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x2 (2)", @@ -3966,12 +3966,12 @@ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x3 (3)", @@ -3992,12 +3992,12 @@ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x4 (4)", @@ -4018,12 +4018,12 @@ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x5 (5)", @@ -4044,12 +4044,12 @@ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x6 (6)", @@ -4070,12 +4070,12 @@ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eon w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", - "csetm x20, eq", + "csetm x20, ne", "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w22, w20, #0x7 (7)", @@ -4795,9 +4795,9 @@ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4821,9 +4821,9 @@ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4847,9 +4847,9 @@ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4873,9 +4873,9 @@ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4899,9 +4899,9 @@ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4925,9 +4925,9 @@ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4951,9 +4951,9 @@ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne", @@ -4977,9 +4977,9 @@ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "fmov s2, w26", - "cnt v2.16b, v2.16b", - "umov w20, v2.b[0]", + "eor w20, w26, w26, lsr #4", + "eor w20, w20, w20, lsr #2", + "eor w20, w20, w20, lsr #1", "mrs x21, nzcv", "tst w20, #0x1", "csetm x20, ne",