From 98c5607a0d8186fa8c8eb4492be44c460fa9fe54 Mon Sep 17 00:00:00 2001 From: Lioncache Date: Tue, 9 Apr 2024 02:21:55 -0400 Subject: [PATCH] OpcodeDispatcher: Make use of new x87 constants Now we can load these directly instead of needing to manually materialize them. --- .../Interface/Core/OpcodeDispatcher.cpp | 14 ++-- .../Source/Interface/Core/OpcodeDispatcher.h | 2 +- .../Interface/Core/OpcodeDispatcher/X87.cpp | 32 +++----- .../InstructionCountCI/Crypto/H0F3A.json | 4 +- .../InstructionCountCI/FlagM/Secondary.json | 2 +- .../FlagM/Secondary_OpSize.json | 2 +- unittests/InstructionCountCI/FlagM/x87.json | 77 +++++-------------- unittests/InstructionCountCI/H0F38.json | 2 +- unittests/InstructionCountCI/H0F3A.json | 14 ++-- .../InstructionCountCI/PrimaryGroup.json | 8 +- .../InstructionCountCI/Secondary_OpSize.json | 8 +- .../InstructionCountCI/Secondary_REP.json | 2 +- .../InstructionCountCI/Secondary_REPNE.json | 4 +- unittests/InstructionCountCI/VEX_map1.json | 12 +-- unittests/InstructionCountCI/VEX_map2.json | 2 +- unittests/InstructionCountCI/VEX_map3.json | 4 +- unittests/InstructionCountCI/x87.json | 77 +++++-------------- 17 files changed, 88 insertions(+), 178 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 9282b91444..ee21b23841 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -6665,13 +6665,13 @@ constexpr uint16_t PF_F2 = 3; {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST}, {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM}, // E6 = Invalid - {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0 - {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10) - {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e) - {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi - {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2) - {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2) - {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0 + {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const}, // 1.0 + {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const}, // log2l(10) + {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const}, // log2l(e) + {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const}, // pi + {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const}, // log10l(2) + {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const}, // log(2) + {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const}, // 0.0 // EF = Invalid {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 7812dd1477..e734a44898 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -703,7 +703,7 @@ friend class FEXCore::IR::PassManager; OrderedNode *ReconstructX87StateFromFSW(OrderedNode *FSW); template void FLD(OpcodeArgs); - template + template void FLD_Const(OpcodeArgs); void FBLD(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index 39a0871416..3c7662f345 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -196,7 +196,7 @@ void OpDispatchBuilder::FBSTP(OpcodeArgs) { SetX87Top(top); } -template +template void OpDispatchBuilder::FLD_Const(OpcodeArgs) { // Update TOP auto orig_top = GetX87Top(); @@ -204,28 +204,26 @@ void OpDispatchBuilder::FLD_Const(OpcodeArgs) { SetX87ValidTag(top, true); SetX87Top(top); - auto low = _Constant(Lower); - auto high = _Constant(Upper); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, constant); + // Write to ST[TOP] _StoreContextIndexed(data, top, 16, MMBaseOffset(), 16, FPRClass); } template -void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0 +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // 1.0 template -void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log2l(10) template -void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log2l(e) template -void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // pi template -void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log10l(2) template -void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2) +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // log(2) template -void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0 +void OpDispatchBuilder::FLD_Const(OpcodeArgs); // 0.0 void OpDispatchBuilder::FILD(OpcodeArgs) { // Update TOP @@ -958,10 +956,7 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs) { OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); if (Plus1) { - auto low = _Constant(0x8000'0000'0000'0000ULL); - auto high = _Constant(0b0'011'1111'1111'1111); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE); st0 = _F80Add(st0, data); } @@ -981,10 +976,7 @@ void OpDispatchBuilder::X87TAN(OpcodeArgs) { auto result = _F80TAN(a); - auto low = _Constant(0x8000'0000'0000'0000ULL); - auto high = _Constant(0b0'011'1111'1111'1111ULL); - OrderedNode *data = _VCastFromGPR(16, 8, low); - data = _VInsGPR(16, 8, 1, data, high); + OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE); // TODO: ACCURACY: should check source is in range –2^63 to +2^63 SetRFLAG(_Constant(0)); diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json index 238782658e..f427a56faa 100644 --- a/unittests/InstructionCountCI/Crypto/H0F3A.json +++ b/unittests/InstructionCountCI/Crypto/H0F3A.json @@ -55,7 +55,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -68,7 +68,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 232921e309..4084ceb643 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -1612,7 +1612,7 @@ "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x28, #2224]", + "ldr d3, [x28, #2272]", "cmlt v2.16b, v2.16b, #0", "and v2.16b, v2.16b, v3.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json index ce9df6ffec..24169c7eae 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json @@ -38,7 +38,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index d342b9beb6..2ae33f3b84 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -4502,7 +4502,7 @@ ] }, "fld1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe8 /5" ], @@ -4516,16 +4516,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2304]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2t": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe9 /5" ], @@ -4539,19 +4536,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8afe", - "movk x21, #0xcd1b, lsl #16", - "movk x21, #0x784b, lsl #32", - "movk x21, #0xd49a, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2320]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2e": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xea /5" ], @@ -4565,19 +4556,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf0bc", - "movk x21, #0x5c17, lsl #16", - "movk x21, #0x3b29, lsl #32", - "movk x21, #0xb8aa, lsl #48", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2336]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldpi": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xeb /5" ], @@ -4591,19 +4576,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xc235", - "movk x21, #0x2168, lsl #16", - "movk x21, #0xdaa2, lsl #32", - "movk x21, #0xc90f, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2352]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldlg2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xec /5" ], @@ -4617,19 +4596,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf799", - "movk x21, #0xfbcf, lsl #16", - "movk x21, #0x9a84, lsl #32", - "movk x21, #0x9a20, lsl #48", - "mov w22, #0x3ffd", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2368]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldln2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xed /5" ], @@ -4643,19 +4616,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x79ac", - "movk x21, #0xd1cf, lsl #16", - "movk x21, #0x17f7, lsl #32", - "movk x21, #0xb172, lsl #48", - "mov w22, #0x3ffe", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2384]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldz": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xee /5" ], @@ -4669,9 +4636,7 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov w21, #0x0", - "fmov d2, x21", - "mov v2.d[1], x21", + "movi v2.2d, #0x0", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] @@ -4771,7 +4736,7 @@ ] }, "fptan": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 46, "Comment": [ "0xd9 11b 0xf2 /6" ], @@ -4815,10 +4780,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "mov x21, #0x8000000000000000", - "mov w23, #0x3fff", - "fmov d3, x21", - "mov v3.d[1], x23", + "ldr q3, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", @@ -5082,7 +5044,7 @@ ] }, "fyl2xp1": { - "ExpectedInstructionCount": 79, + "ExpectedInstructionCount": 76, "Comment": [ "0xd9 11b 0xf9 /7" ], @@ -5100,10 +5062,7 @@ "ldr q2, [x0, #768]", "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "mov x20, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d4, x20", - "mov v4.d[1], x22", + "ldr q4, [x28, #2304]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json index 65009d764d..2fb8062559 100644 --- a/unittests/InstructionCountCI/H0F38.json +++ b/unittests/InstructionCountCI/H0F38.json @@ -624,7 +624,7 @@ "0x66 0x0f 0x38 0x41" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2000]", + "ldr q2, [x28, #2048]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json index 6e720a5854..47ebb158bd 100644 --- a/unittests/InstructionCountCI/H0F3A.json +++ b/unittests/InstructionCountCI/H0F3A.json @@ -315,7 +315,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2128]", + "ldr q2, [x28, #2176]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -325,7 +325,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2144]", + "ldr q2, [x28, #2192]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -344,7 +344,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", + "ldr q2, [x28, #2208]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -364,7 +364,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2176]", + "ldr q2, [x28, #2224]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -383,7 +383,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2192]", + "ldr q2, [x28, #2240]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -393,7 +393,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2208]", + "ldr q2, [x28, #2256]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -462,7 +462,7 @@ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1736]", + "ldr x0, [x28, #1784]", "ldr q2, [x0, #3440]", "tbx v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index bc6ada6e86..750f446f03 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -2868,7 +2868,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2288]", + "ldr x3, [x28, #2432]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2879,7 +2879,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2304]", + "ldr x3, [x28, #2448]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2940,7 +2940,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2296]", + "ldr x3, [x28, #2440]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2953,7 +2953,7 @@ "mov x0, x6", "mov x1, x20", "mov x2, x7", - "ldr x3, [x28, #2312]", + "ldr x3, [x28, #2456]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json index f60bf46136..a2eac2612b 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/Secondary_OpSize.json @@ -522,7 +522,7 @@ "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1704]", + "ldr x0, [x28, #1752]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -536,7 +536,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #1704]", + "ldr x0, [x28, #1752]", "ldr q3, [x0, #16]", "tbl v16.16b, {v2.16b}, v3.16b" ] @@ -1014,7 +1014,7 @@ "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2064]", + "ldr q2, [x28, #2112]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.2d, v16.2d, v2.2d" ] @@ -1070,7 +1070,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json index ba273aff97..df1f8e46fe 100644 --- a/unittests/InstructionCountCI/Secondary_REP.json +++ b/unittests/InstructionCountCI/Secondary_REP.json @@ -354,7 +354,7 @@ "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1696]", + "ldr x0, [x28, #1744]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json index d7280457c1..ab58de2ee4 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/Secondary_REPNE.json @@ -296,7 +296,7 @@ "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1688]", + "ldr x0, [x28, #1736]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -452,7 +452,7 @@ "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2032]", + "ldr q2, [x28, #2080]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.4s, v16.4s, v2.4s" ] diff --git a/unittests/InstructionCountCI/VEX_map1.json b/unittests/InstructionCountCI/VEX_map1.json index 06f1f4dcc7..9b80f39ebf 100644 --- a/unittests/InstructionCountCI/VEX_map1.json +++ b/unittests/InstructionCountCI/VEX_map1.json @@ -2755,7 +2755,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2824,7 +2824,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2893,7 +2893,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1712]", + "ldr x0, [x28, #1760]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -4338,7 +4338,7 @@ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2064]", + "ldr q2, [x28, #2112]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.2d, v17.2d, v2.2d" ] @@ -4361,7 +4361,7 @@ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2032]", + "ldr q2, [x28, #2080]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.4s, v17.4s, v2.4s" ] @@ -4498,7 +4498,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", + "ldr q2, [x28, #2272]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/VEX_map2.json b/unittests/InstructionCountCI/VEX_map2.json index dd6c291801..b2ccc16d47 100644 --- a/unittests/InstructionCountCI/VEX_map2.json +++ b/unittests/InstructionCountCI/VEX_map2.json @@ -1575,7 +1575,7 @@ "Map 2 0b01 0x41 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2000]", + "ldr q2, [x28, #2048]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", diff --git a/unittests/InstructionCountCI/VEX_map3.json b/unittests/InstructionCountCI/VEX_map3.json index bcb21cc5cb..2a58bbef8b 100644 --- a/unittests/InstructionCountCI/VEX_map3.json +++ b/unittests/InstructionCountCI/VEX_map3.json @@ -4799,7 +4799,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -4812,7 +4812,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", + "ldr q2, [x28, #2160]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index a5a69a3ef4..62bf253366 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -4501,7 +4501,7 @@ ] }, "fld1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe8 /5" ], @@ -4515,16 +4515,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2304]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2t": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xe9 /5" ], @@ -4538,19 +4535,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x8afe", - "movk x21, #0xcd1b, lsl #16", - "movk x21, #0x784b, lsl #32", - "movk x21, #0xd49a, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2320]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldl2e": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xea /5" ], @@ -4564,19 +4555,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf0bc", - "movk x21, #0x5c17, lsl #16", - "movk x21, #0x3b29, lsl #32", - "movk x21, #0xb8aa, lsl #48", - "mov w22, #0x3fff", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2336]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldpi": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xeb /5" ], @@ -4590,19 +4575,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xc235", - "movk x21, #0x2168, lsl #16", - "movk x21, #0xdaa2, lsl #32", - "movk x21, #0xc90f, lsl #48", - "mov w22, #0x4000", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2352]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldlg2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xec /5" ], @@ -4616,19 +4595,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0xf799", - "movk x21, #0xfbcf, lsl #16", - "movk x21, #0x9a84, lsl #32", - "movk x21, #0x9a20, lsl #48", - "mov w22, #0x3ffd", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2368]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldln2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xed /5" ], @@ -4642,19 +4615,13 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov x21, #0x79ac", - "movk x21, #0xd1cf, lsl #16", - "movk x21, #0x17f7, lsl #32", - "movk x21, #0xb172, lsl #48", - "mov w22, #0x3ffe", - "fmov d2, x21", - "mov v2.d[1], x22", + "ldr q2, [x28, #2384]", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] }, "fldz": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": [ "0xd9 11b 0xee /5" ], @@ -4668,9 +4635,7 @@ "orr w21, w22, w21", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "mov w21, #0x0", - "fmov d2, x21", - "mov v2.d[1], x21", + "movi v2.2d, #0x0", "add x0, x28, x20, lsl #4", "str q2, [x0, #768]" ] @@ -4770,7 +4735,7 @@ ] }, "fptan": { - "ExpectedInstructionCount": 49, + "ExpectedInstructionCount": 46, "Comment": [ "0xd9 11b 0xf2 /6" ], @@ -4814,10 +4779,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "mov x21, #0x8000000000000000", - "mov w23, #0x3fff", - "fmov d3, x21", - "mov v3.d[1], x23", + "ldr q3, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", @@ -5081,7 +5043,7 @@ ] }, "fyl2xp1": { - "ExpectedInstructionCount": 79, + "ExpectedInstructionCount": 76, "Comment": [ "0xd9 11b 0xf9 /7" ], @@ -5099,10 +5061,7 @@ "ldr q2, [x0, #768]", "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "mov x20, #0x8000000000000000", - "mov w22, #0x3fff", - "fmov d4, x20", - "mov v4.d[1], x22", + "ldr q4, [x28, #2304]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]",