Skip to content

Commit

Permalink
OpcodeDispatcher: Make use of new x87 constants
Browse files Browse the repository at this point in the history
Now we can load these directly instead of needing to manually materialize them.
  • Loading branch information
lioncash committed Apr 9, 2024
1 parent 6b14c13 commit 98c5607
Show file tree
Hide file tree
Showing 17 changed files with 88 additions and 178 deletions.
14 changes: 7 additions & 7 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6665,13 +6665,13 @@ constexpr uint16_t PF_F2 = 3;
{OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST},
{OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM},
// E6 = Invalid
{OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0
{OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10)
{OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e)
{OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi
{OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2)
{OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2)
{OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0
{OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_ONE>}, // 1.0
{OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10>}, // log2l(10)
{OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E>}, // log2l(e)
{OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_PI>}, // pi
{OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2>}, // log10l(2)
{OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG_2>}, // log(2)
{OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_ZERO>}, // 0.0

// EF = Invalid
{OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp<IR::OP_F80F2XM1>},
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ friend class FEXCore::IR::PassManager;
OrderedNode *ReconstructX87StateFromFSW(OrderedNode *FSW);
template<size_t width>
void FLD(OpcodeArgs);
template<uint64_t Lower, uint32_t Upper>
template<NamedVectorConstant constant>
void FLD_Const(OpcodeArgs);

void FBLD(OpcodeArgs);
Expand Down
32 changes: 12 additions & 20 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,36 +196,34 @@ void OpDispatchBuilder::FBSTP(OpcodeArgs) {
SetX87Top(top);
}

// Emits IR for the x87 constant-load opcodes: moves TOP down by one slot and
// stores the requested constant into the new ST[TOP].
//
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped. The `<uint64_t Lower, uint32_t Upper>` header and the
// _Constant/_VCastFromGPR/_VInsGPR sequence are the removed lines; the
// `<NamedVectorConstant constant>` header and the
// LoadAndCacheNamedVectorConstant call are the added lines that replace them.
template<uint64_t Lower, uint32_t Upper>
template<NamedVectorConstant constant>
void OpDispatchBuilder::FLD_Const(OpcodeArgs) {
// Update TOP
// New TOP is (old TOP - 1) masked with 7; the slot is passed to
// SetX87ValidTag before TOP itself is written back.
auto orig_top = GetX87Top();
auto top = _And(OpSize::i32Bit, _Sub(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7));
SetX87ValidTag(top, true);
SetX87Top(top);

// Removed path: materialize the value by hand from two GPR constants
// (Lower first, then Upper inserted at element index 1).
auto low = _Constant(Lower);
auto high = _Constant(Upper);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
// Added path: fetch the same value through the named-constant helper instead.
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, constant);

// Write to ST[TOP]
_StoreContextIndexed(data, top, 16, MMBaseOffset(), 16, FPRClass);
}

// Explicit instantiations of FLD_Const, one per x87 constant
// (1.0, log2(10), log2(e), pi, log10(2), ln(2), 0.0).
//
// NOTE(review): diff markers were stripped from this hunk. After each
// `template` keyword, the first declaration (integer Lower/Upper arguments)
// is the removed line and the second (NamedVectorConstant argument) is the
// added line that replaces it.
template
void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_ONE>(OpcodeArgs); // 1.0
template
void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10>(OpcodeArgs); // log2l(10)
template
void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E>(OpcodeArgs); // log2l(e)
template
void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_PI>(OpcodeArgs); // pi
template
void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2>(OpcodeArgs); // log10l(2)
template
void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2)
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_X87_LOG_2>(OpcodeArgs); // log(2)
template
void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0
void OpDispatchBuilder::FLD_Const<NamedVectorConstant::NAMED_VECTOR_ZERO>(OpcodeArgs); // 0.0

void OpDispatchBuilder::FILD(OpcodeArgs) {
// Update TOP
Expand Down Expand Up @@ -958,10 +956,7 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs) {
OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass);

if (Plus1) {
auto low = _Constant(0x8000'0000'0000'0000ULL);
auto high = _Constant(0b0'011'1111'1111'1111);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE);
st0 = _F80Add(st0, data);
}

Expand All @@ -981,10 +976,7 @@ void OpDispatchBuilder::X87TAN(OpcodeArgs) {

auto result = _F80TAN(a);

auto low = _Constant(0x8000'0000'0000'0000ULL);
auto high = _Constant(0b0'011'1111'1111'1111ULL);
OrderedNode *data = _VCastFromGPR(16, 8, low);
data = _VInsGPR(16, 8, 1, data, high);
OrderedNode *data = LoadAndCacheNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE);

// TODO: ACCURACY: should check source is in range -2^63 to +2^63
SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(_Constant(0));
Expand Down
4 changes: 2 additions & 2 deletions unittests/InstructionCountCI/Crypto/H0F3A.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"0x66 0x0f 0x3a 0xdf"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2112]",
"ldr q2, [x28, #2160]",
"movi v3.2d, #0x0",
"mov v16.16b, v17.16b",
"unimplemented (Unimplemented)",
Expand All @@ -68,7 +68,7 @@
"0x66 0x0f 0x3a 0xdf"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2112]",
"ldr q2, [x28, #2160]",
"movi v3.2d, #0x0",
"mov v16.16b, v17.16b",
"unimplemented (Unimplemented)",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/FlagM/Secondary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,7 @@
"Comment": "0x0f 0xd7",
"ExpectedArm64ASM": [
"ldr d2, [x28, #768]",
"ldr d3, [x28, #2224]",
"ldr d3, [x28, #2272]",
"cmlt v2.16b, v2.16b, #0",
"and v2.16b, v2.16b, v3.16b",
"addp v2.16b, v2.16b, v2.16b",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"ExpectedInstructionCount": 7,
"Comment": "0x66 0x0f 0xd7",
"ExpectedArm64ASM": [
"ldr q2, [x28, #2224]",
"ldr q2, [x28, #2272]",
"cmlt v3.16b, v16.16b, #0",
"and v2.16b, v3.16b, v2.16b",
"addp v2.16b, v2.16b, v2.16b",
Expand Down
77 changes: 18 additions & 59 deletions unittests/InstructionCountCI/FlagM/x87.json
Original file line number Diff line number Diff line change
Expand Up @@ -4502,7 +4502,7 @@
]
},
"fld1": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xe8 /5"
],
Expand All @@ -4516,16 +4516,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0x8000000000000000",
"mov w22, #0x3fff",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2304]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldl2t": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xe9 /5"
],
Expand All @@ -4539,19 +4536,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0x8afe",
"movk x21, #0xcd1b, lsl #16",
"movk x21, #0x784b, lsl #32",
"movk x21, #0xd49a, lsl #48",
"mov w22, #0x4000",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2320]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldl2e": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xea /5"
],
Expand All @@ -4565,19 +4556,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0xf0bc",
"movk x21, #0x5c17, lsl #16",
"movk x21, #0x3b29, lsl #32",
"movk x21, #0xb8aa, lsl #48",
"mov w22, #0x3fff",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2336]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldpi": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xeb /5"
],
Expand All @@ -4591,19 +4576,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0xc235",
"movk x21, #0x2168, lsl #16",
"movk x21, #0xdaa2, lsl #32",
"movk x21, #0xc90f, lsl #48",
"mov w22, #0x4000",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2352]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldlg2": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xec /5"
],
Expand All @@ -4617,19 +4596,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0xf799",
"movk x21, #0xfbcf, lsl #16",
"movk x21, #0x9a84, lsl #32",
"movk x21, #0x9a20, lsl #48",
"mov w22, #0x3ffd",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2368]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldln2": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xed /5"
],
Expand All @@ -4643,19 +4616,13 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov x21, #0x79ac",
"movk x21, #0xd1cf, lsl #16",
"movk x21, #0x17f7, lsl #32",
"movk x21, #0xb172, lsl #48",
"mov w22, #0x3ffe",
"fmov d2, x21",
"mov v2.d[1], x22",
"ldr q2, [x28, #2384]",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
},
"fldz": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 12,
"Comment": [
"0xd9 11b 0xee /5"
],
Expand All @@ -4669,9 +4636,7 @@
"orr w21, w22, w21",
"strb w21, [x28, #1026]",
"strb w20, [x28, #747]",
"mov w21, #0x0",
"fmov d2, x21",
"mov v2.d[1], x21",
"movi v2.2d, #0x0",
"add x0, x28, x20, lsl #4",
"str q2, [x0, #768]"
]
Expand Down Expand Up @@ -4771,7 +4736,7 @@
]
},
"fptan": {
"ExpectedInstructionCount": 49,
"ExpectedInstructionCount": 46,
"Comment": [
"0xd9 11b 0xf2 /6"
],
Expand Down Expand Up @@ -4815,10 +4780,7 @@
"eor v2.16b, v2.16b, v2.16b",
"mov v2.d[0], x0",
"mov v2.h[4], w1",
"mov x21, #0x8000000000000000",
"mov w23, #0x3fff",
"fmov d3, x21",
"mov v3.d[1], x23",
"ldr q3, [x28, #2304]",
"mov w21, #0x0",
"strb w21, [x28, #746]",
"add x0, x28, x20, lsl #4",
Expand Down Expand Up @@ -5082,7 +5044,7 @@
]
},
"fyl2xp1": {
"ExpectedInstructionCount": 79,
"ExpectedInstructionCount": 76,
"Comment": [
"0xd9 11b 0xf9 /7"
],
Expand All @@ -5100,10 +5062,7 @@
"ldr q2, [x0, #768]",
"add x0, x28, x21, lsl #4",
"ldr q3, [x0, #768]",
"mov x20, #0x8000000000000000",
"mov w22, #0x3fff",
"fmov d4, x20",
"mov v4.d[1], x22",
"ldr q4, [x28, #2304]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
Expand Down
2 changes: 1 addition & 1 deletion unittests/InstructionCountCI/H0F38.json
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@
"0x66 0x0f 0x38 0x41"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2000]",
"ldr q2, [x28, #2048]",
"zip1 v3.8h, v2.8h, v17.8h",
"zip2 v2.8h, v2.8h, v17.8h",
"umin v2.4s, v3.4s, v2.4s",
Expand Down
14 changes: 7 additions & 7 deletions unittests/InstructionCountCI/H0F3A.json
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2128]",
"ldr q2, [x28, #2176]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand All @@ -325,7 +325,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2144]",
"ldr q2, [x28, #2192]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand All @@ -344,7 +344,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2160]",
"ldr q2, [x28, #2208]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand All @@ -364,7 +364,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2176]",
"ldr q2, [x28, #2224]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand All @@ -383,7 +383,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2192]",
"ldr q2, [x28, #2240]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand All @@ -393,7 +393,7 @@
"0x66 0x0f 0x3a 0x0c"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2208]",
"ldr q2, [x28, #2256]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
},
Expand Down Expand Up @@ -462,7 +462,7 @@
"0x66 0x0f 0x3a 0x0e"
],
"ExpectedArm64ASM": [
"ldr x0, [x28, #1736]",
"ldr x0, [x28, #1784]",
"ldr q2, [x0, #3440]",
"tbx v16.16b, {v17.16b}, v2.16b"
]
Expand Down
Loading

0 comments on commit 98c5607

Please sign in to comment.