Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpcodeDispatcher: drop ZeroMultipleFlags #3544

Merged
merged 3 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -981,16 +981,7 @@ friend class FEXCore::IR::PassManager;
}

// Reports whether the flag at raw RFLAGS bit position BitOffset belongs to the
// NZCV group (CF, ZF, SF, OF).
//
// The offset is turned into a single-bit mask and handed to ContainsNZCV so the
// membership test lives in one place instead of a hand-maintained case list.
// NOTE(review): assumes ContainsNZCV() tests the mask against exactly those four
// raw flag bits, and that BitOffset < 32 so the shift is well defined - confirm.
static bool IsNZCV(unsigned BitOffset) {
  return ContainsNZCV(1U << BitOffset);
}

OrderedNode* CachedNZCV{};
Expand Down Expand Up @@ -1422,7 +1413,7 @@ friend class FEXCore::IR::PassManager;
SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(_Constant(Constant << 4));
}

void ZeroMultipleFlags(uint32_t BitMask);
void ZeroPF_AF();

CondClassType CondForNZCVBit(unsigned BitOffset, bool Invert) {
switch (BitOffset) {
Expand Down
72 changes: 7 additions & 65 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,61 +38,10 @@ constexpr std::array<uint32_t, 17> FlagOffsets = {
FEXCore::X86State::RFLAG_ID_LOC,
};

// NOTE(review): this span is a diff hunk (-38,61 +38,10) shown without its +/-
// markers, so two definitions are interleaved and the braces do not balance as
// written. Going by the hunk's line counts, ZeroMultipleFlags and its entire
// body are the removed side; the surviving definition is ZeroPF_AF, made up of
// the "PF is stored inverted" comment, the SetRFLAG<PF>(_Constant(1)) call and
// SetAF(0) near the end of the span.
//
// ZeroMultipleFlags: zeroes every flag whose bit is set in FlagsMask, where bit
// N of the mask selects the flag at raw offset N.
void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) {
auto ZeroConst = _Constant(0);

if (ContainsNZCV(FlagsMask)) {
// NZCV is stored packed together.
// It is cheaper to zero NZCV with move+bic than with multiple bics.
auto NZCVFlagsMask = FlagsMask & FullNZCVMask;
if (NZCVFlagsMask == FullNZCVMask) {
// All four NZCV flags requested: clear the whole group at once.
ZeroNZCV();
}
else {
const auto IndexMask = NZCVIndexMask(FlagsMask);

if (std::popcount(NZCVFlagsMask) == 1) {
// Exactly one NZCV flag requested: a single flag store is cheaper here.

for (size_t i = 0; NZCVFlagsMask && i < FlagOffsets.size(); ++i) {
const auto FlagOffset = FlagOffsets[i];
const auto FlagMask = 1U << FlagOffset;
if (!(FlagMask & NZCVFlagsMask)) {
continue;
}
SetRFLAG(ZeroConst, FlagOffset);
NZCVFlagsMask &= ~(FlagMask);
}
}
else {
// Two or three NZCV flags requested: clear them with one and-not on the
// packed value.
auto IndexMaskConstant = _Constant(IndexMask);
auto NewNZCV = _Andn(OpSize::i64Bit, GetNZCV(), IndexMaskConstant);
SetNZCV(NewNZCV);
}
// Unset the possibly set bits.
PossiblySetNZCVBits &= ~IndexMask;
}

// Handled NZCV, so remove it from the mask.
FlagsMask &= ~FullNZCVMask;
}

// ZeroPF_AF: zeroes the PF and AF flags (the replacement definition).
void OpDispatchBuilder::ZeroPF_AF() {
// PF is stored inverted, so invert it when we zero.
if (FlagsMask & (1u << X86State::RFLAG_PF_RAW_LOC)) {
SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(_Constant(1));
FlagsMask &= ~(1u << X86State::RFLAG_PF_RAW_LOC);
}

// Handle remaining masks: store zero to each flag still selected.
for (size_t i = 0; FlagsMask && i < FlagOffsets.size(); ++i) {
const auto FlagOffset = FlagOffsets[i];
const auto FlagMask = 1U << FlagOffset;
if (!(FlagMask & FlagsMask)) {
continue;
}
SetRFLAG(ZeroConst, FlagOffset);
FlagsMask &= ~(FlagMask);
}
// Raw PF value 1 is a zeroed PF because of the inverted storage noted above.
SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(_Constant(1));
SetAF(0);
}

void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) {
Expand Down Expand Up @@ -994,9 +943,7 @@ void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Result) {
// is in the range [0, 63]. In particular, it is always positive. So a
// combined NZ test will correctly zero SF/CF/OF while setting ZF.
SetNZ_ZeroCV(OpSize::i32Bit, Result);

ZeroMultipleFlags((1U << X86State::RFLAG_AF_RAW_LOC) |
(1U << X86State::RFLAG_PF_RAW_LOC));
ZeroPF_AF();
}

void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) {
Expand All @@ -1022,15 +969,10 @@ void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode *Result

// Emits the flag state for RDRAND: OF, SF, ZF, AF and PF are cleared and CF
// takes the value of Src.
//
// Src: node holding the value to store into CF.
void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode *Src) {
  // OF, SF, ZF, AF, PF all zero.
  // The dedicated helpers are called directly; building a combined mask and
  // clearing through it as well would only repeat the same stores.
  ZeroNZCV();
  ZeroPF_AF();

  // CF is set to the incoming source. Written after ZeroNZCV() so the clear
  // above does not discard it.
  SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Src);
}

Expand Down
15 changes: 3 additions & 12 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4594,11 +4594,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) {
SetNZ_ZeroCV(32, Test1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Test2);

uint32_t FlagsMaskToZero =
(1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC);

ZeroMultipleFlags(FlagsMaskToZero);
ZeroPF_AF();
}

void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) {
Expand Down Expand Up @@ -4635,8 +4631,7 @@ void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) {
SetNZ_ZeroCV(32, AndGPR);
SetRFLAG<X86State::RFLAG_CF_RAW_LOC>(CFResult);

ZeroMultipleFlags((1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC));
ZeroPF_AF();
}

template <size_t ElementSize>
Expand Down Expand Up @@ -5568,11 +5563,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask
SetRFLAG<X86State::RFLAG_CF_RAW_LOC>(GetFlagBit(18));
SetRFLAG<X86State::RFLAG_OF_RAW_LOC>(GetFlagBit(19));

uint32_t FlagsMaskToZero =
(1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC);

ZeroMultipleFlags(FlagsMaskToZero);
ZeroPF_AF();
}

void OpDispatchBuilder::VPCMPESTRIOp(OpcodeArgs) {
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -704,8 +704,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -719,8 +719,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -733,8 +733,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -748,8 +748,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -763,8 +763,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -777,8 +777,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/FlagM/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"umov w20, v0.b[0]",
"bfxil x4, x20, #0, #16",
"tst w20, w20",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt eax, ebx": {
Expand All @@ -37,8 +37,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt rax, rbx": {
Expand All @@ -50,8 +50,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"tzcnt ax, bx": {
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -855,8 +855,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -869,8 +869,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -884,8 +884,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -899,8 +899,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -913,8 +913,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,8 @@
"umov w20, v0.b[0]",
"bfxil x4, x20, #0, #16",
"tst w20, w20",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt eax, ebx": {
Expand All @@ -422,8 +422,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt rax, rbx": {
Expand All @@ -435,8 +435,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"tzcnt ax, bx": {
Expand Down
Loading