Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure predicate cache is reset when control flow leaves block #4274

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion FEXCore/Scripts/json_ir_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class OpDefinition:
NonSSAArgNum: int
DynamicDispatch: bool
LoweredX87: bool
MaybeClobbersPredRegs: bool
JITDispatch: bool
JITDispatchOverride: str
TiedSource: int
Expand All @@ -79,6 +80,7 @@ def __init__(self):
self.NonSSAArgNum = 0
self.DynamicDispatch = False
self.LoweredX87 = False
self.MaybeClobbersPredRegs = False
self.JITDispatch = True
self.JITDispatchOverride = None
self.TiedSource = -1
Expand Down Expand Up @@ -223,7 +225,7 @@ def parse_ops(ops):
(OpArg.Type == "GPR" or
OpArg.Type == "GPRPair" or
OpArg.Type == "FPR" or
OpArg.Type == "PR")):
OpArg.Type == "PRED")):
OpDef.EmitValidation.append(f"GetOpRegClass({ArgName}) == InvalidClass || WalkFindRegClass({ArgName}) == {OpArg.Type}Class")

OpArg.Name = ArgName
Expand Down Expand Up @@ -277,6 +279,9 @@ def parse_ops(ops):
assert("JITDispatch" not in op_val)
OpDef.JITDispatch = False

if "MaybeClobbersPredRegs" in op_val:
OpDef.MaybeClobbersPredRegs = op_val["MaybeClobbersPredRegs"]

if "TiedSource" in op_val:
OpDef.TiedSource = op_val["TiedSource"]

Expand Down Expand Up @@ -506,6 +511,7 @@ def print_ir_hassideeffects():
("HasSideEffects", "bool"),
("ImplicitFlagClobber", "bool"),
("LoweredX87", "bool"),
("MaybeClobbersPredRegs", "bool"),
("TiedSource", "int8_t"),
]:
output_file.write(
Expand Down Expand Up @@ -707,6 +713,9 @@ def print_ir_allocator_helpers():
"\t\tif(MMXState == MMXState_MMX) ChgStateMMX_X87();\n"
)

if op.MaybeClobbersPredRegs:
output_file.write("\t\tResetInitPredicateCache();\n")

output_file.write("\t\tauto _Op = AllocateOp<IROp_{}, IROps::OP_{}>();\n".format(op.Name, op.Name.upper()))

if op.SSAArgNum != 0:
Expand Down
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4314,7 +4314,7 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T
Ref MemSrc = LoadEffectiveAddress(A, true);
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
// Using SVE we can load this with a single instruction.
auto PReg = _InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
auto PReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
return _LoadMemPredicate(OpSize::i128Bit, OpSize::i16Bit, PReg, MemSrc);
} else {
// For X87 extended doubles, Split the load.
Expand Down Expand Up @@ -4448,7 +4448,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
if (OpSize == OpSize::f80Bit) {
Ref MemStoreDst = LoadEffectiveAddress(A, true);
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
auto PReg = _InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
auto PReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
_StoreMemPredicate(OpSize::i128Bit, OpSize::i16Bit, Src, PReg, MemStoreDst);
} else {
// For X87 extended doubles, split before storing
Expand Down
5 changes: 1 addition & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class OpDispatchBuilder final : public IREmitter {
CachedNZCV = nullptr;
CFInverted = CFInvertedABI;
FlushRegisterCache();
ResetInitPredicateCache();

// New block needs to reset segment telemetry.
SegmentsNeedReadCheck = ~0U;
Expand Down Expand Up @@ -718,7 +719,6 @@ class OpDispatchBuilder final : public IREmitter {
void FNINIT(OpcodeArgs);

void X87ModifySTP(OpcodeArgs, bool Inc);
void X87SinCos(OpcodeArgs);
void X87FYL2X(OpcodeArgs, bool IsFYL2XP1);
void X87LDENV(OpcodeArgs);
void X87FLDCW(OpcodeArgs);
Expand Down Expand Up @@ -764,9 +764,6 @@ class OpDispatchBuilder final : public IREmitter {
void FTSTF64(OpcodeArgs);
void FRNDINTF64(OpcodeArgs);
void FSQRTF64(OpcodeArgs);
void X87UnaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87BinaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87SinCosF64(OpcodeArgs);
void X87FLDCWF64(OpcodeArgs);
void X87TANF64(OpcodeArgs);
void X87ATANF64(OpcodeArgs);
Expand Down
14 changes: 8 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ void OpDispatchBuilder::FILD(OpcodeArgs) {

void OpDispatchBuilder::FST(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i128Bit, true, Width);
Ref PredReg = Invalid();
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
PredReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
}
_StoreStackMemory(PredReg, Mem, OpSize::i128Bit, true, Width);
if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
}
Expand Down Expand Up @@ -267,9 +271,9 @@ void OpDispatchBuilder::FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Re

void OpDispatchBuilder::FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
if (Op->Src[0].IsNone()) {
const auto Offset = Op->OP & 7;
const auto St0 = 0;
const auto Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;
const uint8_t Offset = Op->OP & 7;
const uint8_t St0 = 0;
const uint8_t Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;

if (Reverse ^ (ResInST0 == OpResult::RES_STI)) {
_F80SubStack(Result, Offset, St0);
Expand Down Expand Up @@ -751,13 +755,11 @@ void OpDispatchBuilder::FNINIT(OpcodeArgs) {
}

void OpDispatchBuilder::X87FFREE(OpcodeArgs) {
  // Only the low three bits of the opcode are forwarded to _InvalidateStack
  // (presumably the ST(i) index encoded in the instruction - confirm against
  // the opcode tables).
  const auto StackSlot = Op->OP & 7;
  _InvalidateStack(StackSlot);
}

void OpDispatchBuilder::X87EMMS(OpcodeArgs) {
  // All tags end up set to 0b11; 0xff is the value handed to _InvalidateStack
  // to request that (presumably a mask covering every stack entry).
  const auto AllSlots = 0xff;
  _InvalidateStack(AllSlots);
}

Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void OpDispatchBuilder::FILDF64(OpcodeArgs) {

void OpDispatchBuilder::FSTF64(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i64Bit, true, Width);
_StoreStackMemory(Invalid(), Mem, OpSize::i64Bit, true, Width);

if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
Expand Down
Loading
Loading