Skip to content

Commit

Permalink
Merge pull request #1257 from CallumDev/caspair_fix_armv8
Browse files Browse the repository at this point in the history
Fix unaligned CASPair on ARMv8.0
  • Loading branch information
Sonicadvance1 authored Sep 5, 2021
2 parents dd34316 + dec5121 commit 2c02dca
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 62 deletions.
99 changes: 85 additions & 14 deletions External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,12 @@ static bool StoreCAS8(uint8_t &Expected, uint8_t Val, uint64_t Addr) {
return Atom->compare_exchange_strong(Expected, Val);
}

bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) {
mcontext_t* mcontext = &reinterpret_cast<ucontext_t*>(_ucontext)->uc_mcontext;
siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);

if (info->si_code != BUS_ADRALN) {
// This only handles alignment problems
return false;
}

uint32_t Size = (Instr >> 30) & 1;

uint32_t DesiredReg1 = Instr & 0b11111;
uint32_t DesiredReg2 = DesiredReg1 + 1;
uint32_t ExpectedReg1 = (Instr >> 16) & 0b11111;
uint32_t ExpectedReg2 = ExpectedReg1 + 1;
uint32_t AddressReg = (Instr >> 5) & 0b11111;
static bool RunCASPAL(void *_ucontext, void *_info, uint32_t Size, uint32_t DesiredReg1, uint32_t DesiredReg2, uint32_t ExpectedReg1, uint32_t ExpectedReg2, uint32_t AddressReg) {
mcontext_t* mcontext = &reinterpret_cast<ucontext_t*>(_ucontext)->uc_mcontext;

//Bus_ADRALN check happens in HandleCASPAL and HandleCASPAL_ARMv8

if (Size == 0) {
// 32bit
Expand Down Expand Up @@ -259,6 +249,87 @@ bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) {
return false;
}

bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) {
siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);

if (info->si_code != BUS_ADRALN) {
// This only handles alignment problems
return false;
}

uint32_t Size = (Instr >> 30) & 1;

uint32_t DesiredReg1 = Instr & 0b11111;
uint32_t DesiredReg2 = DesiredReg1 + 1;
uint32_t ExpectedReg1 = (Instr >> 16) & 0b11111;
uint32_t ExpectedReg2 = ExpectedReg1 + 1;
uint32_t AddressReg = (Instr >> 5) & 0b11111;

return RunCASPAL(_ucontext, _info, Size, DesiredReg1, DesiredReg2, ExpectedReg1, ExpectedReg2, AddressReg);
}

uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr) {
mcontext_t* mcontext = &reinterpret_cast<ucontext_t*>(_ucontext)->uc_mcontext;
siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);

if (info->si_code != BUS_ADRALN) {
// This only handles alignment problems
return 0;
}
// caspair
// [1] ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc)); <-- DataReg & AddrReg
// [2] cmp(TMP2.W(), Expected.first.W()); <-- ExpectedReg1
// [3] ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq); <-- ExpectedREg2
// [4] b(&LoopNotExpected, Condition::ne);
// [5] stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc)); <-- DesiredReg
// [6] cbnz(TMP2.W(), &LoopTop);
// [7] mov(Dst.first.W(), Expected.first.W());
// [8] mov(Dst.second.W(), Expected.second.W());
// [9] b(&LoopExpected);
// [10] mov(Dst.first.W(), TMP2.W());
// [11] mov(Dst.second.W(), TMP3.W());
// [12] clrex();

uint32_t *PC = (uint32_t*)ArchHelpers::Context::GetPc(_ucontext);

uint32_t Size = (Instr >> 30) & 1;
uint32_t AddrReg = (Instr >> 5) & 0x1F;
uint32_t DataReg = Instr & 0x1F;
uint32_t DataReg2 = (Instr >> 10) & 0x1F;

uint32_t ExpectedReg1{};
uint32_t ExpectedReg2{};

uint32_t DesiredReg1{};
uint32_t DesiredReg2{};

if(Size != 0) { //Only 32-bit pairs
return 0;
}

for(int i = 1; i < 10; i++) {
uint32_t NextInstr = PC[i];
if ((NextInstr & FEXCore::ArchHelpers::Arm64::ALU_OP_MASK) == FEXCore::ArchHelpers::Arm64::CMP_INST) {
ExpectedReg1 = GetRmReg(NextInstr);
} else if ((NextInstr & FEXCore::ArchHelpers::Arm64::CCMP_MASK) == FEXCore::ArchHelpers::Arm64::CCMP_INST) {
ExpectedReg2 = GetRmReg(NextInstr);
} else if ((NextInstr & FEXCore::ArchHelpers::Arm64::STLXP_MASK) == FEXCore::ArchHelpers::Arm64::STLXP_INST) {
DesiredReg1 = (NextInstr & 0x1F);
DesiredReg2 = (NextInstr >> 10) & 0x1F;
}
}

//mov expected into the temp registers used by JIT
mcontext->regs[DataReg] = mcontext->regs[ExpectedReg1];
mcontext->regs[DataReg2] = mcontext->regs[ExpectedReg2];

if(RunCASPAL(_ucontext, _info, Size, DesiredReg1, DesiredReg2, DataReg, DataReg2, AddrReg)) {
return 9 * sizeof(uint32_t); // skip to mov + clrex
} else {
return 0;
}
}

uint16_t DoLoad16(uint64_t Addr) {
uint64_t AlignmentMask = 0b1111;
if ((Addr & AlignmentMask) == 15) {
Expand Down
5 changes: 5 additions & 0 deletions External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ namespace FEXCore::ArchHelpers::Arm64 {
constexpr uint32_t AND_INST = 0x0A'00'00'00;
constexpr uint32_t OR_INST = 0x2A'00'00'00;
constexpr uint32_t EOR_INST = 0x4A'00'00'00;

constexpr uint32_t CCMP_MASK = 0x7F'E0'0C'10;
constexpr uint32_t CCMP_INST = 0x7A'40'00'00;

enum ExclusiveAtomicPairType {
TYPE_SWAP,
TYPE_ADD,
Expand Down Expand Up @@ -78,6 +82,7 @@ namespace FEXCore::ArchHelpers::Arm64 {
bool HandleAtomicLoad128(void *_ucontext, void *_info, uint32_t Instr);
uint64_t HandleAtomicLoadstoreExclusive(void *_ucontext, void *_info);
bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr);
uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr);
bool HandleCASAL(void *_ucontext, void *_info, uint32_t Instr);
bool HandleAtomicMemOp(void *_ucontext, void *_info, uint32_t Instr);
}
Original file line number Diff line number Diff line change
Expand Up @@ -1666,14 +1666,11 @@ void InterpreterOps::InterpretIR(FEXCore::Core::InternalThreadState *Thread, uin
// Size is the size of each pair element
switch (Size) {
case 4: {
std::atomic<uint64_t> *Data = *GetSrc<std::atomic<uint64_t> **>(SSAData, Op->Header.Args[2]);

uint64_t Src1 = *GetSrc<uint64_t*>(SSAData, Op->Header.Args[0]);
uint64_t Src2 = *GetSrc<uint64_t*>(SSAData, Op->Header.Args[1]);

uint64_t Expected = Src1;
bool Result = Data->compare_exchange_strong(Expected, Src2);
GD = Result ? Src1 : Expected;
GD = AtomicCompareAndSwap(
*GetSrc<uint64_t*>(SSAData, Op->Header.Args[0]),
*GetSrc<uint64_t*>(SSAData, Op->Header.Args[1]),
*GetSrc<uint64_t**>(SSAData, Op->Header.Args[2])
);
break;
}
case 8: {
Expand Down
10 changes: 2 additions & 8 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,12 @@ DEF_OP(CASPair) {
aarch64::Label LoopNotExpected;
aarch64::Label LoopExpected;
bind(&LoopTop);
nop();

ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc));
nop();
cmp(TMP2.W(), Expected.first.W());
ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq);
b(&LoopNotExpected, Condition::ne);
nop();
stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc));
nop();
cbnz(TMP2.W(), &LoopTop);
mov(Dst.first.W(), Expected.first.W());
mov(Dst.second.W(), Expected.second.W());
Expand All @@ -73,15 +70,12 @@ DEF_OP(CASPair) {
aarch64::Label LoopNotExpected;
aarch64::Label LoopExpected;
bind(&LoopTop);
nop();

ldaxp(TMP2.X(), TMP3.X(), MemOperand(MemSrc));
nop();
cmp(TMP2.X(), Expected.first.X());
ccmp(TMP3.X(), Expected.second.X(), NoFlag, Condition::eq);
b(&LoopNotExpected, Condition::ne);
nop();
stlxp(TMP2.X(), Desired.first.X(), Desired.second.X(), MemOperand(MemSrc));
nop();
cbnz(TMP2.X(), &LoopTop);
mov(Dst.first.X(), Expected.first.X());
mov(Dst.second.X(), Expected.second.X());
Expand Down
38 changes: 14 additions & 24 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,32 +392,22 @@ bool Arm64JITCore::HandleSIGBUS(int Signal, void *info, void *ucontext) {
}
}
else if ((Instr & FEXCore::ArchHelpers::Arm64::LDAXP_MASK) == FEXCore::ArchHelpers::Arm64::LDAXP_INST) { // LDAXP
uint32_t DataReg2 = (Instr >> 10) & 0x1F;
// Convert to LDP
uint32_t LDP = 0b0010'1001'0100'0000'0000'0000'0000'0000;
LDP |= Size << 31;
LDP |= DataReg2 << 10;
LDP |= AddrReg << 5;
LDP |= DataReg;
PC[-1] = DMB;
PC[0] = LDP;
PC[1] = DMB;
// Back up one instruction and have another go
ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) - 4);
//Should be compare and swap pair only. LDAXP not used elsewhere
uint64_t BytesToSkip = FEXCore::ArchHelpers::Arm64::HandleCASPAL_ARMv8(ucontext, info, Instr);
if (BytesToSkip) {
// Skip this instruction now
ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) + BytesToSkip);
return true;
}
else {
LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAXP: PC: {} Instruction: 0x{:08x}\n", fmt::ptr(PC), PC[0]);
return false;
}
}
else if ((Instr & FEXCore::ArchHelpers::Arm64::STLXP_MASK) == FEXCore::ArchHelpers::Arm64::STLXP_INST) { // STLXP
uint32_t DataReg2 = (Instr >> 10) & 0x1F;
// Convert to STP
uint32_t STP = 0b0010'1001'0000'0000'0000'0000'0000'0000;
STP |= Size << 31;
STP |= DataReg2 << 10;
STP |= AddrReg << 5;
STP |= DataReg;
PC[-1] = DMB;
PC[0] = STP;
PC[1] = DMB;
// Back up one instruction and have another go
ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) - 4);
//Should not trigger - middle of an LDAXP/STAXP pair.
LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLXP: PC: {} Instruction: 0x{:08x}\n", fmt::ptr(PC), PC[0]);
return false;
}
else if ((Instr & FEXCore::ArchHelpers::Arm64::CASPAL_MASK) == FEXCore::ArchHelpers::Arm64::CASPAL_INST) { // CASPAL
if (FEXCore::ArchHelpers::Arm64::HandleCASPAL(ucontext, info, Instr)) {
Expand Down
8 changes: 0 additions & 8 deletions unittests/ASM/Disabled_Tests_ARMv8.0
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
# Doesn't support unaligned on armv8.0
Test_Secondary/09_XX_01_7.asm
Test_Secondary/09_XX_01_8.asm
Test_Secondary/09_XX_01_9.asm
Test_Secondary/09_XX_01_11.asm
Test_Secondary/09_XX_01_12.asm
Test_Secondary/09_XX_01_13.asm

# Doesn't support unaligned atomic memory ops on armv8.0
Test_Primary/Primary_01_Atomic16.asm
Test_Primary/Primary_01_Atomic32.asm
Expand Down

0 comments on commit 2c02dca

Please sign in to comment.