From 8654d19f02cbccbc584238c31c19315cd5a5d944 Mon Sep 17 00:00:00 2001 From: CallumDev Date: Sun, 5 Sep 2021 16:54:53 +0930 Subject: [PATCH 1/3] JIT: Fix unaligned CASPair on ARMv8.0 --- .../Interface/Core/ArchHelpers/Arm64.cpp | 103 +++++++++++++++--- .../Source/Interface/Core/ArchHelpers/Arm64.h | 5 + .../Source/Interface/Core/JIT/Arm64/JIT.cpp | 38 +++---- unittests/ASM/Disabled_Tests_ARMv8.0 | 8 -- 4 files changed, 108 insertions(+), 46 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp index 197bb5e92d..b8ef90974c 100644 --- a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp +++ b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp @@ -64,22 +64,12 @@ static bool StoreCAS8(uint8_t &Expected, uint8_t Val, uint64_t Addr) { return Atom->compare_exchange_strong(Expected, Val); } -bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) { - mcontext_t* mcontext = &reinterpret_cast(_ucontext)->uc_mcontext; - siginfo_t* info = reinterpret_cast(_info); - - if (info->si_code != BUS_ADRALN) { - // This only handles alignment problems - return false; - } - uint32_t Size = (Instr >> 30) & 1; - uint32_t DesiredReg1 = Instr & 0b11111; - uint32_t DesiredReg2 = DesiredReg1 + 1; - uint32_t ExpectedReg1 = (Instr >> 16) & 0b11111; - uint32_t ExpectedReg2 = ExpectedReg1 + 1; - uint32_t AddressReg = (Instr >> 5) & 0b11111; +static bool RunCASPAL(void *_ucontext, void *_info, uint32_t Size, uint32_t DesiredReg1, uint32_t DesiredReg2, uint32_t ExpectedReg1, uint32_t ExpectedReg2, uint32_t AddressReg) { + mcontext_t* mcontext = &reinterpret_cast(_ucontext)->uc_mcontext; + + //Bus_ADRALN check happens in HandleCASPAL and HandleCASPAL_ARMv8 if (Size == 0) { // 32bit @@ -259,6 +249,91 @@ bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) { return false; } +bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) { + siginfo_t* info = reinterpret_cast(_info); + + if (info->si_code != BUS_ADRALN) { + // This only handles alignment problems + return false; + } + + uint32_t Size = (Instr >> 30) & 1; + + uint32_t DesiredReg1 = Instr & 0b11111; + uint32_t DesiredReg2 = DesiredReg1 + 1; + uint32_t ExpectedReg1 = (Instr >> 16) & 0b11111; + uint32_t ExpectedReg2 = ExpectedReg1 + 1; + uint32_t AddressReg = (Instr >> 5) & 0b11111; + + return RunCASPAL(_ucontext, _info, Size, DesiredReg1, DesiredReg2, ExpectedReg1, ExpectedReg2, AddressReg); +} + +uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr) { + mcontext_t* mcontext = &reinterpret_cast(_ucontext)->uc_mcontext; + siginfo_t* info = reinterpret_cast(_info); + + if (info->si_code != BUS_ADRALN) { + // This only handles alignment problems + return 0; + } + // caspair + // [0] (not reached) nop + // [1] ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc)); <-- DataReg & AddrReg + // [2] nop + // [3] cmp(TMP2.W(), Expected.first.W()); <-- ExpectedReg1 + // [4] ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq); <-- ExpectedREg2 + // [5] b(&LoopNotExpected, Condition::ne); + // [6] nop + // [7] stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc)); <-- DesiredReg + // [8] nop(); + // [9] cbnz(TMP2.W(), &LoopTop); + // [10] mov(Dst.first.W(), Expected.first.W()); + // [11] mov(Dst.second.W(), Expected.second.W()); + // [12] b(&LoopExpected); + // [13] mov(Dst.first.W(), TMP2.W()); + // [14] mov(Dst.second.W(), TMP3.W()); + // [15] clrex(); + + uint32_t *PC = (uint32_t*)ArchHelpers::Context::GetPc(_ucontext); + + uint32_t Size = (Instr >> 30) & 1; + uint32_t AddrReg = (Instr >> 5) & 0x1F; + uint32_t DataReg = Instr & 0x1F; + uint32_t DataReg2 = (Instr >> 10) & 0x1F; + + uint32_t ExpectedReg1{}; + uint32_t ExpectedReg2{}; + + uint32_t DesiredReg1{}; + uint32_t DesiredReg2{}; + + if(Size != 0) { //Only 32-bit pairs + return 0; + } + + for(int i = 1; i < 10; i++) { + uint32_t NextInstr = PC[i]; + if ((NextInstr & FEXCore::ArchHelpers::Arm64::ALU_OP_MASK) == FEXCore::ArchHelpers::Arm64::CMP_INST) { + ExpectedReg1 = GetRmReg(NextInstr); + } else if ((NextInstr & FEXCore::ArchHelpers::Arm64::CCMP_MASK) == FEXCore::ArchHelpers::Arm64::CCMP_INST) { + ExpectedReg2 = GetRmReg(NextInstr); + } else if ((NextInstr & FEXCore::ArchHelpers::Arm64::STLXP_MASK) == FEXCore::ArchHelpers::Arm64::STLXP_INST) { + DesiredReg1 = (NextInstr & 0x1F); + DesiredReg2 = (NextInstr >> 10) & 0x1F; + } + } + + //mov expected into the temp registers used by JIT + mcontext->regs[DataReg] = mcontext->regs[ExpectedReg1]; + mcontext->regs[DataReg2] = mcontext->regs[ExpectedReg2]; + + if(RunCASPAL(_ucontext, _info, Size, DesiredReg1, DesiredReg2, DataReg, DataReg2, AddrReg)) { + return 12 * sizeof(uint32_t); // skip to mov + clrex + } else { + return 0; + } +} + uint16_t DoLoad16(uint64_t Addr) { uint64_t AlignmentMask = 0b1111; if ((Addr & AlignmentMask) == 15) { diff --git a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.h b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.h index f65cc6e88a..69fef6dd6d 100644 --- a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.h +++ b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.h @@ -34,6 +34,10 @@ namespace FEXCore::ArchHelpers::Arm64 { constexpr uint32_t AND_INST = 0x0A'00'00'00; constexpr uint32_t OR_INST = 0x2A'00'00'00; constexpr uint32_t EOR_INST = 0x4A'00'00'00; + + constexpr uint32_t CCMP_MASK = 0x7F'E0'0C'10; + constexpr uint32_t CCMP_INST = 0x7A'40'00'00; + enum ExclusiveAtomicPairType { TYPE_SWAP, TYPE_ADD, @@ -78,6 +82,7 @@ namespace FEXCore::ArchHelpers::Arm64 { bool HandleAtomicLoad128(void *_ucontext, void *_info, uint32_t Instr); uint64_t HandleAtomicLoadstoreExclusive(void *_ucontext, void *_info); bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr); + uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr); bool HandleCASAL(void *_ucontext, void *_info, uint32_t Instr); bool HandleAtomicMemOp(void *_ucontext, void *_info, uint32_t Instr); } diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index cb06259c35..c9eb730f99 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -392,32 +392,22 @@ bool Arm64JITCore::HandleSIGBUS(int Signal, void *info, void *ucontext) { } } else if ((Instr & FEXCore::ArchHelpers::Arm64::LDAXP_MASK) == FEXCore::ArchHelpers::Arm64::LDAXP_INST) { // LDAXP - uint32_t DataReg2 = (Instr >> 10) & 0x1F; - // Convert to LDP - uint32_t LDP = 0b0010'1001'0100'0000'0000'0000'0000'0000; - LDP |= Size << 31; - LDP |= DataReg2 << 10; - LDP |= AddrReg << 5; - LDP |= DataReg; - PC[-1] = DMB; - PC[0] = LDP; - PC[1] = DMB; - // Back up one instruction and have another go - ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) - 4); + //Should be compare and swap pair only. LDAXP not used elsewhere + uint64_t BytesToSkip = FEXCore::ArchHelpers::Arm64::HandleCASPAL_ARMv8(ucontext, info, Instr); + if (BytesToSkip) { + // Skip this instruction now + ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) + BytesToSkip); + return true; + } + else { + LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAXP: PC: {} Instruction: 0x{:08x}\n", fmt::ptr(PC), PC[0]); + return false; + } } else if ((Instr & FEXCore::ArchHelpers::Arm64::STLXP_MASK) == FEXCore::ArchHelpers::Arm64::STLXP_INST) { // STLXP - uint32_t DataReg2 = (Instr >> 10) & 0x1F; - // Convert to STP - uint32_t STP = 0b0010'1001'0000'0000'0000'0000'0000'0000; - STP |= Size << 31; - STP |= DataReg2 << 10; - STP |= AddrReg << 5; - STP |= DataReg; - PC[-1] = DMB; - PC[0] = STP; - PC[1] = DMB; - // Back up one instruction and have another go - ArchHelpers::Context::SetPc(ucontext, ArchHelpers::Context::GetPc(ucontext) - 4); + //Should not trigger - middle of an LDAXP/STAXP pair. + LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLXP: PC: {} Instruction: 0x{:08x}\n", fmt::ptr(PC), PC[0]); + return false; } else if ((Instr & FEXCore::ArchHelpers::Arm64::CASPAL_MASK) == FEXCore::ArchHelpers::Arm64::CASPAL_INST) { // CASPAL if (FEXCore::ArchHelpers::Arm64::HandleCASPAL(ucontext, info, Instr)) { diff --git a/unittests/ASM/Disabled_Tests_ARMv8.0 b/unittests/ASM/Disabled_Tests_ARMv8.0 index f736fe63fd..e57d35c09f 100644 --- a/unittests/ASM/Disabled_Tests_ARMv8.0 +++ b/unittests/ASM/Disabled_Tests_ARMv8.0 @@ -1,11 +1,3 @@ -# Doesn't support unaligned on armv8.0 -Test_Secondary/09_XX_01_7.asm -Test_Secondary/09_XX_01_8.asm -Test_Secondary/09_XX_01_9.asm -Test_Secondary/09_XX_01_11.asm -Test_Secondary/09_XX_01_12.asm -Test_Secondary/09_XX_01_13.asm - # Doesn't support unaligned atomic memory ops on armv8.0 Test_Primary/Primary_01_Atomic16.asm Test_Primary/Primary_01_Atomic32.asm From 5f7532c5695c3dcabf6be0220f7e9357ff2bf9a2 Mon Sep 17 00:00:00 2001 From: CallumDev Date: Sun, 5 Sep 2021 17:30:18 +0930 Subject: [PATCH 2/3] Interpreter: Lower 4 byte CASPair to inline assembly --- .../Interface/Core/Interpreter/InterpreterOps.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp index 149f714cff..6d47f1d3e0 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp +++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp @@ -1666,14 +1666,11 @@ void InterpreterOps::InterpretIR(FEXCore::Core::InternalThreadState *Thread, uin // Size is the size of each pair element switch (Size) { case 4: { - std::atomic *Data = *GetSrc **>(SSAData, Op->Header.Args[2]); - - uint64_t Src1 = *GetSrc(SSAData, Op->Header.Args[0]); - uint64_t Src2 = *GetSrc(SSAData, Op->Header.Args[1]); - - uint64_t Expected = Src1; - bool Result = Data->compare_exchange_strong(Expected, Src2); - GD = Result ? Src1 : Expected; + GD = AtomicCompareAndSwap( + *GetSrc(SSAData, Op->Header.Args[0]), + *GetSrc(SSAData, Op->Header.Args[1]), + *GetSrc(SSAData, Op->Header.Args[2]) + ); break; } case 8: { From dec512187ead8d48b99aed21c670a14f0033941c Mon Sep 17 00:00:00 2001 From: CallumDev Date: Sun, 5 Sep 2021 17:49:57 +0930 Subject: [PATCH 3/3] JIT: Remove nops in ARMv8.0 CASPair --- .../Interface/Core/ArchHelpers/Arm64.cpp | 28 ++++++++----------- .../Interface/Core/JIT/Arm64/AtomicOps.cpp | 10 ++----- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp index b8ef90974c..624b233e13 100644 --- a/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp +++ b/External/FEXCore/Source/Interface/Core/ArchHelpers/Arm64.cpp @@ -277,22 +277,18 @@ uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr) { return 0; } // caspair - // [0] (not reached) nop // [1] ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc)); <-- DataReg & AddrReg - // [2] nop - // [3] cmp(TMP2.W(), Expected.first.W()); <-- ExpectedReg1 - // [4] ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq); <-- ExpectedREg2 - // [5] b(&LoopNotExpected, Condition::ne); - // [6] nop - // [7] stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc)); <-- DesiredReg - // [8] nop(); - // [9] cbnz(TMP2.W(), &LoopTop); - // [10] mov(Dst.first.W(), Expected.first.W()); - // [11] mov(Dst.second.W(), Expected.second.W()); - // [12] b(&LoopExpected); - // [13] mov(Dst.first.W(), TMP2.W()); - // [14] mov(Dst.second.W(), TMP3.W()); - // [15] clrex(); + // [2] cmp(TMP2.W(), Expected.first.W()); <-- ExpectedReg1 + // [3] ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq); <-- ExpectedREg2 + // [4] b(&LoopNotExpected, Condition::ne); + // [5] stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc)); <-- DesiredReg + // [6] cbnz(TMP2.W(), &LoopTop); + // [7] mov(Dst.first.W(), Expected.first.W()); + // [8] mov(Dst.second.W(), Expected.second.W()); + // [9] b(&LoopExpected); + // [10] mov(Dst.first.W(), TMP2.W()); + // [11] mov(Dst.second.W(), TMP3.W()); + // [12] clrex(); uint32_t *PC = (uint32_t*)ArchHelpers::Context::GetPc(_ucontext); @@ -328,7 +324,7 @@ uint64_t HandleCASPAL_ARMv8(void *_ucontext, void *_info, uint32_t Instr) { mcontext->regs[DataReg2] = mcontext->regs[ExpectedReg2]; if(RunCASPAL(_ucontext, _info, Size, DesiredReg1, DesiredReg2, DataReg, DataReg2, AddrReg)) { - return 12 * sizeof(uint32_t); // skip to mov + clrex + return 9 * sizeof(uint32_t); // skip to mov + clrex } else { return 0; } diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp index bb715de9c8..712e306294 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp @@ -44,15 +44,12 @@ DEF_OP(CASPair) { aarch64::Label LoopNotExpected; aarch64::Label LoopExpected; bind(&LoopTop); - nop(); + ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc)); - nop(); cmp(TMP2.W(), Expected.first.W()); ccmp(TMP3.W(), Expected.second.W(), NoFlag, Condition::eq); b(&LoopNotExpected, Condition::ne); - nop(); stlxp(TMP2.W(), Desired.first.W(), Desired.second.W(), MemOperand(MemSrc)); - nop(); cbnz(TMP2.W(), &LoopTop); mov(Dst.first.W(), Expected.first.W()); mov(Dst.second.W(), Expected.second.W()); @@ -73,15 +70,12 @@ DEF_OP(CASPair) { aarch64::Label LoopNotExpected; aarch64::Label LoopExpected; bind(&LoopTop); - nop(); + ldaxp(TMP2.X(), TMP3.X(), MemOperand(MemSrc)); - nop(); cmp(TMP2.X(), Expected.first.X()); ccmp(TMP3.X(), Expected.second.X(), NoFlag, Condition::eq); b(&LoopNotExpected, Condition::ne); - nop(); stlxp(TMP2.X(), Desired.first.X(), Desired.second.X(), MemOperand(MemSrc)); - nop(); cbnz(TMP2.X(), &LoopTop); mov(Dst.first.X(), Expected.first.X()); mov(Dst.second.X(), Expected.second.X());