Skip to content

Commit

Permalink
JitArm64: Use GetRegWithPreference for EmitBackpatchRoutine addr
Browse files Browse the repository at this point in the history
I'm adding a new function to the register cache called
GetRegWithPreference. If the passed-in register is unlocked, it gets
locked. Otherwise, GetReg is called. The function also has a
GetScopedRegWithPreference variant.

Then, I'm making JitArm64 call this function when allocating an address
register for use with EmitBackpatchRoutine. This way, when register
pressure is low we can use the optimal register, and when register
pressure is high (but not completely full) we can sacrifice a bit of
farcode size for not having to evict a register from the register cache.
  • Loading branch information
JosJuice committed Dec 30, 2024
1 parent c88c6f5 commit 7417efe
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 43 deletions.
43 changes: 22 additions & 21 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);

const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);

gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
ARM64Reg up_reg = ARM64Reg::INVALID_REG;
Expand All @@ -45,7 +47,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);

ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
bool is_immediate = false;

Expand Down Expand Up @@ -124,7 +125,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

Expand All @@ -141,7 +142,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
else if (mmio_address)
{
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(dest_reg)] = true;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
Expand All @@ -166,7 +167,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}

gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
Expand All @@ -175,7 +176,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);

const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);

// Don't materialize zero.
ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value);
Expand All @@ -188,8 +191,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);

ARM64Reg addr_reg = ARM64Reg::W2;

u32 imm_addr = 0;
bool is_immediate = false;

Expand Down Expand Up @@ -268,7 +269,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;

u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
Expand Down Expand Up @@ -309,7 +310,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else if (mmio_address)
{
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(RS)] = 0;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
Expand All @@ -330,7 +331,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}

gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
}

FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
Expand Down Expand Up @@ -512,13 +513,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16;

gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);

// MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
ARM64Reg addr_reg = ARM64Reg::W1;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
Expand Down Expand Up @@ -634,7 +635,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
}

gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
Expand All @@ -647,9 +648,9 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;

gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);

ARM64Reg addr_reg = ARM64Reg::W2;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
Expand Down Expand Up @@ -767,7 +768,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
}

gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
}

void JitArm64::dcbx(UGeckoInstruction inst)
Expand Down Expand Up @@ -987,11 +988,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)

int a = inst.RA, b = inst.RB;

gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);

Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); });
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W30); });

constexpr ARM64Reg addr_reg = ARM64Reg::W1;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
constexpr ARM64Reg temp_reg = ARM64Reg::W30;

// HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
Expand Down Expand Up @@ -1055,7 +1056,7 @@ void JitArm64::dcbz(UGeckoInstruction inst)

BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;

EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, addr_reg,
scratch_gprs, scratch_fprs);
Expand Down
16 changes: 8 additions & 8 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;

gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);

const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W1;

if (update)
{
Expand Down Expand Up @@ -164,7 +164,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

Expand All @@ -187,7 +187,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}

gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
Expand Down Expand Up @@ -270,9 +270,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = std::move(single_reg);
}

gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);

ARM64Reg addr_reg = ARM64Reg::W2;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);

if (update)
{
Expand Down Expand Up @@ -358,7 +358,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;

if (is_immediate)
{
Expand Down Expand Up @@ -409,5 +409,5 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}

gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
}
26 changes: 14 additions & 12 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;

gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck)
Expand All @@ -50,7 +50,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
}

constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W1) :
Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1);
constexpr ARM64Reg scale_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);

Expand Down Expand Up @@ -82,7 +83,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
BitSet32 scratch_fprs;

if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

Expand Down Expand Up @@ -127,10 +128,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}

gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck)
Expand Down Expand Up @@ -197,17 +198,18 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}

gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W3);
}

constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg addr_reg = ARM64Reg::W2;
const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W2) :
Arm64RegCache::ScopedARM64Reg(ARM64Reg::W2);

if (inst.RA || update) // Always uses the register on update
{
Expand Down Expand Up @@ -237,7 +239,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
BitSet32 scratch_fprs;

if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;

u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
Expand Down Expand Up @@ -269,10 +271,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}

gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
Expand Down
25 changes: 25 additions & 0 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <algorithm>
#include <cstddef>
#include <ranges>
#include <vector>

#include "Common/Assert.h"
Expand Down Expand Up @@ -63,6 +64,30 @@ ARM64Reg Arm64RegCache::GetReg()
return ARM64Reg::INVALID_REG;
}

ARM64Reg Arm64RegCache::GetRegWithPreference(Arm64Gen::ARM64Reg preferred)
{
  // Returns the preferred temporary register if it's currently unlocked (locking it),
  // otherwise falls back to GetReg(). The preferred register tends to sit near the
  // end of m_host_registers, so search the pool back to front.
  auto reversed_pool = m_host_registers | std::views::reverse;
  const auto found = std::ranges::find_if(
      reversed_pool, [preferred](const auto& host_reg) { return host_reg.GetReg() == preferred; });

  if (found == reversed_pool.end())
  {
    // The caller asked for a register this cache doesn't manage.
    ASSERT_MSG(DYNA_REC, false, "Preferred register {:#x} is not in register cache",
               static_cast<int>(preferred));
    return ARM64Reg::INVALID_REG;
  }

  if (found->IsLocked())
    return GetReg();

  found->Lock();
  return found->GetReg();
}

void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
for (size_t i = 0; i < m_guest_registers.size(); ++i)
Expand Down
11 changes: 9 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,16 @@ class Arm64RegCache
// Returns a temporary register for use
// Requires unlocking after done
Arm64Gen::ARM64Reg GetReg();
Arm64Gen::ARM64Reg GetRegWithPreference(Arm64Gen::ARM64Reg preferred);

class ScopedARM64Reg
{
public:
inline ScopedARM64Reg() = default;
ScopedARM64Reg(const ScopedARM64Reg&) = delete;
explicit inline ScopedARM64Reg(Arm64RegCache& cache) : m_reg(cache.GetReg()), m_gpr(&cache) {}
inline ScopedARM64Reg(Arm64RegCache& cache, Arm64Gen::ARM64Reg reg) : m_reg(reg), m_gpr(&cache)
{
}
inline ScopedARM64Reg(Arm64Gen::ARM64Reg reg) : m_reg(reg) {}
inline ScopedARM64Reg(ScopedARM64Reg&& scoped_reg) { *this = std::move(scoped_reg); }
inline ~ScopedARM64Reg() { Unlock(); }
Expand Down Expand Up @@ -235,7 +238,11 @@ class Arm64RegCache

// Returns a temporary register
// Unlocking is implicitly handled through RAII
inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this); }
inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this, GetReg()); }
inline ScopedARM64Reg GetScopedRegWithPreference(Arm64Gen::ARM64Reg preferred)
{
return ScopedARM64Reg(*this, GetRegWithPreference(preferred));
}

void UpdateLastUsed(BitSet32 regs_used);

Expand Down

0 comments on commit 7417efe

Please sign in to comment.