diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 36659dfd83863b..524b74e7adf09a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -412,6 +412,9 @@ config KASAN_SHADOW_OFFSET
 config UNWIND_TABLES
	bool
 
+config ARM64_ACTLR_STATE
+	bool
+
 source "arch/arm64/Kconfig.platforms"
 
 menu "Kernel Features"
@@ -2205,6 +2208,17 @@ config ARM64_DEBUG_PRIORITY_MASKING
	  If unsure, say N
 endif # ARM64_PSEUDO_NMI
 
+config ARM64_MEMORY_MODEL_CONTROL
+	bool "Runtime memory model control"
+	default ARCH_APPLE
+	select ARM64_ACTLR_STATE
+	help
+	  Some ARM64 CPUs support runtime switching of the CPU memory
+	  model, which can be useful when emulating other CPU
+	  architectures that have different memory models. Say Y to
+	  enable support for the PR_SET_MEM_MODEL/PR_GET_MEM_MODEL
+	  prctl() calls on CPUs with this feature.
+
 config RELOCATABLE
	bool "Build a relocatable kernel image" if EXPERT
	select ARCH_HAS_RELR
diff --git a/arch/arm64/include/asm/apple_cpufeature.h b/arch/arm64/include/asm/apple_cpufeature.h
new file mode 100644
index 00000000000000..4370d91ffa3ec9
--- /dev/null
+++ b/arch/arm64/include/asm/apple_cpufeature.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_APPLE_CPUFEATURES_H
+#define __ASM_APPLE_CPUFEATURES_H
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+#define AIDR_APPLE_TSO_SHIFT	9
+#define AIDR_APPLE_TSO		BIT(9)
+
+#define ACTLR_APPLE_TSO_SHIFT	1
+#define ACTLR_APPLE_TSO		BIT(1)
+
+#endif
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 55843426727156..c2cd79de92f388 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -909,6 +909,12 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
	return 8;
 }
 
+static __always_inline bool system_has_actlr_state(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+		alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE);
+}
+
 s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
 struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
 
@@ -1032,6 +1038,10 @@ static inline bool cpu_has_lpa2(void)
 #endif
 }
 
+void __init init_cpucap_indirect_list_impdef(void);
+void __init init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps);
+bool cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry);
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index a601a9305b104f..8088b15c7c3ad3 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -80,6 +80,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
	if (!vcpu_has_run_once(vcpu))
		vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
+	    (alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT) ||
+	     alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT_APPLE)))
+		vcpu->arch.hcr_el2 &= ~HCR_TACR;
 
	/*
	 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f77371232d8c6d..d43c5791a35e22 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -184,6 +184,9 @@ struct thread_struct {
	u64			sctlr_user;
	u64			svcr;
	u64			tpidr2_el0;
+#ifdef CONFIG_ARM64_ACTLR_STATE
+	u64			actlr;
+#endif
 };
 
 static inline unsigned int thread_get_vl(struct thread_struct *thread,
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2b112f3b75109a..2a11cdefbe041b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,7 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
			   return_address.o cpuinfo.o cpu_errata.o		\
			   cpufeature.o alternative.o cacheinfo.o		\
			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o idle.o patching.o pi/
+			   syscall.o proton-pack.o idle.o patching.o		\
+			   cpufeature_impdef.o pi/
 
 obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o			\
					   sys_compat.o
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fdd9..d9c865fd0d0b3e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1029,7 +1029,7 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
 extern const struct arm64_cpu_capabilities arm64_errata[];
 static const struct arm64_cpu_capabilities arm64_features[];
 
-static void __init
+void __init
 init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
 {
	for (; caps->matches; caps++) {
@@ -1541,8 +1541,8 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
	return true;
 }
 
-static bool
-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+bool
+cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
	int val, min, max;
	u64 tmp;
@@ -1595,14 +1595,14 @@ has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
	if (!mask)
		return false;
 
-	return feature_matches(val, entry);
+	return cpufeature_matches(val, entry);
 }
 
 static bool
 has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
 {
	u64 val = read_scoped_sysreg(entry, scope);
-	return feature_matches(val, entry);
+	return cpufeature_matches(val, entry);
 }
 
 const struct cpumask *system_32bit_el0_cpumask(void)
@@ -3144,10 +3144,38 @@ static void update_cpu_capabilities(u16 scope_mask)
 
	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
	for (i = 0; i < ARM64_NCAPS; i++) {
+		bool matches;
+
		caps = cpucap_ptrs[i];
-		if (!caps || !(caps->type & scope_mask) ||
-		    cpus_have_cap(caps->capability) ||
-		    !caps->matches(caps, cpucap_default_scope(caps)))
+		if (!caps || !(caps->type & scope_mask))
+			continue;
+
+		if (!(scope_mask & SCOPE_LOCAL_CPU) && cpus_have_cap(caps->capability))
+			continue;
+
+		matches = caps->matches(caps, cpucap_default_scope(caps));
+
+		if (matches == cpus_have_cap(caps->capability))
+			continue;
+
+		if (!matches) {
+			/*
+			 * Cap detected on boot CPU but not this CPU,
+			 * disable it if not optional.
+			 */
+			if (!cpucap_late_cpu_optional(caps)) {
+				__clear_bit(caps->capability, system_cpucaps);
+				pr_info("missing on secondary: %s\n", caps->desc);
+			}
+			continue;
+		}
+
+		if (!(scope_mask & (SCOPE_BOOT_CPU | SCOPE_SYSTEM)) &&
+		    cpucap_late_cpu_permitted(caps))
+			/*
+			 * Cap detected on this CPU but not boot CPU,
+			 * skip it if permitted for late CPUs.
+			 */
			continue;
 
		if (caps->desc && !caps->cpus)
@@ -3495,6 +3523,7 @@ void __init setup_boot_cpu_features(void)
	 * handle the boot CPU.
	 */
	init_cpucap_indirect_list();
+	init_cpucap_indirect_list_impdef();
 
	/*
	 * Detect broken pseudo-NMI. Must be called _before_ the call to
diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
new file mode 100644
index 00000000000000..3407034b7878eb
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature_impdef.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Contains implementation-defined CPU feature definitions.
+ */ + +#define pr_fmt(fmt) "CPU features: " fmt + +#include +#include +#include +#include +#include + +#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL +static bool has_apple_feature(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 val; + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + + if (read_cpuid_implementor() != ARM_CPU_IMP_APPLE) + return false; + + val = read_sysreg(aidr_el1); + return cpufeature_matches(val, entry); +} + +static bool has_apple_tso(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 val; + + if (!has_apple_feature(entry, scope)) + return false; + + /* + * KVM and old versions of the macOS hypervisor will advertise TSO in + * AIDR_EL1, but then ignore writes to ACTLR_EL1. Test that the bit is + * actually writable before enabling TSO. + */ + + val = read_sysreg(actlr_el1); + write_sysreg(val ^ ACTLR_APPLE_TSO, actlr_el1); + if (!((val ^ read_sysreg(actlr_el1)) & ACTLR_APPLE_TSO)) { + pr_info_once("CPU advertises Apple TSO but it is broken, ignoring\n"); + return false; + } + + write_sysreg(val, actlr_el1); + return true; +} + +static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* List of CPUs that always use the TSO memory model */ + static const struct midr_range fixed_tso_list[] = { + MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_FUJITSU_A64FX), + { /* sentinel */ } + }; + + return is_midr_in_range_list(read_cpuid_id(), fixed_tso_list); +} +#endif + +static bool has_apple_actlr_virt_impdef(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 midr = read_cpuid_id() & MIDR_CPU_MODEL_MASK; + + return midr >= MIDR_APPLE_M1_ICESTORM && midr <= MIDR_APPLE_M1_FIRESTORM_MAX; +} + +static bool has_apple_actlr_virt(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 midr = read_cpuid_id() & MIDR_CPU_MODEL_MASK; + + return midr >= MIDR_APPLE_M2_BLIZZARD && midr <= MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, 0xfff); +} + +static const struct arm64_cpu_capabilities arm64_impdef_features[] = { +#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL + { + .desc = "TSO memory model (Apple)", + .capability = ARM64_HAS_TSO_APPLE, + .type = SCOPE_LOCAL_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU, + .matches = has_apple_tso, + .field_pos = AIDR_APPLE_TSO_SHIFT, + .field_width = 1, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + .max_field_value = 1, + }, + { + .desc = "TSO memory model (Fixed)", + .capability = ARM64_HAS_TSO_FIXED, + .type = SCOPE_LOCAL_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU, + .matches = has_tso_fixed, + }, +#endif + { + .desc = "ACTLR virtualization (IMPDEF, Apple)", + .capability = ARM64_HAS_ACTLR_VIRT_APPLE, + .type = SCOPE_LOCAL_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU, + .matches = has_apple_actlr_virt_impdef, + }, + { + .desc = "ACTLR virtualization (architectural?)", + .capability = ARM64_HAS_ACTLR_VIRT, + .type = SCOPE_LOCAL_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU, + .matches = has_apple_actlr_virt, + }, + {}, +}; + +void __init init_cpucap_indirect_list_impdef(void) +{ + init_cpucap_indirect_list_from_array(arm64_impdef_features); +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4ae31b7af6c311..34a19ecfb630ba 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -41,8 +41,10 @@ #include #include #include +#include #include +#include #include #include #include @@ -371,6 +373,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if 
		if (system_supports_tpidr2())
			p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
 
+#ifdef CONFIG_ARM64_ACTLR_STATE
+		if (system_has_actlr_state())
+			p->thread.actlr = read_sysreg(actlr_el1);
+#endif
+
		if (stack_start) {
			if (is_compat_thread(task_thread_info(p)))
				childregs->compat_sp = stack_start;
@@ -513,6 +520,65 @@ void update_sctlr_el1(u64 sctlr)
	isb();
 }
 
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+int arch_prctl_mem_model_get(struct task_struct *t)
+{
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE) &&
+	    t->thread.actlr & ACTLR_APPLE_TSO)
+		return PR_SET_MEM_MODEL_TSO;
+
+	return PR_SET_MEM_MODEL_DEFAULT;
+}
+
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_FIXED) &&
+	    val == PR_SET_MEM_MODEL_TSO)
+		return 0;
+
+	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE)) {
+		WARN_ON(!system_has_actlr_state());
+
+		switch (val) {
+		case PR_SET_MEM_MODEL_TSO:
+			t->thread.actlr |= ACTLR_APPLE_TSO;
+			break;
+		case PR_SET_MEM_MODEL_DEFAULT:
+			t->thread.actlr &= ~ACTLR_APPLE_TSO;
+			break;
+		default:
+			return -EINVAL;
+		}
+		write_sysreg(t->thread.actlr, actlr_el1);
+		return 0;
+	}
+
+	if (val == PR_SET_MEM_MODEL_DEFAULT)
+		return 0;
+
+	return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_ARM64_ACTLR_STATE
+/*
+ * IMPDEF control register ACTLR_EL1 handling. Some CPUs use this to
+ * expose features that can be controlled by userspace.
+ */
+static void actlr_thread_switch(struct task_struct *next)
+{
+	if (!system_has_actlr_state())
+		return;
+
+	current->thread.actlr = read_sysreg(actlr_el1);
+	write_sysreg(next->thread.actlr, actlr_el1);
+}
+#else
+static inline void actlr_thread_switch(struct task_struct *next)
+{
+}
+#endif
+
 /*
  * Thread switching.
  */
@@ -530,6 +596,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
	ssbs_thread_switch(next);
	erratum_1418040_thread_switch(next);
	ptrauth_thread_switch_user(next);
+	actlr_thread_switch(next);
 
	/*
	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -651,6 +718,10 @@ void arch_setup_new_exec(void)
		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
					 PR_SPEC_ENABLE);
	}
+
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+	arch_prctl_mem_model_set(current, PR_SET_MEM_MODEL_DEFAULT);
+#endif
 }
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b22d28ec80284b..df587e39bfc11c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -363,6 +363,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
	 */
	init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 #endif
+#ifdef CONFIG_ARM64_ACTLR_STATE
+	/*
+	 * Store the boot CPU ACTLR_EL1 value as the default. It will only
+	 * actually be restored during context switching if the platform is
+	 * known to use ACTLR_EL1 for exposable features and its layout is
+	 * known to be the same on all CPUs.
+ */ + init_task.thread.actlr = read_sysreg(actlr_el1); +#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4c0fdabaf8aec1..210e9f8081b6e8 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -16,6 +16,9 @@ #include #include +#define SYS_IMP_APL_ACTLR_EL12 sys_reg(3, 6, 15, 14, 6) +#define SYS_ACTLR_EL12 sys_reg(3, 5, 1, 0, 1) + static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); @@ -101,6 +104,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); + if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE)) { + if (alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT)) + ctxt_sys_reg(ctxt, ACTLR_EL1) = read_sysreg_s(SYS_ACTLR_EL12); + else if (alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT_APPLE)) + ctxt_sys_reg(ctxt, ACTLR_EL1) = read_sysreg_s(SYS_IMP_APL_ACTLR_EL12); + } } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -171,6 +180,13 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + if (IS_ENABLED(CONFIG_ARM64_ACTLR_STATE)) { + if (alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT)) + write_sysreg_s(ctxt_sys_reg(ctxt, ACTLR_EL1), SYS_ACTLR_EL12); + else if (alternative_has_cap_unlikely(ARM64_HAS_ACTLR_VIRT_APPLE)) + write_sysreg_s(ctxt_sys_reg(ctxt, ACTLR_EL1), SYS_IMP_APL_ACTLR_EL12); + } + if (ctxt_has_mte(ctxt)) { write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index ac3429d892b9a7..f615e04be81b92 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -8,6 +8,8 @@ BTI # Unreliable: use system_supports_32bit_el0() instead. 
 HAS_32BIT_EL0_DO_NOT_USE
 HAS_32BIT_EL1
+HAS_ACTLR_VIRT
+HAS_ACTLR_VIRT_APPLE
 HAS_ADDRESS_AUTH
 HAS_ADDRESS_AUTH_ARCH_QARMA3
 HAS_ADDRESS_AUTH_ARCH_QARMA5
@@ -52,6 +54,8 @@ HAS_STAGE2_FWB
 HAS_TCR2
 HAS_TIDCP1
 HAS_TLB_RANGE
+HAS_TSO_APPLE
+HAS_TSO_FIXED
 HAS_VA52
 HAS_VIRT_HOST_EXTN
 HAS_WFXT
diff --git a/include/linux/memory_ordering_model.h b/include/linux/memory_ordering_model.h
new file mode 100644
index 00000000000000..267a12ca66307e
--- /dev/null
+++ b/include/linux/memory_ordering_model.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MEMORY_ORDERING_MODEL_H
+#define __ASM_MEMORY_ORDERING_MODEL_H
+
+/* Arch hooks to implement the PR_{GET,SET}_MEM_MODEL prctls */
+
+struct task_struct;
+int arch_prctl_mem_model_get(struct task_struct *t);
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val);
+
+#endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879b2..36c278683cd620 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,9 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+#define PR_GET_MEM_MODEL	0x6d4d444c
+#define PR_SET_MEM_MODEL	0x4d4d444c
+# define PR_SET_MEM_MODEL_DEFAULT	0
+# define PR_SET_MEM_MODEL_TSO		1
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 3a2df1bd9f640e..d28e2a8177db68 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -45,6 +45,7 @@
 #include <linux/user_namespace.h>
 #include <linux/time_namespace.h>
 #include <linux/binfmts.h>
+#include <linux/memory_ordering_model.h>
 
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
@@ -2454,6 +2455,16 @@ static int prctl_get_auxv(void __user *addr, unsigned long len)
	return sizeof(mm->saved_auxv);
 }
 
+int __weak arch_prctl_mem_model_get(struct task_struct *t)
+{
+	return -EINVAL;
+}
+
+int __weak arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2782,6 +2793,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
	case PR_RISCV_SET_ICACHE_FLUSH_CTX:
		error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
		break;
+	case PR_GET_MEM_MODEL:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_mem_model_get(me);
+		break;
+	case PR_SET_MEM_MODEL:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_mem_model_set(me, arg2);
+		break;
	default:
		error = -EINVAL;
		break;
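
The prctl pair above is the only userspace-visible surface of this series. The following is a minimal userspace sketch (not part of the patch) showing how a caller might probe for and enable TSO; the constants mirror the uapi additions and are redefined locally in case the installed <linux/prctl.h> predates this patch, and the error handling is illustrative only:

	#include <errno.h>
	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_SET_MEM_MODEL
	#define PR_GET_MEM_MODEL		0x6d4d444c
	#define PR_SET_MEM_MODEL		0x4d4d444c
	#define PR_SET_MEM_MODEL_DEFAULT	0
	#define PR_SET_MEM_MODEL_TSO		1
	#endif

	int main(void)
	{
		/* Ask the kernel to switch this task to the TSO memory model. */
		if (prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0)) {
			/* EINVAL: no runtime TSO support on this kernel/CPU. */
			perror("PR_SET_MEM_MODEL");
			return 1;
		}

		/*
		 * Read the model back. On always-TSO CPUs (Denver, Carmel,
		 * A64FX) the set above succeeds but the kernel reports
		 * DEFAULT, since TSO is already the default model there.
		 */
		printf("memory model: %d\n", prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0));
		return 0;
	}

The selected model is per-thread state: it lives in thread_struct, is saved/restored by actlr_thread_switch(), is inherited across fork()/clone() via copy_thread(), and is reset to PR_SET_MEM_MODEL_DEFAULT on execve() by arch_setup_new_exec().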
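Because the generic hooks in kernel/sys.c are declared __weak, an architecture opts in simply by providing strong definitions of the two functions. The sketch below is hypothetical (the function bodies are placeholders, not part of this series) and only illustrates the override mechanism:

	#include <linux/errno.h>
	#include <linux/memory_ordering_model.h>
	#include <linux/prctl.h>
	#include <linux/sched.h>

	int arch_prctl_mem_model_get(struct task_struct *t)
	{
		/* Report the memory model currently selected for @t. */
		return PR_SET_MEM_MODEL_DEFAULT;
	}

	int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
	{
		/* This placeholder accepts only the default model. */
		return val == PR_SET_MEM_MODEL_DEFAULT ? 0 : -EINVAL;
	}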