From d45d6246cda824b1c56c0d443d9b5066006466e2 Mon Sep 17 00:00:00 2001 From: antonyzhilin Date: Wed, 16 Oct 2024 15:43:35 +0300 Subject: [PATCH] feat concurrent: up librseq commit_hash:b19ebf58741f1e935cfd73a879bed9c90fb3b6ae --- .gitignore | 1 - .mapping.json | 40 +- core/src/concurrent/striped_counter.cpp | 8 - third_party/librseq/CMakeLists.txt | 11 +- third_party/librseq/README.md | 4 +- .../include/rseq/{rseq-abi.h => abi.h} | 4 +- third_party/librseq/include/rseq/arch.h | 94 ++ .../librseq/include/rseq/arch/aarch64.h | 272 ++++ .../librseq/include/rseq/arch/aarch64/bits.h | 398 +++++ third_party/librseq/include/rseq/arch/arm.h | 221 +++ .../librseq/include/rseq/arch/arm/bits.h | 500 ++++++ .../include/rseq/arch/generic/common.h | 83 + .../rseq/arch/generic/thread-pointer.h | 26 + third_party/librseq/include/rseq/arch/mips.h | 232 +++ .../librseq/include/rseq/arch/mips/bits.h | 469 ++++++ third_party/librseq/include/rseq/arch/ppc.h | 257 +++ .../librseq/include/rseq/arch/ppc/bits.h | 460 ++++++ .../include/rseq/arch/ppc/thread-pointer.h | 31 + third_party/librseq/include/rseq/arch/riscv.h | 240 +++ .../librseq/include/rseq/arch/riscv/bits.h | 368 +++++ .../include/rseq/arch/riscv/thread-pointer.h | 38 + third_party/librseq/include/rseq/arch/s390.h | 182 +++ .../librseq/include/rseq/arch/s390/bits.h | 477 ++++++ .../include/rseq/arch/templates/bits-reset.h | 11 + .../include/rseq/arch/templates/bits.h | 41 + third_party/librseq/include/rseq/arch/x86.h | 222 +++ .../rseq/{rseq-x86-bits.h => arch/x86/bits.h} | 429 ++--- .../x86/thread-pointer.h} | 8 +- third_party/librseq/include/rseq/compiler.h | 38 +- third_party/librseq/include/rseq/inject.h | 37 + third_party/librseq/include/rseq/mempool.h | 585 +++++++ third_party/librseq/include/rseq/pseudocode.h | 141 ++ .../librseq/include/rseq/rseq-bits-reset.h | 11 - .../librseq/include/rseq/rseq-bits-template.h | 41 - .../include/rseq/rseq-thread-pointer.h | 19 - third_party/librseq/include/rseq/rseq-x86.h | 234 --- third_party/librseq/include/rseq/rseq.h | 248 ++- .../librseq/include/rseq/thread-pointer.h | 38 + third_party/librseq/include/rseq/utils.h | 36 + third_party/librseq/src/config.h | 49 +- third_party/librseq/src/list.h | 198 +++ third_party/librseq/src/rseq-mempool.c | 1398 +++++++++++++++++ third_party/librseq/src/rseq-utils.h | 137 ++ third_party/librseq/src/rseq.c | 187 ++- third_party/librseq/src/smp.c | 278 ++++ third_party/librseq/src/smp.h | 12 + 46 files changed, 8079 insertions(+), 735 deletions(-) rename third_party/librseq/include/rseq/{rseq-abi.h => abi.h} (99%) create mode 100644 third_party/librseq/include/rseq/arch.h create mode 100644 third_party/librseq/include/rseq/arch/aarch64.h create mode 100644 third_party/librseq/include/rseq/arch/aarch64/bits.h create mode 100644 third_party/librseq/include/rseq/arch/arm.h create mode 100644 third_party/librseq/include/rseq/arch/arm/bits.h create mode 100644 third_party/librseq/include/rseq/arch/generic/common.h create mode 100644 third_party/librseq/include/rseq/arch/generic/thread-pointer.h create mode 100644 third_party/librseq/include/rseq/arch/mips.h create mode 100644 third_party/librseq/include/rseq/arch/mips/bits.h create mode 100644 third_party/librseq/include/rseq/arch/ppc.h create mode 100644 third_party/librseq/include/rseq/arch/ppc/bits.h create mode 100644 third_party/librseq/include/rseq/arch/ppc/thread-pointer.h create mode 100644 third_party/librseq/include/rseq/arch/riscv.h create mode 100644 third_party/librseq/include/rseq/arch/riscv/bits.h create mode 
100644 third_party/librseq/include/rseq/arch/riscv/thread-pointer.h create mode 100644 third_party/librseq/include/rseq/arch/s390.h create mode 100644 third_party/librseq/include/rseq/arch/s390/bits.h create mode 100644 third_party/librseq/include/rseq/arch/templates/bits-reset.h create mode 100644 third_party/librseq/include/rseq/arch/templates/bits.h create mode 100644 third_party/librseq/include/rseq/arch/x86.h rename third_party/librseq/include/rseq/{rseq-x86-bits.h => arch/x86/bits.h} (65%) rename third_party/librseq/include/rseq/{rseq-x86-thread-pointer.h => arch/x86/thread-pointer.h} (74%) create mode 100644 third_party/librseq/include/rseq/inject.h create mode 100644 third_party/librseq/include/rseq/mempool.h create mode 100644 third_party/librseq/include/rseq/pseudocode.h delete mode 100644 third_party/librseq/include/rseq/rseq-bits-reset.h delete mode 100644 third_party/librseq/include/rseq/rseq-bits-template.h delete mode 100644 third_party/librseq/include/rseq/rseq-thread-pointer.h delete mode 100644 third_party/librseq/include/rseq/rseq-x86.h create mode 100644 third_party/librseq/include/rseq/thread-pointer.h create mode 100644 third_party/librseq/include/rseq/utils.h create mode 100644 third_party/librseq/src/list.h create mode 100644 third_party/librseq/src/rseq-mempool.c create mode 100644 third_party/librseq/src/rseq-utils.h create mode 100644 third_party/librseq/src/smp.c create mode 100644 third_party/librseq/src/smp.h diff --git a/.gitignore b/.gitignore index 77a54fdb5827..0d3c9a79677d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,6 @@ /build*/ /cmake-build-*/ /docs/ -/third_party/ CMakeLists.txt.user compile_commands.json tags diff --git a/.mapping.json b/.mapping.json index d31349ebf9b1..95f3b9643baa 100644 --- a/.mapping.json +++ b/.mapping.json @@ -3522,17 +3522,43 @@ "third_party/librseq/CMakeLists.txt":"taxi/uservices/userver/third_party/librseq/CMakeLists.txt", "third_party/librseq/LICENSE.md":"taxi/uservices/userver/third_party/librseq/LICENSE.md", "third_party/librseq/README.md":"taxi/uservices/userver/third_party/librseq/README.md", + "third_party/librseq/include/rseq/abi.h":"taxi/uservices/userver/third_party/librseq/include/rseq/abi.h", + "third_party/librseq/include/rseq/arch.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch.h", + "third_party/librseq/include/rseq/arch/aarch64.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/aarch64.h", + "third_party/librseq/include/rseq/arch/aarch64/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/aarch64/bits.h", + "third_party/librseq/include/rseq/arch/arm.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/arm.h", + "third_party/librseq/include/rseq/arch/arm/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/arm/bits.h", + "third_party/librseq/include/rseq/arch/generic/common.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/generic/common.h", + "third_party/librseq/include/rseq/arch/generic/thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/generic/thread-pointer.h", + "third_party/librseq/include/rseq/arch/mips.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/mips.h", + "third_party/librseq/include/rseq/arch/mips/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/mips/bits.h", + "third_party/librseq/include/rseq/arch/ppc.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/ppc.h", + 
"third_party/librseq/include/rseq/arch/ppc/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/ppc/bits.h", + "third_party/librseq/include/rseq/arch/ppc/thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/ppc/thread-pointer.h", + "third_party/librseq/include/rseq/arch/riscv.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/riscv.h", + "third_party/librseq/include/rseq/arch/riscv/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/riscv/bits.h", + "third_party/librseq/include/rseq/arch/riscv/thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/riscv/thread-pointer.h", + "third_party/librseq/include/rseq/arch/s390.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/s390.h", + "third_party/librseq/include/rseq/arch/s390/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/s390/bits.h", + "third_party/librseq/include/rseq/arch/templates/bits-reset.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/templates/bits-reset.h", + "third_party/librseq/include/rseq/arch/templates/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/templates/bits.h", + "third_party/librseq/include/rseq/arch/x86.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/x86.h", + "third_party/librseq/include/rseq/arch/x86/bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/x86/bits.h", + "third_party/librseq/include/rseq/arch/x86/thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/arch/x86/thread-pointer.h", "third_party/librseq/include/rseq/compiler.h":"taxi/uservices/userver/third_party/librseq/include/rseq/compiler.h", - "third_party/librseq/include/rseq/rseq-abi.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-abi.h", - "third_party/librseq/include/rseq/rseq-bits-reset.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-bits-reset.h", - "third_party/librseq/include/rseq/rseq-bits-template.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-bits-template.h", - "third_party/librseq/include/rseq/rseq-thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-thread-pointer.h", - "third_party/librseq/include/rseq/rseq-x86-bits.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-x86-bits.h", - "third_party/librseq/include/rseq/rseq-x86-thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-x86-thread-pointer.h", - "third_party/librseq/include/rseq/rseq-x86.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq-x86.h", + "third_party/librseq/include/rseq/inject.h":"taxi/uservices/userver/third_party/librseq/include/rseq/inject.h", + "third_party/librseq/include/rseq/mempool.h":"taxi/uservices/userver/third_party/librseq/include/rseq/mempool.h", + "third_party/librseq/include/rseq/pseudocode.h":"taxi/uservices/userver/third_party/librseq/include/rseq/pseudocode.h", "third_party/librseq/include/rseq/rseq.h":"taxi/uservices/userver/third_party/librseq/include/rseq/rseq.h", + "third_party/librseq/include/rseq/thread-pointer.h":"taxi/uservices/userver/third_party/librseq/include/rseq/thread-pointer.h", + "third_party/librseq/include/rseq/utils.h":"taxi/uservices/userver/third_party/librseq/include/rseq/utils.h", "third_party/librseq/src/config.h":"taxi/uservices/userver/third_party/librseq/src/config.h", + "third_party/librseq/src/list.h":"taxi/uservices/userver/third_party/librseq/src/list.h", + 
"third_party/librseq/src/rseq-mempool.c":"taxi/uservices/userver/third_party/librseq/src/rseq-mempool.c", + "third_party/librseq/src/rseq-utils.h":"taxi/uservices/userver/third_party/librseq/src/rseq-utils.h", "third_party/librseq/src/rseq.c":"taxi/uservices/userver/third_party/librseq/src/rseq.c", + "third_party/librseq/src/smp.c":"taxi/uservices/userver/third_party/librseq/src/smp.c", + "third_party/librseq/src/smp.h":"taxi/uservices/userver/third_party/librseq/src/smp.h", "third_party/llhttp/CMakeLists.txt":"taxi/uservices/userver/third_party/llhttp/CMakeLists.txt", "third_party/llhttp/LICENSE-MIT":"taxi/uservices/userver/third_party/llhttp/LICENSE-MIT", "third_party/llhttp/README.md":"taxi/uservices/userver/third_party/llhttp/README.md", diff --git a/core/src/concurrent/striped_counter.cpp b/core/src/concurrent/striped_counter.cpp index 2f38748f7b07..517dd004d416 100644 --- a/core/src/concurrent/striped_counter.cpp +++ b/core/src/concurrent/striped_counter.cpp @@ -6,14 +6,6 @@ #include #include -// rseq_addv was renamed into rseq_load_add_store__ptr, provide compatibility -// with the older versions -#if defined(__has_include) -#if __has_include() -#define rseq_load_add_store__ptr rseq_addv -#endif -#endif - USERVER_NAMESPACE_BEGIN namespace concurrent { diff --git a/third_party/librseq/CMakeLists.txt b/third_party/librseq/CMakeLists.txt index af801a211047..a25627344924 100644 --- a/third_party/librseq/CMakeLists.txt +++ b/third_party/librseq/CMakeLists.txt @@ -1,12 +1,17 @@ -project(userver-librseq) +project(userver-librseq C) SET(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/src/rseq.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/config.h) + src/rseq.c + src/rseq-mempool.c + src/smp.c +) add_library(${PROJECT_NAME} OBJECT ${SOURCES}) target_include_directories(${PROJECT_NAME} SYSTEM PUBLIC $ ) +target_compile_options(${PROJECT_NAME} PRIVATE -Wno-implicit-function-declaration) +target_compile_definitions(${PROJECT_NAME} PRIVATE _GNU_SOURCE) +set_target_properties(${PROJECT_NAME} PROPERTIES C_EXTENSIONS ON) _userver_install_targets(COMPONENT core TARGETS ${PROJECT_NAME}) diff --git a/third_party/librseq/README.md b/third_party/librseq/README.md index b6505e444777..d679305f1305 100644 --- a/third_party/librseq/README.md +++ b/third_party/librseq/README.md @@ -1,3 +1,5 @@ These headers are imported from https://github.com/compudj/librseq -Commit: https://github.com/compudj/librseq/commit/40797ae3069ce08b83c4221839b53e754105be78 +Commit: https://github.com/compudj/librseq/commit/1f62f2c757d10d55af2b9519a97f81ac1e5cb178 + +All platform libraries, except for x86, are stripped. diff --git a/third_party/librseq/include/rseq/rseq-abi.h b/third_party/librseq/include/rseq/abi.h similarity index 99% rename from third_party/librseq/include/rseq/rseq-abi.h rename to third_party/librseq/include/rseq/abi.h index 047b07f9dc74..1b12ebd8c9ef 100644 --- a/third_party/librseq/include/rseq/rseq-abi.h +++ b/third_party/librseq/include/rseq/abi.h @@ -4,9 +4,9 @@ #define _RSEQ_ABI_H /* - * rseq/rseq-abi.h + * rseq/abi.h * - * Restartable sequences system call API + * Restartable sequences system call ABI */ #include diff --git a/third_party/librseq/include/rseq/arch.h b/third_party/librseq/include/rseq/arch.h new file mode 100644 index 000000000000..58f7a82a15c9 --- /dev/null +++ b/third_party/librseq/include/rseq/arch.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */ + +/* + * rseq/arch.h + */ + +#include + +/* + * Architecture detection using compiler defines. 
+ *
+ * The following defines are used internally for architecture specific code.
+ *
+ * RSEQ_ARCH_X86 : All x86 variants 32 and 64 bits
+ * RSEQ_ARCH_I386 : Specific to the i386
+ * RSEQ_ARCH_AMD64 : All 64 bits x86 variants
+ *
+ * RSEQ_ARCH_PPC : All PowerPC variants 32 and 64 bits
+ * RSEQ_ARCH_PPC64 : Specific to 64 bits variants
+ *
+ * RSEQ_ARCH_S390 : All IBM s390 / s390x variants
+ * RSEQ_ARCH_S390X : Specific to z/Architecture 64 bits
+ *
+ * RSEQ_ARCH_ARM : All ARM 32 bits variants
+ * RSEQ_ARCH_AARCH64 : All ARM 64 bits variants
+ * RSEQ_ARCH_MIPS : All MIPS variants
+ * RSEQ_ARCH_RISCV : All RISC-V variants
+ */
+
+#ifndef _RSEQ_ARCH_H
+#define _RSEQ_ARCH_H
+
+#if (defined(__amd64__) \
+	|| defined(__amd64) \
+	|| defined(__x86_64__) \
+	|| defined(__x86_64))
+
+#define RSEQ_ARCH_X86 1
+#define RSEQ_ARCH_AMD64 1
+#include <rseq/arch/x86.h>
+
+#elif (defined(__i386__) || defined(__i386))
+
+#define RSEQ_ARCH_X86 1
+#include <rseq/arch/x86.h>
+
+#elif (defined(__arm__) || defined(__arm))
+
+#define RSEQ_ARCH_ARM 1
+#include <rseq/arch/arm.h>
+
+#elif defined(__aarch64__)
+
+#define RSEQ_ARCH_AARCH64 1
+#include <rseq/arch/aarch64.h>
+
+#elif (defined(__powerpc64__) || defined(__ppc64__))
+
+#define RSEQ_ARCH_PPC 1
+#define RSEQ_ARCH_PPC64 1
+#include <rseq/arch/ppc.h>
+
+#elif (defined(__powerpc__) \
+	|| defined(__powerpc) \
+	|| defined(__ppc__))
+
+#define RSEQ_ARCH_PPC 1
+#include <rseq/arch/ppc.h>
+
+#elif (defined(__mips__) || defined(__mips))
+
+#define RSEQ_ARCH_MIPS 1
+#include <rseq/arch/mips.h>
+
+#elif defined(__s390__)
+
+# if (defined(__s390x__) || defined(__zarch__))
+# define RSEQ_ARCH_S390X 1
+# endif
+
+#define RSEQ_ARCH_S390 1
+#include <rseq/arch/s390.h>
+
+#elif defined(__riscv)
+
+#define RSEQ_ARCH_RISCV 1
+#include <rseq/arch/riscv.h>
+
+#else
+#error "Cannot build: unrecognized architecture, see <rseq/arch.h>."
+#endif
+
+#endif /* _RSEQ_ARCH_H */
diff --git a/third_party/librseq/include/rseq/arch/aarch64.h b/third_party/librseq/include/rseq/arch/aarch64.h
new file mode 100644
index 000000000000..cd1d4de5404d
--- /dev/null
+++ b/third_party/librseq/include/rseq/arch/aarch64.h
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2016-2024 Mathieu Desnoyers */
+/* SPDX-FileCopyrightText: 2018 Will Deacon */
+
+/*
+ * rseq/arch/aarch64.h
+ */
+
+#ifndef _RSEQ_RSEQ_H
+#error "Never use <rseq/arch/aarch64.h> directly; include <rseq/rseq.h> instead."
+#endif
+
+/*
+ * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those
+ * are not part of the public API.
+ */
+
+/*
+ * aarch64 -mbig-endian generates mixed endianness code vs data:
+ * little-endian code and big-endian data. Ensure the RSEQ_SIG signature
+ * matches code endianness.
+ */
+#define RSEQ_SIG_CODE	0xd428bc00	/* BRK #0x45E0. */
+
+#ifdef __AARCH64EB__	/* Big endian */
+# define RSEQ_SIG_DATA	0x00bc28d4	/* BRK #0x45E0. */
+#else			/* Little endian */
+# define RSEQ_SIG_DATA	RSEQ_SIG_CODE
+#endif
+
+#define RSEQ_SIG	RSEQ_SIG_DATA
+
+/*
+ * Refer to the Linux kernel memory model (LKMM) for documentation of
+ * the memory barriers.
+ */
+
+/* CPU memory barrier. */
+#define rseq_smp_mb()	__asm__ __volatile__ ("dmb ish" ::: "memory")
+/* CPU read memory barrier */
+#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb ishld" ::: "memory")
+/* CPU write memory barrier */
+#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb ishst" ::: "memory")
+
+/* Acquire: One-way permeable barrier.
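+ * Later memory accesses cannot be reordered before it. A minimal sketch
+ * of how it pairs with rseq_smp_store_release() below (data and ready
+ * are hypothetical variables, use() a placeholder):
+ *
+ *	static intptr_t data, ready;
+ *
+ *	// publisher
+ *	data = 42;
+ *	rseq_smp_store_release(&ready, 1);
+ *
+ *	// observer: seeing ready == 1 guarantees data == 42 is visible
+ *	if (rseq_smp_load_acquire(&ready))
+ *		use(data);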
+ */
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u; \
+	switch (sizeof(*(p))) {						\
+	case 1:								\
+		__asm__ __volatile__ ("ldarb %w0, %1"			\
+			: "=r" (*(__u8 *)__u.__c)			\
+			: "Q" (*(p)) : "memory");			\
+		break;							\
+	case 2:								\
+		__asm__ __volatile__ ("ldarh %w0, %1"			\
+			: "=r" (*(__u16 *)__u.__c)			\
+			: "Q" (*(p)) : "memory");			\
+		break;							\
+	case 4:								\
+		__asm__ __volatile__ ("ldar %w0, %1"			\
+			: "=r" (*(__u32 *)__u.__c)			\
+			: "Q" (*(p)) : "memory");			\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ ("ldar %0, %1"			\
+			: "=r" (*(__u64 *)__u.__c)			\
+			: "Q" (*(p)) : "memory");			\
+		break;							\
+	}								\
+	(rseq_unqual_scalar_typeof(*(p)))__u.__val;			\
+})
+
+/* Acquire barrier after control dependency. */
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+/* Release: One-way permeable barrier. */
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u = \
+		{ .__val = (rseq_unqual_scalar_typeof(*(p))) (v) };	\
+	switch (sizeof(*(p))) {						\
+	case 1:								\
+		__asm__ __volatile__ ("stlrb %w1, %0"			\
+			: "=Q" (*(p))					\
+			: "r" (*(__u8 *)__u.__c)			\
+			: "memory");					\
+		break;							\
+	case 2:								\
+		__asm__ __volatile__ ("stlrh %w1, %0"			\
+			: "=Q" (*(p))					\
+			: "r" (*(__u16 *)__u.__c)			\
+			: "memory");					\
+		break;							\
+	case 4:								\
+		__asm__ __volatile__ ("stlr %w1, %0"			\
+			: "=Q" (*(p))					\
+			: "r" (*(__u32 *)__u.__c)			\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ ("stlr %1, %0"			\
+			: "=Q" (*(p))					\
+			: "r" (*(__u64 *)__u.__c)			\
+			: "memory");					\
+		break;							\
+	}								\
+} while (0)
+
+#define RSEQ_ASM_U64_PTR(x)	".quad " x
+#define RSEQ_ASM_U32(x)		".long " x
+
+/* Temporary scratch registers. */
+#define RSEQ_ASM_TMP_REG32	"w15"
+#define RSEQ_ASM_TMP_REG	"x15"
+#define RSEQ_ASM_TMP_REG_2	"x14"
+
+/* Common architecture support macros. */
+#include "rseq/arch/generic/common.h"
+
+/*
+ * Define a critical section abort handler.
+ *
+ * @label:
+ * Local label to the abort handler.
+ * @teardown:
+ * Sequence of instructions to run on abort.
+ * @abort_label:
+ * C label to jump to at the end of the sequence.
+ */
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+	"	b	222f\n"						\
+	"	.inst	" __rseq_str(RSEQ_SIG_CODE) "\n"		\
+	__rseq_str(label) ":\n"						\
+	teardown							\
+	"	b	%l[" __rseq_str(abort_label) "]\n"		\
+	"222:\n"
+
+/*
+ * Store the address of the critical section descriptor structure at
+ * @cs_label into the @rseq_cs pointer and emit the label @label, which
+ * is the beginning of the sequence of consecutive assembly instructions.
+ *
+ * @label:
+ * Local label to the beginning of the sequence of consecutive assembly
+ * instructions.
+ * @cs_label:
+ * Source local label to the critical section descriptor structure.
+ * @rseq_cs:
+ * Destination pointer where to store the address of the critical
+ * section descriptor structure.
+ */
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+	RSEQ_INJECT_ASM(1)						\
+	"	adrp	" RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG	\
+	"		, :lo12:" __rseq_str(cs_label) "\n"		\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \
+	__rseq_str(label) ":\n"
+
+/* Store @value to address @var. */
+#define RSEQ_ASM_OP_STORE(value, var)					\
+	"	str	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+/* Store-release @value to address @var. */
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var)				\
+	"	stlr	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+/*
+ * End-of-sequence store of @value to address @var. Emit
+ * @post_commit_label label after the store instruction.
+ */
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)		\
+	RSEQ_ASM_OP_STORE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+/*
+ * End-of-sequence store-release of @value to address @var. Emit
+ * @post_commit_label label after the store instruction.
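+ * The final store is the commit point of the critical section: the
+ * kernel only aborts the sequence while the instruction pointer is
+ * before the post-commit label, so once this store retires the whole
+ * sequence is guaranteed to have executed on a single CPU.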
+ */ +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE_RELEASE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +/* Jump to local label @label when @var != @expect. */ +#define RSEQ_ASM_OP_CBNE(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +/* + * Jump to local label @label when @var != @expect (32-bit register + * comparison). + */ +#define RSEQ_ASM_OP_CBNE32(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \ + ", %w[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n" + +/* Jump to local label @label when @var == @expect. */ +#define RSEQ_ASM_OP_CBEQ(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CBNE32(current_cpu_id, cpu_id, label) + +/* Load @var into temporary register. */ +#define RSEQ_ASM_OP_R_LOAD(var) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +/* Store from temporary register into @var. */ +#define RSEQ_ASM_OP_R_STORE(var) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +/* Load from address in temporary register+@offset into temporary register. */ +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + " ldr " RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(offset) "]]\n" + +/* Add @count to temporary register. */ +#define RSEQ_ASM_OP_R_ADD(count) \ + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(count) "]\n" + +/* + * End-of-sequence store of temporary register to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +/* + * Copy @len bytes from @src to @dst. This is an inefficient bytewise + * copy and could be improved in the future. + */ +#define RSEQ_ASM_OP_R_BYTEWISE_MEMCPY(dst, src, len) \ + " cbz %[" __rseq_str(len) "], 333f\n" \ + " mov " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n" \ + "222: sub " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n" \ + " ldrb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " strb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \ + "333:\n" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/aarch64/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/aarch64/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/aarch64/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/aarch64/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. 
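+ * These perform no cpu_id/mm_cid comparison. Each template pass above
+ * and below stamps out the operations with a distinct identifier
+ * suffix; callers normally go through the dispatch wrappers declared
+ * in <rseq/rseq.h>. For example, a per-CPU counter increment could
+ * look as follows (a sketch; assumes the public dispatch API at this
+ * commit and a hypothetical percpu_count array):
+ *
+ *	int ret;
+ *
+ *	do {
+ *		int cpu = rseq_current_cpu_raw();
+ *		ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED,
+ *					       RSEQ_PERCPU_CPU_ID,
+ *					       &percpu_count[cpu], 1, cpu);
+ *	} while (ret);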
*/ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/aarch64/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/aarch64/bits.h b/third_party/librseq/include/rseq/arch/aarch64/bits.h new file mode 100644 index 000000000000..a6a92a477492 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/aarch64/bits.h @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ +/* SPDX-FileCopyrightText: 2018 Will Deacon */ + +/* + * rseq/arch/aarch64/bits.h + */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[eq]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBEQ(v, expectnot, %l[eq]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CBEQ(v, expectnot, %l[error2]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" 
(rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expectnot] "r" (expectnot), + [load] "Qo" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +eq: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CBNE(v2, expect2, %l[ne]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) + RSEQ_ASM_OP_CBNE(v2, expect2, %l[error3]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expect] "r" (expect), + [v2] "Qo" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + 
rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BYTEWISE_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + 
[expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/arm.h b/third_party/librseq/include/rseq/arch/arm.h new file mode 100644 index 000000000000..d4163370ba44 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/arm.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2024 Mathieu Desnoyers */ + +/* + * rseq/arch/arm.h + */ + +#ifndef _RSEQ_RSEQ_H +#error "Never use directly; include instead." +#endif + +/* + * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those + * are not part of the public API. + */ + +/* + * - ARM little endian + * + * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand + * value 0x5de3. This traps if user-space reaches this instruction by mistake, + * and the uncommon operand ensures the kernel does not move the instruction + * pointer to attacker-controlled code on rseq abort. + * + * The instruction pattern in the A32 instruction set is: + * + * e7f5def3 udf #24035 ; 0x5de3 + * + * This translates to the following instruction pattern in the T16 instruction + * set: + * + * little endian: + * def3 udf #243 ; 0xf3 + * e7f5 b.n <7f5> + * + * - ARMv6+ big endian (BE8): + * + * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian + * code and big-endian data. The data value of the signature needs to have its + * byte order reversed to generate the trap instruction: + * + * Data: 0xf3def5e7 + * + * Translates to this A32 instruction pattern: + * + * e7f5def3 udf #24035 ; 0x5de3 + * + * Translates to this T16 instruction pattern: + * + * def3 udf #243 ; 0xf3 + * e7f5 b.n <7f5> + * + * - Prior to ARMv6 big endian (BE32): + * + * Prior to ARMv6, -mbig-endian generates big-endian code and data + * (which match), so the endianness of the data representation of the + * signature should not be reversed. However, the choice between BE32 + * and BE8 is done by the linker, so we cannot know whether code and + * data endianness will be mixed before the linker is invoked. So rather + * than try to play tricks with the linker, the rseq signature is simply + * data (not a trap instruction) prior to ARMv6 on big endian. This is + * why the signature is expressed as data (.word) rather than as + * instruction (.inst) in assembler. + */ + +#ifdef __ARMEB__ /* Big endian */ +# define RSEQ_SIG 0xf3def5e7 /* udf #24035 ; 0x5de3 (ARMv6+) */ +#else /* Little endian */ +# define RSEQ_SIG 0xe7f5def3 /* udf #24035 ; 0x5de3 */ +#endif + +/* + * Refer to the Linux kernel memory model (LKMM) for documentation of + * the memory barriers. + */ + +/* CPU memory barrier. 
*/ +#define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") +/* CPU read memory barrier */ +#define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") +/* CPU write memory barrier */ +#define rseq_smp_wmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") + +/* Acquire: One-way permeable barrier. */ +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +/* Acquire barrier after control dependency. */ +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +/* Release: One-way permeable barrier. */ +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +#ifdef __ARMEB__ /* Big endian */ +# define RSEQ_ASM_U64_PTR(x) ".word 0x0, " x +#else /* Little endian */ +# define RSEQ_ASM_U64_PTR(x) ".word " x ", 0x0" +#endif + +#define RSEQ_ASM_U32(x) ".word " x + +/* Common architecture support macros. */ +#include "rseq/arch/generic/common.h" + +/* + * Store the address of the critical section descriptor structure at + * @cs_label into the @rseq_cs pointer and emit the label @label, which + * is the beginning of the sequence of consecutive assembly instructions. + * + * @label: + * Local label to the beginning of the sequence of consecutive assembly + * instructions. + * @cs_label: + * Source local label to the critical section descriptor structure. + * @rseq_cs: + * Destination pointer where to store the address of the critical + * section descriptor structure. + */ +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "adr r0, " __rseq_str(cs_label) "\n\t" \ + "str r0, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +/* Only used in RSEQ_ASM_DEFINE_ABORT. */ +#define __RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ + __rseq_str(table_label) ":\n\t" \ + __RSEQ_ASM_DEFINE_CS_FIELDS(version, flags, \ + start_ip, post_commit_offset, abort_ip) "\n\t" \ + RSEQ_ASM_U32(__rseq_str(RSEQ_SIG)) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. + * @table_label: + * Local label to the critical section descriptor copy placed near + * the program counter. This is done for performance reasons because + * computing this address is faster than accessing the program data. + * + * The purpose of @start_ip, @post_commit_ip, and @abort_ip are + * documented in RSEQ_ASM_DEFINE_TABLE. + */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +/* + * Define a critical section teardown handler. + * + * @label: + * Local label to the teardown handler. + * @teardown: + * Sequence of instructions to run on teardown. + * @target_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_TEARDOWN(label, teardown, target_label) \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(target_label) "]\n\t" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. 
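+ * This is the guard that makes a sequence per-CPU: if the thread
+ * migrated after @cpu_id was sampled, the comparison fails and the
+ * caller can retry on the new CPU.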
*/ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "cmp %[" __rseq_str(cpu_id) "], r0\n\t" \ + "bne " __rseq_str(label) "\n\t" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/arm/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/arm/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/arm/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/arm/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. */ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/arm/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/arm/bits.h b/third_party/librseq/include/rseq/arch/arm/bits.h new file mode 100644 index 000000000000..b41da94812b7 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/arm/bits.h @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ + +/* + * rseq/arch/arm/bits.h + */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
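+ * Publishing this pointer arms the critical section: on preemption or
+ * signal delivery, the kernel checks whether the interrupted
+ * instruction pointer lies within [start_ip, start_ip +
+ * post_commit_offset) of the descriptor and, if so, restarts the
+ * thread at abort_ip.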
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
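+ * In pseudo-code, the sequence below implements a per-CPU list pop:
+ * if (*v == expectnot) return 1; *load = *v; *v = *(*v + voffp),
+ * where @voffp is the offset of the "next" link inside the node.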
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[eq]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[error2]\n\t" +#endif + "str r0, %[load]\n\t" + "add r0, %[voffp]\n\t" + "ldr r0, [r0]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +eq: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + "ldr r0, %[v]\n\t" + "add r0, %[count]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
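+ * Both @v and @v2 are compared before the single commit store to @v;
+ * this suits algorithms that must validate an auxiliary location
+ * (e.g. a generation counter) atomically with the update.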
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[ne]\n\t" + RSEQ_INJECT_ASM(4) + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[ne]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[error3]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
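+ * Note that the try store to @v2 below is not rolled back if the
+ * sequence aborts afterwards; only the final store to @v commits.
+ * @v2 must therefore be a location whose contents only become
+ * meaningful once the commit store is observed, or whose rewrite on
+ * retry is harmless.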
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* try store */ + "str %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "dmb\n\t" /* full mb provides store-release */ +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + /* + * Work-around register pressure limitations. + * Old gcc does not support output operands for asm goto, so + * input registers cannot simply be re-used as output registers. + * This is why clobbered registers are used. + */ + struct rseq_local { + uint32_t expect, dst, src, len, newv; + } rseq_local = { + .expect = (uint32_t) expect, + .dst = (uint32_t) dst, + .src = (uint32_t) src, + .len = (uint32_t) len, + .newv = (uint32_t) newv, + }; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
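+ * The bytewise copy below runs inside the critical section, so an
+ * abort may leave @dst partially written. This is safe for the
+ * intended use case where @dst only becomes reachable once the final
+ * commit store to @v is performed.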
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + /* load expect into r5 */ + "ldr r5, %[expect]\n\t" + "cmp r5, r0\n\t" + "bne %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp r5, r0\n\t" + "bne %l[error2]\n\t" +#endif + /* load dst into r5 */ + "ldr r5, %[dst]\n\t" + /* load src into r6 */ + "ldr r6, %[src]\n\t" + /* load len into r7 */ + "ldr r7, %[len]\n\t" + /* try memcpy */ + "cmp r7, #0\n\t" + "beq 333f\n\t" + "222:\n\t" + "ldrb %%r0, [r6]\n\t" + "strb %%r0, [r5]\n\t" + "adds r6, #1\n\t" + "adds r5, #1\n\t" + "subs r7, #1\n\t" + "bne 222b\n\t" + "333:\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "dmb\n\t" /* full mb provides store-release */ +#endif + /* load newv into r5 */ + "ldr r5, %[newv]\n\t" + /* final store */ + "str r5, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + /* try memcpy input */ + [expect] "m" (rseq_local.expect), /* r5 */ + [dst] "m" (rseq_local.dst), /* r5 */ + [src] "m" (rseq_local.src), /* r6 */ + [len] "m" (rseq_local.len), /* r7 */ + [newv] "m" (rseq_local.newv) /* r5 */ + RSEQ_INJECT_INPUT + : "r0", "r5", "r6", "r7", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/generic/common.h b/third_party/librseq/include/rseq/arch/generic/common.h new file mode 100644 index 000000000000..8a912b2517cf --- /dev/null +++ b/third_party/librseq/include/rseq/arch/generic/common.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers */ + +/* + * rseq/arch/generic/common.h: Common architecture support macros. + */ + +#ifndef _RSEQ_GENERIC_COMMON_H +#define _RSEQ_GENERIC_COMMON_H + +/* + * Define the rseq critical section descriptor fields. + */ + #define __RSEQ_ASM_DEFINE_CS_FIELDS(version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + RSEQ_ASM_U32(__rseq_str(version)) "\n\t" \ + RSEQ_ASM_U32(__rseq_str(flags)) "\n\t" \ + RSEQ_ASM_U64_PTR(__rseq_str(start_ip)) "\n\t" \ + RSEQ_ASM_U64_PTR(__rseq_str(post_commit_offset)) "\n\t" \ + RSEQ_ASM_U64_PTR(__rseq_str(abort_ip)) + +/* Only used in RSEQ_ASM_DEFINE_TABLE. 
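+ * The descriptor emitted into the __rseq_cs section mirrors the
+ * kernel ABI's struct rseq_cs:
+ *
+ *	struct rseq_cs {
+ *		__u32 version;
+ *		__u32 flags;
+ *		__u64 start_ip;
+ *		__u64 post_commit_offset;
+ *		__u64 abort_ip;
+ *	};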
*/
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+			start_ip, post_commit_offset, abort_ip) \
+		".pushsection __rseq_cs, \"aw\"\n\t" \
+		".balign 32\n\t" \
+		__rseq_str(label) ":\n\t" \
+		__RSEQ_ASM_DEFINE_CS_FIELDS(version, flags, \
+			start_ip, post_commit_offset, abort_ip) "\n\t" \
+		".popsection\n\t" \
+		".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+		RSEQ_ASM_U64_PTR(__rseq_str(label) "b") "\n\t" \
+		".popsection\n\t"
+
+/*
+ * Define an rseq critical section structure of version 0 with no flags.
+ *
+ * @label:
+ *   Local label for the beginning of the critical section descriptor
+ *   structure.
+ * @start_ip:
+ *   Pointer to the first instruction of the sequence of consecutive assembly
+ *   instructions.
+ * @post_commit_ip:
+ *   Pointer to the instruction after the last instruction of the sequence of
+ *   consecutive assembly instructions.
+ * @abort_ip:
+ *   Pointer to the instruction where to move the execution flow in case of
+ *   abort of the sequence of consecutive assembly instructions.
+ */
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+		(post_commit_ip) - (start_ip), abort_ip)
+
+/*
+ * Define the @exit_ip pointer as an exit point for the sequence of consecutive
+ * assembly instructions at @start_ip.
+ *
+ * @start_ip:
+ *   Pointer to the first instruction of the sequence of consecutive assembly
+ *   instructions.
+ * @exit_ip:
+ *   Pointer to an exit point instruction.
+ *
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+	".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+	RSEQ_ASM_U64_PTR(__rseq_str(start_ip)) "\n\t" \
+	RSEQ_ASM_U64_PTR(__rseq_str(exit_ip)) "\n\t" \
+	".popsection\n\t"
+
+#endif
diff --git a/third_party/librseq/include/rseq/arch/generic/thread-pointer.h b/third_party/librseq/include/rseq/arch/generic/thread-pointer.h
new file mode 100644
index 000000000000..300dfaaf9718
--- /dev/null
+++ b/third_party/librseq/include/rseq/arch/generic/thread-pointer.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2021 Mathieu Desnoyers */
+
+/*
+ * rseq/arch/generic/thread-pointer.h
+ */
+
+#ifndef _RSEQ_GENERIC_THREAD_POINTER_H
+#define _RSEQ_GENERIC_THREAD_POINTER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use gcc builtin thread pointer.
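+ * __builtin_thread_pointer() expands to a read of the platform's TLS
+ * register (for example tpidr_el0 on aarch64, tp on RISC-V), so no
+ * inline assembly is needed when the compiler provides the builtin.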
*/
+static inline __attribute__((always_inline))
+void *rseq_thread_pointer(void)
+{
+	return __builtin_thread_pointer();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/librseq/include/rseq/arch/mips.h b/third_party/librseq/include/rseq/arch/mips.h
new file mode 100644
index 000000000000..a811a0726bdb
--- /dev/null
+++ b/third_party/librseq/include/rseq/arch/mips.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2018 MIPS Tech LLC */
+/* SPDX-FileCopyrightText: 2016-2024 Mathieu Desnoyers */
+
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ */
+
+#ifndef _RSEQ_RSEQ_H
+#error "Never use <rseq/arch/mips.h> directly; include <rseq/rseq.h> instead."
+#endif
+
+#include <endian.h>
+
+/*
+ * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those
+ * are not part of the public API.
+ */
+
+#if (RSEQ_BITS_PER_LONG != 64) && (RSEQ_BITS_PER_LONG != 32)
+# error unsupported RSEQ_BITS_PER_LONG
+#endif
+
+/*
+ * RSEQ_SIG uses the break instruction. The instruction pattern is:
+ *
+ * On MIPS:
+ *	0350000d	break	0x350
+ *
+ * On nanoMIPS:
+ *	00100350	break	0x350
+ *
+ * On microMIPS:
+ *	0000d407	break	0x350
+ *
+ * For nanoMIPS32 and microMIPS, the instruction stream is encoded as 16-bit
+ * halfwords, so the signature halfwords need to be swapped accordingly for
+ * little-endian.
+ */
+#if defined(__nanomips__)
+# ifdef __MIPSEL__
+#  define RSEQ_SIG	0x03500010
+# else
+#  define RSEQ_SIG	0x00100350
+# endif
+#elif defined(__mips_micromips)
+# ifdef __MIPSEL__
+#  define RSEQ_SIG	0xd4070000
+# else
+#  define RSEQ_SIG	0x0000d407
+# endif
+#elif defined(__mips__)
+# define RSEQ_SIG	0x0350000d
+#else
+/* Unknown MIPS architecture. */
+#endif
+
+/*
+ * Refer to the Linux kernel memory model (LKMM) for documentation of
+ * the memory barriers.
+ */
+
+/* CPU memory barrier. */
+#define rseq_smp_mb()	__asm__ __volatile__ ("sync" ::: "memory")
+/* CPU read memory barrier */
+#define rseq_smp_rmb()	rseq_smp_mb()
+/* CPU write memory barrier */
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+/* Acquire: One-way permeable barrier. */
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
+	rseq_smp_mb(); \
+	____p1; \
+})
+
+/* Acquire barrier after control dependency. */
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+/* Release: One-way permeable barrier. */
+#define rseq_smp_store_release(p, v) \
+do { \
+	rseq_smp_mb(); \
+	RSEQ_WRITE_ONCE(*(p), v); \
+} while (0)
+
+/*
+ * Helper macros to define and access a variable of long integer type.
+ * Only used internally in rseq headers.
+ */
+#if RSEQ_BITS_PER_LONG == 64
+# define RSEQ_ASM_LONG		".dword"
+# define RSEQ_ASM_LONG_LA	"dla"
+# define RSEQ_ASM_LONG_L	"ld"
+# define RSEQ_ASM_LONG_S	"sd"
+# define RSEQ_ASM_LONG_ADDI	"daddiu"
+#else
+# define RSEQ_ASM_LONG		".word"
+# define RSEQ_ASM_LONG_LA	"la"
+# define RSEQ_ASM_LONG_L	"lw"
+# define RSEQ_ASM_LONG_S	"sw"
+# define RSEQ_ASM_LONG_ADDI	"addiu"
+#endif
+
+/*
+ * Helper macros to define a variable of pointer type stored in a 64-bit
+ * integer. Only used internally in rseq headers.
+ */
+#if RSEQ_BITS_PER_LONG == 64
+# define RSEQ_ASM_U64_PTR(x)	".dword " x
+#else
+# if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN)
+#  define RSEQ_ASM_U64_PTR(x)	".word 0x0, " x
+# else
+#  define RSEQ_ASM_U64_PTR(x)	".word " x ", 0x0"
+# endif
+#endif
+
+#define RSEQ_ASM_U32(x)	".word " x
+
+/* Common architecture support macros.
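+ * (rseq/arch/generic/common.h defines RSEQ_ASM_DEFINE_TABLE and
+ * RSEQ_ASM_DEFINE_EXIT_POINT in terms of the RSEQ_ASM_U32() and
+ * RSEQ_ASM_U64_PTR() emitters above.)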
*/ +#include "rseq/arch/generic/common.h" + +/* Only used in RSEQ_ASM_DEFINE_ABORT. */ +#define __RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ + __rseq_str(table_label) ":\n\t" \ + __RSEQ_ASM_DEFINE_CS_FIELDS(version, flags, \ + start_ip, post_commit_offset, abort_ip) "\n\t" \ + RSEQ_ASM_U32(__rseq_str(RSEQ_SIG)) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. + * @table_label: + * Local label to the critical section descriptor copy placed near + * the program counter. This is done for performance reasons because + * computing this address is faster than accessing the program data. + * + * The purpose of @start_ip, @post_commit_ip, and @abort_ip are + * documented in RSEQ_ASM_DEFINE_TABLE. + */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label, \ + table_label, 0x0, 0x0, start_ip, \ + (post_commit_ip) - (start_ip), abort_ip) + +/* + * Define a critical section teardown handler. + * + * @label: + * Local label to the teardown handler. + * @teardown: + * Sequence of instructions to run on teardown. + * @target_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_TEARDOWN(label, teardown, target_label) \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(target_label) "]\n\t" + +/* + * Store the address of the critical section descriptor structure at + * @cs_label into the @rseq_cs pointer and emit the label @label, which + * is the beginning of the sequence of consecutive assembly instructions. + * + * @label: + * Local label to the beginning of the sequence of consecutive assembly + * instructions. + * @cs_label: + * Source local label to the critical section descriptor structure. + * @rseq_cs: + * Destination pointer where to store the address of the critical + * section descriptor structure. + */ +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + RSEQ_ASM_LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \ + RSEQ_ASM_LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/mips/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/mips/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/mips/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/mips/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. 
*/ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/mips/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/mips/bits.h b/third_party/librseq/include/rseq/arch/mips/bits.h new file mode 100644 index 000000000000..b72353f0823a --- /dev/null +++ b/third_party/librseq/include/rseq/arch/mips/bits.h @@ -0,0 +1,469 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2018 MIPS Tech LLC */ +/* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ + +/* + * Author: Paul Burton + */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[eq]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[error2]\n\t" +#endif + RSEQ_ASM_LONG_S " $4, %[load]\n\t" + RSEQ_ASM_LONG_ADDI " $4, %[voffp]\n\t" + RSEQ_ASM_LONG_L " $4, 0($4)\n\t" + /* final store */ + RSEQ_ASM_LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +eq: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + RSEQ_ASM_LONG_ADDI " $4, %[count]\n\t" + /* final store */ + RSEQ_ASM_LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[ne]\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[ne]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" + RSEQ_ASM_LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[error3]\n\t" +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + RSEQ_ASM_LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "sync\n\t" /* full sync provides store-release */ +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, "", abort, 3, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + RSEQ_ASM_LONG_S " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_LONG_S " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 6f) + RSEQ_ASM_LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + RSEQ_ASM_LONG_ADDI " %[src], 1\n\t" \ + RSEQ_ASM_LONG_ADDI " %[dst], 1\n\t" \ + RSEQ_ASM_LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "sync\n\t" /* full sync provides store-release */ +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 3, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_TEARDOWN(5, + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + ne) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_TEARDOWN(6, + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_TEARDOWN(7, + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/ppc.h b/third_party/librseq/include/rseq/arch/ppc.h new file mode 100644 index 000000000000..d1b25cfac788 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/ppc.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2024 Mathieu Desnoyers */ +/* SPDX-FileCopyrightText: 2016-2018 Boqun Feng */ + +/* + * rseq/arch/ppc.h + */ + +#ifndef _RSEQ_RSEQ_H +#error "Never use directly; include instead." +#endif + +/* + * RSEQ_ASM_*() macro helpers are internal to the librseq headers. 
Those + * are not part of the public API. + */ + +/* + * RSEQ_SIG is used with the following trap instruction: + * + * powerpc-be: 0f e5 00 0b twui r5,11 + * powerpc64-le: 0b 00 e5 0f twui r5,11 + * powerpc64-be: 0f e5 00 0b twui r5,11 + */ + +#define RSEQ_SIG 0x0fe5000b + +/* + * Refer to the Linux kernel memory model (LKMM) for documentation of + * the memory barriers. + */ + +/* CPU memory barrier. */ +#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc") +/* Only used internally in this header. */ +#define __rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc") +/* CPU read memory barrier */ +#define rseq_smp_rmb() __rseq_smp_lwsync() +/* CPU write memory barrier */ +#define rseq_smp_wmb() __rseq_smp_lwsync() + +/* Acquire: One-way permeable barrier. */ +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + __rseq_smp_lwsync(); \ + ____p1; \ +}) + +/* Acquire barrier after control dependency. */ +#define rseq_smp_acquire__after_ctrl_dep() __rseq_smp_lwsync() + +/* Release: One-way permeable barrier. */ +#define rseq_smp_store_release(p, v) \ +do { \ + __rseq_smp_lwsync(); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +/* + * Helper macros to define and access a variable of long integer type. + * Only used internally in rseq headers. + */ +#ifdef RSEQ_ARCH_PPC64 +# define RSEQ_ASM_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +# define RSEQ_ASM_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +# define RSEQ_ASM_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +# define RSEQ_ASM_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +# define RSEQ_ASM_LOADX_LONG "ldx " /* From base register ("b" constraint) */ +# define RSEQ_ASM_CMP_LONG "cmpd " /* Register-to-register comparison */ +# define RSEQ_ASM_CMP_LONG_INT "cmpdi " /* Register-to-immediate comparison */ +#else +# define RSEQ_ASM_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +# define RSEQ_ASM_STORE_INT(arg) RSEQ_ASM_STORE_LONG(arg) /* To memory ("m" constraint) */ +# define RSEQ_ASM_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +# define RSEQ_ASM_LOAD_INT(arg) RSEQ_ASM_LOAD_LONG(arg) /* From memory ("m" constraint) */ +# define RSEQ_ASM_LOADX_LONG "lwzx " /* From base register ("b" constraint) */ +# define RSEQ_ASM_CMP_LONG "cmpw " /* Register-to-register comparison */ +# define RSEQ_ASM_CMP_LONG_INT "cmpwi " /* Register-to-immediate comparison */ +#endif + +/* + * Helper macros to define a variable of pointer type stored in a 64-bit + * integer. Only used internally in rseq headers. + */ +#ifdef RSEQ_ARCH_PPC64 +# define RSEQ_ASM_U64_PTR(x) ".quad " x +#else +/* 32-bit only supported on big endian. */ +# define RSEQ_ASM_U64_PTR(x) ".long 0x0, " x +#endif + +#define RSEQ_ASM_U32(x) ".long " x + +/* Common architecture support macros. */ +#include "rseq/arch/generic/common.h" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. 
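+ *
+ * Note that the RSEQ_SIG word is emitted immediately before the abort
+ * handler: the kernel verifies that the 4 bytes preceding abort_ip
+ * match the signature registered through sys_rseq() before branching
+ * there, so a critical section cannot be aborted into arbitrary code.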
+ */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + RSEQ_ASM_U32(__rseq_str(RSEQ_SIG)) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +/* + * Store the address of the critical section descriptor structure at + * @cs_label into the @rseq_cs pointer and emit the label @label, which + * is the beginning of the sequence of consecutive assembly instructions. + * + * @label: + * Local label to the beginning of the sequence of consecutive assembly + * instructions. + * @cs_label: + * Source local label to the critical section descriptor structure. + * @rseq_cs: + * Destination pointer where to store the address of the critical + * section descriptor structure. + */ +#ifdef RSEQ_ARCH_PPC64 +# define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t" \ + "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t" \ + "rldicr %%r17, %%r17, 32, 31\n\t" \ + "oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t" \ + "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ + "std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" +#else +# define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ + "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ + RSEQ_ASM_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" +#endif + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ + "bne- cr7, " __rseq_str(label) "\n\t" + +/* + * RSEQ_ASM_OPs: asm operations for rseq. Only used internally by rseq headers. + * RSEQ_ASM_OP_R_*: has hard-coded registers in it + * RSEQ_ASM_OP_* (else): doesn't have hard-coded registers(unless cr7) + */ + +/* Jump to local label @label when @var != @expect. */ +#define RSEQ_ASM_OP_CBNE(var, expect, label) \ + RSEQ_ASM_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_ASM_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + "bne- cr7, " __rseq_str(label) "\n\t" + +/* Jump to local label @label when @var == @expect. */ +#define RSEQ_ASM_OP_CBEQ(var, expect, label) \ + RSEQ_ASM_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_ASM_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + "beq- cr7, " __rseq_str(label) "\n\t" + +/* Store @value to address @var. */ +#define RSEQ_ASM_OP_STORE(value, var) \ + RSEQ_ASM_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + +/* Load @var to r17 */ +#define RSEQ_ASM_OP_R_LOAD(var) \ + RSEQ_ASM_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" + +/* Store r17 to @var */ +#define RSEQ_ASM_OP_R_STORE(var) \ + RSEQ_ASM_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" + +/* Add @count to r17 */ +#define RSEQ_ASM_OP_R_ADD(count) \ + "add %%r17, %[" __rseq_str(count) "], %%r17\n\t" + +/* Load (r17 + voffp) to r17 */ +#define RSEQ_ASM_OP_R_LOADX(voffp) \ + RSEQ_ASM_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + +/* + * Copy @len bytes from @src to @dst. This is an inefficient bytewise + * copy and could be improved in the future. 
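+ *
+ * Net effect, as a C sketch (r19 = len, r20 = src, r21 = dst, r18 =
+ * byte scratch; lbzu/stbu pre-increment the pointers, hence the
+ * initial -1 adjustment of src and dst):
+ *
+ *	for (; len > 0; len--)
+ *		*dst++ = *src++;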
+ */ +#define RSEQ_ASM_OP_R_BYTEWISE_MEMCPY() \ + RSEQ_ASM_CMP_LONG_INT "%%r19, 0\n\t" \ + "beq 333f\n\t" \ + "addi %%r20, %%r20, -1\n\t" \ + "addi %%r21, %%r21, -1\n\t" \ + "222:\n\t" \ + "lbzu %%r18, 1(%%r20)\n\t" \ + "stbu %%r18, 1(%%r21)\n\t" \ + "addi %%r19, %%r19, -1\n\t" \ + RSEQ_ASM_CMP_LONG_INT "%%r19, 0\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + +/* + * End-of-sequence store of r17 to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + RSEQ_ASM_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + __rseq_str(post_commit_label) ":\n\t" + +/* + * End-of-sequence store of @value to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + __rseq_str(post_commit_label) ":\n\t" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/ppc/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/ppc/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/ppc/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/ppc/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. */ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/ppc/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/ppc/bits.h b/third_party/librseq/include/rseq/arch/ppc/bits.h new file mode 100644 index 000000000000..35ad5dbdb1c6 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/ppc/bits.h @@ -0,0 +1,460 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ +/* SPDX-FileCopyrightText: 2016-2018 Boqun Feng */ + +/* + * rseq/arch/ppc/bits.h + */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
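+ * (Label 3 is the descriptor emitted by RSEQ_ASM_DEFINE_TABLE(3, ...)
+ * at the top of this asm block, hence the backward reference "3b".)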
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CBEQ(v, expectnot, %l[eq]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CBEQ(v, expectnot, %l[error2]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* store it in @load */ + RSEQ_ASM_OP_R_STORE(load) + /* dereference voffp(v) */ + RSEQ_ASM_OP_R_LOADX(voffp) + /* final store the value at voffp(v) */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "b" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +eq: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* add @count to it */ + RSEQ_ASM_OP_R_ADD(count) + /* final store */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CBNE(v2, expect2, %l[ne]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CBNE(v2, expect2, %l[error3]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) 
|| defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + /* try store */ + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + /* for 'release' */ + "lwsync\n\t" +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* setup for mempcy */ + "mr %%r19, %[len]\n\t" + "mr %%r20, %[src]\n\t" + "mr %%r21, %[dst]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[ne]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CBNE(v, expect, %l[error2]) +#endif + /* try memcpy */ + RSEQ_ASM_OP_R_BYTEWISE_MEMCPY() + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + /* for 'release' */ + "lwsync\n\t" +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17", "r18", "r19", "r20", "r21" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/ppc/thread-pointer.h b/third_party/librseq/include/rseq/arch/ppc/thread-pointer.h new file mode 100644 index 000000000000..48da16893776 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/ppc/thread-pointer.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2021 Mathieu Desnoyers */ + +/* + * rseq/arch/ppc/thread-pointer.h + */ + +#ifndef _RSEQ_PPC_THREAD_POINTER +#define _RSEQ_PPC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +static inline __attribute__((always_inline)) +void *rseq_thread_pointer(void) +{ +#ifdef __powerpc64__ + register void *__result asm ("r13"); +#else + register void *__result asm ("r2"); +#endif + asm ("" : "=r" (__result)); + return __result; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/librseq/include/rseq/arch/riscv.h b/third_party/librseq/include/rseq/arch/riscv.h new file mode 100644 index 000000000000..c6adc7f3ba18 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/riscv.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2022 Vincent Chen */ +/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers */ + +/* + * rseq-riscv.h + */ + +/* + * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those + * are not part of the public API. + */ + +#ifndef _RSEQ_RSEQ_H +#error "Never use directly; include instead." +#endif + +/* + * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike + * other architectures, the ebreak instruction has no immediate field for + * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG. 
+ * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it + * is an uncommon instruction and will raise an illegal instruction + * exception when executed in all modes. + */ +#include + +#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) +#define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */ +#else +#error "Currently, RSEQ only supports Little-Endian version" +#endif + +/* + * Instruction selection between 32-bit/64-bit. Used internally in the + * rseq headers. + */ +#if __riscv_xlen == 64 +#define __RSEQ_ASM_REG_SEL(a, b) a +#elif __riscv_xlen == 32 +#define __RSEQ_ASM_REG_SEL(a, b) b +#endif + +#define RSEQ_ASM_REG_L __RSEQ_ASM_REG_SEL("ld ", "lw ") +#define RSEQ_ASM_REG_S __RSEQ_ASM_REG_SEL("sd ", "sw ") + +/* + * Refer to the Linux kernel memory model (LKMM) for documentation of + * the memory barriers. + */ + +/* Only used internally in rseq headers. */ +#define RSEQ_ASM_RISCV_FENCE(p, s) \ + __asm__ __volatile__ ("fence " #p "," #s : : : "memory") +/* CPU memory barrier. */ +#define rseq_smp_mb() RSEQ_ASM_RISCV_FENCE(rw, rw) +/* CPU read memory barrier */ +#define rseq_smp_rmb() RSEQ_ASM_RISCV_FENCE(r, r) +/* CPU write memory barrier */ +#define rseq_smp_wmb() RSEQ_ASM_RISCV_FENCE(w, w) + +/* Acquire: One-way permeable barrier. */ +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + RSEQ_ASM_RISCV_FENCE(r, rw); \ + ____p1; \ +}) + +/* Acquire barrier after control dependency. */ +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +/* Release: One-way permeable barrier. */ +#define rseq_smp_store_release(p, v) \ +do { \ + RSEQ_ASM_RISCV_FENCE(rw, w); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +#define RSEQ_ASM_U64_PTR(x) ".quad " x +#define RSEQ_ASM_U32(x) ".long " x + +/* Temporary registers. */ +#define RSEQ_ASM_TMP_REG_1 "t6" +#define RSEQ_ASM_TMP_REG_2 "t5" +#define RSEQ_ASM_TMP_REG_3 "t4" +#define RSEQ_ASM_TMP_REG_4 "t3" + +/* Common architecture support macros. */ +#include "rseq/arch/generic/common.h" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + "j 222f\n" \ + ".balign 4\n" \ + RSEQ_ASM_U32(__rseq_str(RSEQ_SIG)) "\n" \ + __rseq_str(label) ":\n" \ + teardown \ + "j %l[" __rseq_str(abort_label) "]\n" \ + "222:\n" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ + RSEQ_ASM_REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ + __rseq_str(label) ":\n" + +/* Store @value to address @var. */ +#define RSEQ_ASM_OP_STORE(value, var) \ + RSEQ_ASM_REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +/* Jump to local label @label when @var != @expect. */ +#define RSEQ_ASM_OP_CBNE(var, expect, label) \ + RSEQ_ASM_REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +/* + * Jump to local label @label when @var != @expect (32-bit register + * comparison). 
+ */ +#define RSEQ_ASM_OP_CBNE32(var, expect, label) \ + "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +/* Jump to local label @label when @var == @expect. */ +#define RSEQ_ASM_OP_CBEQ(var, expect, label) \ + RSEQ_ASM_REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CBNE32(current_cpu_id, cpu_id, label) + +/* Load @var into temporary register. */ +#define RSEQ_ASM_OP_R_LOAD(var) \ + RSEQ_ASM_REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +/* Store from temporary register into @var. */ +#define RSEQ_ASM_OP_R_STORE(var) \ + RSEQ_ASM_REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +/* Load from address in temporary register+@offset into temporary register. */ +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ + RSEQ_ASM_TMP_REG_1 "\n" \ + RSEQ_ASM_REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" + +/* Add @count to temporary register. */ +#define RSEQ_ASM_OP_R_ADD(count) \ + "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + ", %[" __rseq_str(count) "]\n" + +/* + * End-of-sequence store of @value to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +/* + * End-of-sequence store-release of @value to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + "fence rw, w\n" \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +/* + * End-of-sequence store of temporary register to address @var. Emit + * @post_commit_label label after the store instruction. + */ +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + RSEQ_ASM_REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +/* + * Copy @len bytes from @src to @dst. This is an inefficient bytewise + * copy and could be improved in the future. + */ +#define RSEQ_ASM_OP_R_BYTEWISE_MEMCPY(dst, src, len) \ + "beqz %[" __rseq_str(len) "], 333f\n" \ + "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \ + "222:\n" \ + "lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \ + "sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \ + "addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \ + "addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \ + "addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \ + "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ + "333:\n" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/riscv/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/riscv/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. 
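+ * (mm_cid is the per-process concurrency ID introduced in Linux 6.3;
+ * unlike cpu_id it stays below the number of concurrently running
+ * threads in the process, keeping per-index data structures compact.)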
*/ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/riscv/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/riscv/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. */ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/riscv/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/riscv/bits.h b/third_party/librseq/include/rseq/arch/riscv/bits.h new file mode 100644 index 000000000000..799e5b950708 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/riscv/bits.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2022 Vincent Chen */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[ne]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, "%l[ne]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CBNE(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[eq]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBEQ(v, expectnot, "%l[eq]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CBEQ(v, expectnot, "%l[error2]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) 
+ : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expectnot] "r" (expectnot), + [load] "m" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +eq: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[ne]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, "%l[ne]") + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CBNE(v2, expect2, "%l[ne]") + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CBNE(v, expect, "%l[error2]") + RSEQ_ASM_OP_CBNE(v2, expect2, "%l[error3]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [v2] "m" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd 
expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[ne]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, "%l[ne]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CBNE(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[ne]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CBNE(v, expect, "%l[ne]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CBNE(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BYTEWISE_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef 
RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +ne: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/riscv/thread-pointer.h b/third_party/librseq/include/rseq/arch/riscv/thread-pointer.h new file mode 100644 index 000000000000..7fa2253847df --- /dev/null +++ b/third_party/librseq/include/rseq/arch/riscv/thread-pointer.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2024 Michael Jeanson */ + +/* + * rseq/arch/riscv/thread-pointer.h + */ + +#ifndef _RSEQ_RISCV_THREAD_POINTER +#define _RSEQ_RISCV_THREAD_POINTER + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if __GNUC_PREREQ (10, 3) +static inline __attribute__((always_inline)) +void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} +#else +static inline __attribute__((always_inline)) +void *rseq_thread_pointer(void) +{ + void *__result; + + __asm__ ("mv %0, tp" : "=r" (__result)); + return __result; +} +#endif /* !GCC 10.3 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/librseq/include/rseq/arch/s390.h b/third_party/librseq/include/rseq/arch/s390.h new file mode 100644 index 000000000000..9804b9d20001 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/s390.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2018 Vasily Gorbik */ +/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers */ + +/* + * rseq-s390.h + */ + +/* + * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those + * are not part of the public API. + */ + +#ifndef _RSEQ_RSEQ_H +#error "Never use directly; include instead." +#endif + +/* + * RSEQ_SIG uses the trap4 instruction. As Linux does not make use of the + * access-register mode nor the linkage stack this instruction will always + * cause a special-operation exception (the trap-enabled bit in the DUCT + * is and will stay 0). The instruction pattern is + * b2 ff 0f ff trap4 4095(%r0) + */ +#define RSEQ_SIG 0xB2FF0FFF + +/* + * Refer to the Linux kernel memory model (LKMM) for documentation of + * the memory barriers. + */ + +/* CPU memory barrier. */ +#define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory") +/* CPU read memory barrier */ +#define rseq_smp_rmb() rseq_smp_mb() +/* CPU write memory barrier */ +#define rseq_smp_wmb() rseq_smp_mb() + +/* Acquire: One-way permeable barrier. */ +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + rseq_barrier(); \ + ____p1; \ +}) + +/* Acquire barrier after control dependency. */ +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +/* Release: One-way permeable barrier. */ +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_barrier(); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +/* + * Helper macros to access a variable of long integer type. Only used + * internally in rseq headers. 
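+ * On s390x they expand to the 64-bit instruction forms (lg, stg, ltgr, ...); + * on 31-bit s390 they expand to the 32-bit forms (l, st, ltr, ...).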
+ */ +#ifdef RSEQ_ARCH_S390X +# define RSEQ_ASM_LONG_L "lg" +# define RSEQ_ASM_LONG_S "stg" +# define RSEQ_ASM_LONG_LT_R "ltgr" +# define RSEQ_ASM_LONG_CMP "cg" +# define RSEQ_ASM_LONG_CMP_R "cgr" +# define RSEQ_ASM_LONG_ADDI "aghi" +# define RSEQ_ASM_LONG_ADD_R "agr" +#else +# define RSEQ_ASM_LONG_L "l" +# define RSEQ_ASM_LONG_S "st" +# define RSEQ_ASM_LONG_LT_R "ltr" +# define RSEQ_ASM_LONG_CMP "c" +# define RSEQ_ASM_LONG_CMP_R "cr" +# define RSEQ_ASM_LONG_ADDI "ahi" +# define RSEQ_ASM_LONG_ADD_R "ar" +#endif + +#ifdef RSEQ_ARCH_S390X +# define RSEQ_ASM_U64_PTR(x) ".quad " x +#else +/* 32-bit only supported on big endian. */ +# define RSEQ_ASM_U64_PTR(x) ".long 0x0, " x +#endif + +#define RSEQ_ASM_U32(x) ".long " x + +/* Common architecture support macros. */ +#include "rseq/arch/generic/common.h" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + RSEQ_ASM_U32(__rseq_str(RSEQ_SIG)) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jg %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +/* + * Define a critical section teardown handler. + * + * @label: + * Local label to the teardown handler. + * @teardown: + * Sequence of instructions to run on teardown. + * @target_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_TEARDOWN(label, teardown, target_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jg %l[" __rseq_str(target_label) "]\n\t" \ + ".popsection\n\t" + +/* + * Store the address of the critical section descriptor structure at + * @cs_label into the @rseq_cs pointer and emit the label @label, which + * is the beginning of the sequence of consecutive assembly instructions. + * + * @label: + * Local label to the beginning of the sequence of consecutive assembly + * instructions. + * @cs_label: + * Source local label to the critical section descriptor structure. + * @rseq_cs: + * Destination pointer where to store the address of the critical + * section descriptor structure. + */ +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "larl %%r0, " __rseq_str(cs_label) "\n\t" \ + RSEQ_ASM_LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/s390/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/s390/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/s390/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/s390/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. 
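Only the relaxed memory-ordering variants are expanded for these.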
*/ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/s390/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/arch/s390/bits.h b/third_party/librseq/include/rseq/arch/s390/bits.h new file mode 100644 index 000000000000..ef2ff69eff88 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/s390/bits.h @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2018 Vasily Gorbik */ + +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. + */ +#include "rseq/pseudocode.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
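On preemption or signal delivery the kernel consults this pointer: if the ip lies within the critical section it branches to the abort handler, and it clears rseq_cs either way.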
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_L " %%r1, %[v]\n\t" + RSEQ_ASM_LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[eq]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_L " %%r1, %[v]\n\t" + RSEQ_ASM_LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + RSEQ_ASM_LONG_S " %%r1, %[load]\n\t" + RSEQ_ASM_LONG_ADD_R " %%r1, %[voffp]\n\t" + RSEQ_ASM_LONG_L " %%r1, 0(%%r1)\n\t" + /* final store */ + RSEQ_ASM_LONG_S " %%r1, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "r" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0", "r1" + RSEQ_INJECT_CLOBBER + : abort, eq +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +eq: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_LONG_L " %%r0, %[v]\n\t" + RSEQ_ASM_LONG_ADD_R " %%r0, %[count]\n\t" + /* final store */ + RSEQ_ASM_LONG_S " %%r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
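This double-compare helper commits newv to *v only after both *v == expect and *v2 == expect2 have been verified.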
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[ne]\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[ne]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" + RSEQ_ASM_LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
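Because s390 is TSO, the same plain stores serve both the relaxed and release expansions of this helper.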
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[ne]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + RSEQ_ASM_LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + RSEQ_ASM_LONG_S " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_LONG_S " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. 
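src, dst and len were spilled to rseq_scratch[] above because the copy loop consumes them; every exit path reloads them.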
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, 6f) + RSEQ_ASM_LONG_CMP " %[expect], %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + RSEQ_ASM_LONG_LT_R " %[len], %[len]\n\t" + "jz 333f\n\t" + "222:\n\t" + "ic %%r0,0(%[src])\n\t" + "stc %%r0,0(%[dst])\n\t" + RSEQ_ASM_LONG_ADDI " %[src], 1\n\t" + RSEQ_ASM_LONG_ADDI " %[dst], 1\n\t" + RSEQ_ASM_LONG_ADDI " %[len], -1\n\t" + "jnz 222b\n\t" + "333:\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + RSEQ_ASM_LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + abort) + RSEQ_ASM_DEFINE_TEARDOWN(5, + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + ne) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_TEARDOWN(6, + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_TEARDOWN(7, + RSEQ_ASM_LONG_L " %[len], %[rseq_scratch2]\n\t" + RSEQ_ASM_LONG_L " %[dst], %[rseq_scratch1]\n\t" + RSEQ_ASM_LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, ne +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +ne: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ + +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/arch/templates/bits-reset.h b/third_party/librseq/include/rseq/arch/templates/bits-reset.h new file mode 100644 index 000000000000..db9874d94d2b --- /dev/null +++ b/third_party/librseq/include/rseq/arch/templates/bits-reset.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */ + +/* + * rseq/arch/templates/bits-reset.h + */ + +#undef RSEQ_TEMPLATE_IDENTIFIER +#undef RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD +#undef RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET +#undef RSEQ_TEMPLATE_SUFFIX diff --git a/third_party/librseq/include/rseq/arch/templates/bits.h 
b/third_party/librseq/include/rseq/arch/templates/bits.h new file mode 100644 index 000000000000..1f8a38e85104 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/templates/bits.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */ + +/* + * rseq/arch/templates/bits.h + */ + +#ifdef RSEQ_TEMPLATE_INDEX_CPU_ID +# define RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET RSEQ_ASM_CPU_ID_OFFSET +# define RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD cpu_id +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release_cpu_id +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed_cpu_id +# else +# error "Never use directly; include instead." +# endif +#elif defined(RSEQ_TEMPLATE_INDEX_MM_CID) +# define RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET RSEQ_ASM_MM_CID_OFFSET +# define RSEQ_TEMPLATE_INDEX_CPU_ID_FIELD mm_cid +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release_mm_cid +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed_mm_cid +# else +# error "Never use directly; include instead." +# endif +#elif defined (RSEQ_TEMPLATE_INDEX_NONE) +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed +# else +# error "Never use directly; include instead." +# endif +#else +# error "Never use directly; include instead." +#endif + +#define RSEQ_TEMPLATE_IDENTIFIER(x) RSEQ_COMBINE_TOKENS(x, RSEQ_TEMPLATE_SUFFIX) + diff --git a/third_party/librseq/include/rseq/arch/x86.h b/third_party/librseq/include/rseq/arch/x86.h new file mode 100644 index 000000000000..a6a406811301 --- /dev/null +++ b/third_party/librseq/include/rseq/arch/x86.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2024 Mathieu Desnoyers */ + +/* + * rseq/arch/x86.h + */ + +#ifndef _RSEQ_RSEQ_H +#error "Never use directly; include instead." +#endif + +#include + +/* + * RSEQ_ASM_*() macro helpers are internal to the librseq headers. Those + * are not part of the public API. + */ + +/* + * RSEQ_SIG is used with the following reserved undefined instructions, which + * trap in user-space: + * + * x86-32: 0f b9 3d 53 30 05 53 ud1 0x53053053,%edi + * x86-64: 0f b9 3d 53 30 05 53 ud1 0x53053053(%rip),%edi + */ +#define RSEQ_SIG 0x53053053 + +/* + * Due to a compiler optimization bug in gcc-8 with asm goto and TLS asm input + * operands, we cannot use "m" input operands, and rather pass the __rseq_abi + * address through a "r" input operand. + */ + +/* + * Offset of cpu_id, rseq_cs, and mm_cid fields in struct rseq. Those + * are defined explicitly as macros to be used from assembly. + */ +#define RSEQ_ASM_CPU_ID_OFFSET 4 +#define RSEQ_ASM_CS_OFFSET 8 +#define RSEQ_ASM_MM_CID_OFFSET 24 + +/* + * Refer to the Linux kernel memory model (LKMM) for documentation of + * the memory barriers. Expect all x86 hardware to be x86-TSO (Total + * Store Order). + */ + +/* CPU memory barrier. */ +#define rseq_smp_mb() \ + __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +/* CPU read memory barrier */ +#define rseq_smp_rmb() rseq_barrier() +/* CPU write memory barrier */ +#define rseq_smp_wmb() rseq_barrier() + +/* Acquire: One-way permeable barrier. */ +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + rseq_barrier(); \ + ____p1; \ +}) + +/* Acquire barrier after control dependency. 
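On x86-TSO a compiler barrier suffices here; no fence instruction is emitted.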
*/ +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +/* Release: One-way permeable barrier. */ +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_barrier(); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +/* Segment selector for the thread pointer. */ +#ifdef RSEQ_ARCH_AMD64 +# define RSEQ_ASM_TP_SEGMENT %%fs +#else +# define RSEQ_ASM_TP_SEGMENT %%gs +#endif + +/* + * Helper macro to define a variable of pointer type stored in a 64-bit + * integer. Only used internally in rseq headers. + */ +#ifdef RSEQ_ARCH_AMD64 +# define RSEQ_ASM_U64_PTR(x) ".quad " x +#else +# define RSEQ_ASM_U64_PTR(x) ".long " x ", 0x0" +#endif + +#define RSEQ_ASM_U32(x) ".long " x + +/* Common architecture support macros. */ +#include "rseq/arch/generic/common.h" + +/* + * Define a critical section abort handler. + * + * @label: + * Local label to the abort handler. + * @teardown: + * Sequence of instructions to run on abort. + * @abort_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + /* \ + * Disassembler-friendly signature: \ + * x86-32: ud1 ,%edi \ + * x86-64: ud1 (%rip),%edi \ + */ \ + ".byte 0x0f, 0xb9, 0x3d\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +/* + * Define a critical section teardown handler. + * + * @label: + * Local label to the teardown handler. + * @teardown: + * Sequence of instructions to run on teardown. + * @target_label: + * C label to jump to at the end of the sequence. + */ +#define RSEQ_ASM_DEFINE_TEARDOWN(label, teardown, target_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(target_label) "]\n\t" \ + ".popsection\n\t" + +/* + * Store the address of the critical section descriptor structure at + * @cs_label into the @rseq_cs pointer and emit the label @label, which + * is the beginning of the sequence of consecutive assembly instructions. + * + * @label: + * Local label to the beginning of the sequence of consecutive assembly + * instructions. + * @cs_label: + * Source local label to the critical section descriptor structure. + * @rseq_cs: + * Destination pointer where to store the address of the critical + * section descriptor structure. + */ +#ifdef RSEQ_ARCH_AMD64 +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \ + "movq %%rax, " __rseq_str(rseq_cs) "\n\t" \ + __rseq_str(label) ":\n\t" +#else +# define RSEQ_ASM_REF_LABEL 881 +/* + * Use ip-relative addressing to get the address to the rseq critical + * section descriptor. On x86-32, this requires a "call" instruction to + * get the instruction pointer, which modifies the stack. Beware of this + * side-effect if this scheme is used within a rseq critical section. + * This computation is performed immediately before storing the rseq_cs, + * which is outside of the critical section. + * Balance call/ret to help speculation. + * Save this ip address to ref_ip for use by the critical section so + * ip-relative addressing can be done without modifying the stack + * pointer by using ref_ip and calculating the relative offset from + * ref_label. 
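+ * The thunk at label 880 simply copies its return address (which is the + * address of ref_label) from the stack into %eax before returning.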
+ */ +# define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs, ref_ip, ref_label) \ + "jmp 779f\n\t" \ + "880:\n\t" \ + "movl (%%esp), %%eax\n\t" \ + "ret\n\t" \ + "779:\n\t" \ + "call 880b\n\t" \ + __rseq_str(ref_label) ":\n\t" \ + "movl %%eax, " __rseq_str(ref_ip) "\n\t" \ + "leal (" __rseq_str(cs_label) " - " __rseq_str(ref_label) "b)(%%eax), %%eax\n\t" \ + "movl %%eax, " __rseq_str(rseq_cs) "\n\t" \ + __rseq_str(label) ":\n\t" +#endif + +/* Jump to local label @label when @cpu_id != @current_cpu_id. */ +#define RSEQ_ASM_CBNE_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_INDEX_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/x86/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/x86/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_INDEX_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/x86/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq/arch/x86/bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_INDEX_MM_CID + +/* APIs which are not indexed. */ + +#define RSEQ_TEMPLATE_INDEX_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq/arch/x86/bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_INDEX_NONE diff --git a/third_party/librseq/include/rseq/rseq-x86-bits.h b/third_party/librseq/include/rseq/arch/x86/bits.h similarity index 65% rename from third_party/librseq/include/rseq/rseq-x86-bits.h rename to third_party/librseq/include/rseq/arch/x86/bits.h index 8ee71cb87461..998c059aa37f 100644 --- a/third_party/librseq/include/rseq/rseq-x86-bits.h +++ b/third_party/librseq/include/rseq/arch/x86/bits.h @@ -1,39 +1,46 @@ /* SPDX-License-Identifier: MIT */ /* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ + /* - * rseq-x86-bits.h + * rseq/arch/x86/bits.h */ -#include "rseq-bits-template.h" +#include "rseq/arch/templates/bits.h" + +/* + * Refer to rseq/pseudocode.h for pseudo-code of the rseq critical + * section helpers. + */ +#include "rseq/pseudocode.h" #ifdef __x86_64__ #if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
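The body below implements: if (*v != expect) branch to ne; otherwise commit *v = newv.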
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" #endif /* final store */ "movq %[newv], %[v]\n\t" @@ -48,7 +55,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i [newv] "r" (newv) : "memory", "cc", "rax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -59,7 +66,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -72,33 +79,29 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i #endif } -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
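The sequence below implements the list-pop idiom: tmp = *v; if (tmp == expectnot) branch to eq; *load = tmp; *v = *(intptr_t *)(tmp + voffp).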
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" - "je %l[cmpfail]\n\t" + "je %l[eq]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -121,7 +124,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e [load] "m" (*load) : "memory", "cc", "rax", "rbx" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, eq #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -132,7 +135,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +eq: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -146,7 +149,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e } static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) @@ -156,11 +159,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addq %[count], %[v]\n\t" @@ -193,14 +196,10 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) #endif } -#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +#define rseq_arch_has_load_add_load_load_add_store -/* - * pval = *(ptr+off) - * *pval += inc; - */ static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, long off, intptr_t inc, int cpu) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_load_load_add_store__ptr)(intptr_t *ptr, long off, intptr_t inc, int cpu) { RSEQ_INJECT_C(9) @@ -210,11 +209,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, long off, in RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
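This helper performs *(intptr_t *)(*ptr + off) += inc as a single restartable sequence.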
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* get p+v */ "movq %[ptr], %%rbx\n\t" @@ -240,18 +239,21 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, long off, in , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t expect2, intptr_t newv, int cpu) { @@ -259,28 +261,28 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
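With RSEQ_TEMPLATE_INDEX_CPU_ID and RSEQ_TEMPLATE_MO_RELAXED defined, RSEQ_TEMPLATE_IDENTIFIER() names this expansion rseq_load_cbne_load_cbne_store__ptr_relaxed_cpu_id.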
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) "cmpq %[v2], %[expect2]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" "cmpq %[v2], %[expect2]\n\t" - "jnz %l[error3]\n\t" + "jne %l[error3]\n\t" #endif /* final store */ "movq %[newv], %[v]\n\t" @@ -299,7 +301,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex [newv] "r" (newv) : "memory", "cc", "rax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2, error3 #endif @@ -310,7 +312,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -327,13 +329,13 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex } #endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, intptr_t newv, int cpu) { @@ -341,22 +343,22 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
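The store to *v2 precedes the commit store to *v; under x86-TSO the same plain stores also satisfy the release expansion.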
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" #endif /* try store */ "movq %[newv2], %[v2]\n\t" @@ -378,7 +380,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t [newv] "r" (newv) : "memory", "cc", "rax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -389,7 +391,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -403,7 +405,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t } static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, void *dst, void *src, size_t len, intptr_t newv, int cpu) { @@ -413,7 +415,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) @@ -422,20 +424,20 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t "movq %[dst], %[rseq_scratch1]\n\t" "movq %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. 
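The movq spills above free %[src], %[dst] and %[len] to serve as loop counters; each exit path restores them from rseq_scratch[].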
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" - "jnz 5f\n\t" + "jne 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 6f) "cmpq %[v], %[expect]\n\t" - "jnz 7f\n\t" + "jne 7f\n\t" #endif /* try memcpy */ "test %[len], %[len]\n\t" \ - "jz 333f\n\t" \ + "je 333f\n\t" \ "222:\n\t" \ "movb (%[src]), %%al\n\t" \ "movb %%al, (%[dst])\n\t" \ @@ -458,18 +460,18 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t "movq %[rseq_scratch1], %[dst]\n\t" "movq %[rseq_scratch0], %[src]\n\t", abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, + RSEQ_ASM_DEFINE_TEARDOWN(5, "movq %[rseq_scratch2], %[len]\n\t" "movq %[rseq_scratch1], %[dst]\n\t" "movq %[rseq_scratch0], %[src]\n\t", - cmpfail) + ne) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, + RSEQ_ASM_DEFINE_TEARDOWN(6, "movq %[rseq_scratch2], %[len]\n\t" "movq %[rseq_scratch1], %[dst]\n\t" "movq %[rseq_scratch0], %[src]\n\t", error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, + RSEQ_ASM_DEFINE_TEARDOWN(7, "movq %[rseq_scratch2], %[len]\n\t" "movq %[rseq_scratch1], %[dst]\n\t" "movq %[rseq_scratch0], %[src]\n\t", @@ -491,7 +493,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t [rseq_scratch2] "m" (rseq_scratch[2]) : "memory", "cc", "rax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -502,7 +504,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -516,36 +518,50 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t } #endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ #elif defined(__i386__) +/* + * On x86-32, use eax as scratch register and take memory operands as + * input to lessen register pressure. Especially needed when compiling + * in O0. + */ + #if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { + /* + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. 
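+ * It is passed to the asm as a plain "m" operand and is only live for + * the duration of the inline asm.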
+ */ + struct rseq_local { + uint32_t ref_ip; + } rseq_local; + RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" #endif /* final store */ "movl %[newv], %[v]\n\t" @@ -557,10 +573,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), - [newv] "r" (newv) + [newv] "r" (newv), + [ref_ip] "m" (rseq_local.ref_ip) : "memory", "cc", "eax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -571,7 +588,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -584,33 +601,37 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, i #endif } -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbeq_store_add_load_store__ptr)(intptr_t *v, intptr_t expectnot, long voffp, intptr_t *load, int cpu) { + /* + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. + */ + struct rseq_local { + uint32_t ref_ip; + } rseq_local; + RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[eq]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
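Note the extra %[ref_ip] and RSEQ_ASM_REF_LABEL arguments to the x86-32 variant of RSEQ_ASM_STORE_RSEQ_CS(), required by the ip-relative addressing scheme described above.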
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" - "je %l[cmpfail]\n\t" + "je %l[eq]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -630,10 +651,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e [v] "m" (*v), [expectnot] "r" (expectnot), [voffp] "ir" (voffp), - [load] "m" (*load) + [load] "m" (*load), + [ref_ip] "m" (rseq_local.ref_ip) : "memory", "cc", "eax", "ebx" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, eq #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -644,7 +666,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +eq: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -658,8 +680,16 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t e } static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_add_store__ptr)(intptr_t *v, intptr_t count, int cpu) { + /* + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. + */ + struct rseq_local { + uint32_t ref_ip; + } rseq_local; + RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( @@ -668,11 +698,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addl %[count], %[v]\n\t" @@ -684,7 +714,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), - [count] "ir" (count) + [count] "ir" (count), + [ref_ip] "m" (rseq_local.ref_ip) : "memory", "cc", "eax" RSEQ_INJECT_CLOBBER : abort @@ -706,36 +737,44 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) } static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_load_cbne_store__ptr)(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t expect2, intptr_t newv, int cpu) { + /* + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. + */ + struct rseq_local { + uint32_t ref_ip; + } rseq_local; + RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
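The load_add_store helper above is the building block for per-CPU counters. A minimal sketch of the usual retry loop around the generic dispatcher from rseq/rseq.h (illustrative; the counters array is hypothetical and the thread is assumed to have a registered rseq area):

#include <rseq/rseq.h>

static void percpu_counter_inc_sketch(intptr_t *counters)
{
	int ret;

	do {
		int cpu = rseq_current_cpu();

		ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED,
				RSEQ_PERCPU_CPU_ID, &counters[cpu], 1, cpu);
	} while (rseq_unlikely(ret));	/* -1: aborted, retry on the new cpu */
}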
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) "cmpl %[expect2], %[v2]\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" "cmpl %[expect2], %[v2]\n\t" - "jnz %l[error3]\n\t" + "jne %l[error3]\n\t" #endif "movl %[newv], %%eax\n\t" /* final store */ @@ -752,10 +791,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex /* final store input */ [v] "m" (*v), [expect] "r" (expect), - [newv] "m" (newv) + [newv] "m" (newv), + [ref_ip] "m" (rseq_local.ref_ip) : "memory", "cc", "eax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2, error3 #endif @@ -766,7 +806,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -783,38 +823,46 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t ex } #endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_store_store__ptr)(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, intptr_t newv, int cpu) { + /* + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. + */ + struct rseq_local { + uint32_t ref_ip; + } rseq_local; + RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. 
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" - "jnz %l[cmpfail]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" - "jnz %l[error2]\n\t" + "jne %l[error2]\n\t" #endif /* try store */ "movl %[newv2], %[v2]\n\t" @@ -836,10 +884,11 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t /* final store input */ [v] "m" (*v), [expect] "m" (expect), - [newv] "r" (newv) + [newv] "r" (newv), + [ref_ip] "m" (rseq_local.ref_ip) : "memory", "cc", "eax" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -850,7 +899,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -866,101 +915,95 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t /* TODO: implement a faster memcpy. */ static inline __attribute__((always_inline)) -int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, +int RSEQ_TEMPLATE_IDENTIFIER(rseq_load_cbne_memcpy_store__ptr)(intptr_t *v, intptr_t expect, void *dst, void *src, size_t len, intptr_t newv, int cpu) { - uint32_t rseq_scratch[3]; + /* + * Work-around register pressure limitations. + * Old gcc does not support output operands for asm goto, so + * input registers cannot simply be re-used as output registers. + * This is why clobbered registers are used. + * ref_ip is used to store a reference instruction pointer + * for ip-relative addressing. + */ + struct rseq_local { + uint32_t expect, dst, src, len, newv, ref_ip; + } rseq_local = { + .expect = (uint32_t) expect, + .dst = (uint32_t) dst, + .src = (uint32_t) src, + .len = (uint32_t) len, + .newv = (uint32_t) newv, + .ref_ip = 0, + }; RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[ne]) #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif - "movl %[src], %[rseq_scratch0]\n\t" - "movl %[dst], %[rseq_scratch1]\n\t" - "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. 
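The store_store variant above performs one extra store (newv2 into *v2) before the commit store, which is exactly what a per-CPU list push needs: link the node inside the critical section, then publish the new head. A sketch, assuming the generic dispatcher mirrors the other __ptr wrappers in rseq/rseq.h (its declaration falls outside the quoted hunks):

#include <rseq/rseq.h>

struct node {
	struct node *next;
};

static void percpu_push_sketch(struct node **heads, struct node *node)
{
	for (;;) {
		int cpu = rseq_cpu_start();
		struct node *head = RSEQ_READ_ONCE(heads[cpu]);
		int ret = rseq_load_cbne_store_store__ptr(RSEQ_MO_RELAXED,
				RSEQ_PERCPU_CPU_ID,
				(intptr_t *)&heads[cpu], (intptr_t)head,
				(intptr_t *)&node->next, (intptr_t)head,
				(intptr_t)node, cpu);

		if (ret == 0)
			return;	/* node->next and the new head committed together */
		/* 1: head changed ("ne"); -1: aborted; reload and retry */
	}
}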
*/ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_ASM_CS_OFFSET(%[rseq_offset]), %[ref_ip], RSEQ_ASM_REF_LABEL) + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 5f\n\t" + /* load expect into ebx */ + "movl %[expect], %%ebx\n\t" + "cmpl %%ebx, %[v]\n\t" + "jne %l[ne]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 7f\n\t" + RSEQ_ASM_CBNE_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_INDEX_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpl %%ebx, %[v]\n\t" + "jne %l[error2]\n\t" #endif /* try memcpy */ - "test %[len], %[len]\n\t" \ - "jz 333f\n\t" \ - "222:\n\t" \ - "movb (%[src]), %%al\n\t" \ - "movb %%al, (%[dst])\n\t" \ - "inc %[src]\n\t" \ - "inc %[dst]\n\t" \ - "dec %[len]\n\t" \ - "jnz 222b\n\t" \ - "333:\n\t" \ + /* load dst into ebx */ + "movl %[dst], %%ebx\n\t" + /* load src into ecx */ + "movl %[src], %%ecx\n\t" + /* load len into edx */ + "movl %[len], %%edx\n\t" + "test %%edx, %%edx\n\t" + "je 333f\n\t" + "222:\n\t" + "movb (%%ecx), %%al\n\t" + "movb %%al, (%%ebx)\n\t" + "inc %%ecx\n\t" + "inc %%ebx\n\t" + "dec %%edx\n\t" + "jnz 222b\n\t" + "333:\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_TEMPLATE_MO_RELEASE "lock; addl $0,-128(%%esp)\n\t" #endif - "movl %[newv], %%eax\n\t" + /* load newv into ebx */ + "movl %[newv], %%ebx\n\t" /* final store */ - "movl %%eax, %[v]\n\t" + "movl %%ebx, %[v]\n\t" "2:\n\t" RSEQ_INJECT_ASM(6) - /* teardown */ - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t" - RSEQ_ASM_DEFINE_ABORT(4, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error2) -#endif + RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), - [expect] "m" (expect), - [newv] "m" (newv), /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - : "memory", "cc", "eax" + [expect] "m" (rseq_local.expect), /* ebx */ + [dst] "m" (rseq_local.dst), /* ebx */ + [src] "m" (rseq_local.src), /* ecx */ + [len] "m" (rseq_local.len), /* edx */ + [newv] "m" (rseq_local.newv), /* ebx */ + [ref_ip] "m" (rseq_local.ref_ip) + : "memory", "cc", "eax", "ebx", "ecx", "edx" RSEQ_INJECT_CLOBBER - : abort, cmpfail + : abort, ne #ifdef RSEQ_COMPARE_TWICE , error1, error2 #endif @@ -971,7 +1014,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t 
*v, intptr_t rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; -cmpfail: +ne: rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE @@ -985,8 +1028,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t } #endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && - (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + (defined(RSEQ_TEMPLATE_INDEX_CPU_ID) || defined(RSEQ_TEMPLATE_INDEX_MM_CID)) */ #endif -#include "rseq-bits-reset.h" +#include "rseq/arch/templates/bits-reset.h" diff --git a/third_party/librseq/include/rseq/rseq-x86-thread-pointer.h b/third_party/librseq/include/rseq/arch/x86/thread-pointer.h similarity index 74% rename from third_party/librseq/include/rseq/rseq-x86-thread-pointer.h rename to third_party/librseq/include/rseq/arch/x86/thread-pointer.h index 2b67f69f93d9..bb17cd8d7041 100644 --- a/third_party/librseq/include/rseq/rseq-x86-thread-pointer.h +++ b/third_party/librseq/include/rseq/arch/x86/thread-pointer.h @@ -2,7 +2,7 @@ /* SPDX-FileCopyrightText: 2021 Mathieu Desnoyers */ /* - * rseq-x86-thread-pointer.h + * rseq/arch/x86/thread-pointer.h */ #ifndef _RSEQ_X86_THREAD_POINTER @@ -15,12 +15,14 @@ extern "C" { #endif #if __GNUC_PREREQ (11, 1) -static inline void *rseq_thread_pointer(void) +static inline __attribute__((always_inline)) +void *rseq_thread_pointer(void) { return __builtin_thread_pointer(); } #else -static inline void *rseq_thread_pointer(void) +static inline __attribute__((always_inline)) +void *rseq_thread_pointer(void) { void *__result; diff --git a/third_party/librseq/include/rseq/compiler.h b/third_party/librseq/include/rseq/compiler.h index 3f8eec50018c..9a202cde9246 100644 --- a/third_party/librseq/include/rseq/compiler.h +++ b/third_party/librseq/include/rseq/compiler.h @@ -7,13 +7,34 @@ * Work-around asm goto compiler bugs. */ -#ifndef RSEQ_COMPILER_H -#define RSEQ_COMPILER_H +#ifndef _RSEQ_COMPILER_H +#define _RSEQ_COMPILER_H #if defined __cplusplus # include /* for std::remove_cv */ #endif +#define rseq_likely(x) __builtin_expect(!!(x), 1) +#define rseq_unlikely(x) __builtin_expect(!!(x), 0) +#define rseq_barrier() __asm__ __volatile__("" : : : "memory") + +/* + * Instruct the compiler to perform only a single access to a variable + * (prohibits merging and refetching). The compiler is also forbidden to reorder + * successive instances of RSEQ_ACCESS_ONCE(), but only when the compiler is aware of + * particular ordering. Compiler ordering can be ensured, for example, by + * putting two RSEQ_ACCESS_ONCE() in separate C statements. + * + * This macro does absolutely -nothing- to prevent the CPU from reordering, + * merging, or refetching absolutely anything at any time. Its main intended + * use is to mediate communication between process-level code and irq/NMI + * handlers, all running on the same CPU. + */ +#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) + +#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); }) +#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x) + /* * gcc prior to 4.8.2 miscompiles asm goto. * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 @@ -71,4 +92,15 @@ ) #endif -#endif /* RSEQ_COMPILER_H_ */ +/* + * RSEQ_PARAM_SELECT_ARG1 + * + * Select second argument. Use inside macros to implement optional last + * macro argument, such as: + * + * #define macro(_a, _b, _c, _optional...) \ + * RSEQ_PARAM_SELECT_ARG1(_, ##_optional, do_default_macro()) + */ +#define RSEQ_PARAM_SELECT_ARG1(_arg0, _arg1, ...) 
_arg1
+
+#endif /* _RSEQ_COMPILER_H */
diff --git a/third_party/librseq/include/rseq/inject.h b/third_party/librseq/include/rseq/inject.h
new file mode 100644
index 000000000000..ae44df1f6cc5
--- /dev/null
+++ b/third_party/librseq/include/rseq/inject.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */
+
+/*
+ * rseq/inject.h
+ */
+
+#ifndef _RSEQ_INJECT_H
+#define _RSEQ_INJECT_H
+
+/*
+ * Empty code injection macros, override when testing.
+ * It is important to consider that the ASM injection macros need to be
+ * fully reentrant (e.g. do not modify the stack).
+ */
+
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+#endif /* _RSEQ_INJECT_H */
diff --git a/third_party/librseq/include/rseq/mempool.h b/third_party/librseq/include/rseq/mempool.h
new file mode 100644
index 000000000000..b28d18f081f1
--- /dev/null
+++ b/third_party/librseq/include/rseq/mempool.h
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers */
+/* SPDX-FileCopyrightText: 2024 Olivier Dion */
+
+#ifndef _RSEQ_MEMPOOL_H
+#define _RSEQ_MEMPOOL_H
+
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * rseq/mempool.h: rseq memory pool allocator.
+ *
+ * The rseq memory pool allocator can be configured as either a global
+ * allocator (default) or a per-CPU memory allocator.
+ *
+ * The rseq global memory allocator allows the application to request
+ * memory pools of global memory, each containing objects of a
+ * given size (rounded to the next power of 2), reserving a virtual
+ * address range of the requested stride.
+ *
+ * The rseq per-CPU memory allocator allows the application to request
+ * memory pools of CPU-local memory, each containing objects of a
+ * given size (rounded to the next power of 2), reserving a virtual
+ * address range per CPU, for a given maximum number of CPUs.
+ *
+ * The per-CPU memory allocator is analogous to TLS (Thread-Local
+ * Storage) memory: whereas TLS provides thread-local storage, the
+ * per-CPU memory allocator provides CPU-local storage.
+ *
+ * Memory pool sets can be created by adding one or more pools into
+ * them. They can be used to perform allocation of variable-length
+ * objects.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The percpu offset stride can be overridden by the user code.
+ * The stride *must* match for all objects belonging to a given pool
+ * between arguments to:
+ *
+ * - rseq_mempool_create(),
+ * - rseq_percpu_ptr(),
+ * - rseq_mempool_percpu_free().
+ */
+#define RSEQ_MEMPOOL_STRIDE (1U << 16) /* stride: 64kB */
+
+/*
+ * Tag pointers returned by:
+ * - rseq_mempool_percpu_malloc(),
+ * - rseq_mempool_percpu_zmalloc(),
+ * - rseq_mempool_percpu_malloc_init(),
+ * - rseq_mempool_set_percpu_malloc(),
+ * - rseq_mempool_set_percpu_zmalloc(),
+ * - rseq_mempool_set_percpu_malloc_init(),
+ *
+ * and passed as parameters to:
+ * - rseq_percpu_ptr(),
+ * - rseq_mempool_percpu_free(),
+ *
+ * with __rseq_percpu for use by static analyzers.
+ */
+#define __rseq_percpu
+
+struct rseq_mempool_attr;
+struct rseq_mempool;
+
+/*
+ * rseq_mempool_create: Create a memory pool.
+ *
+ * Create a memory pool for items of size @item_len (rounded to
+ * the next power of two).
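The RSEQ_PARAM_SELECT_ARG1() helper completed above is what gives several mempool entry points below their optional trailing stride argument. A quick expansion sketch (demo_free/real_free are hypothetical names, not part of the API):

#define demo_free(_ptr, _stride...) \
	real_free(_ptr, RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_MEMPOOL_STRIDE))

/*
 * demo_free(p)       -> real_free(p, RSEQ_MEMPOOL_STRIDE): with no optional
 *                       argument, the default lands in the _arg1 position.
 * demo_free(p, 4096) -> real_free(p, 4096): an explicit stride shifts into
 *                       the _arg1 position and takes precedence.
 */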
+ *
+ * The @attr pointer is used to specify the pool attributes. If NULL,
+ * default attribute values are used. The @attr can be destroyed immediately
+ * after rseq_mempool_create() returns. The caller keeps ownership
+ * of @attr. Default attributes select a per-cpu mempool type.
+ *
+ * The argument @pool_name can be used to give a name to the pool for
+ * debugging purposes. It can be NULL if no name is given.
+ *
+ * Returns a pointer to the created memory pool. Returns NULL on error,
+ * with errno set accordingly:
+ *
+ * EINVAL: Invalid argument.
+ * ENOMEM: Not enough resources (memory or pool indexes) available to
+ * allocate the pool.
+ *
+ * In addition, if the attr mmap callback fails, NULL is returned and
+ * errno is propagated from the callback. The default callback can
+ * return errno=ENOMEM.
+ *
+ * This API is MT-safe.
+ */
+struct rseq_mempool *rseq_mempool_create(const char *pool_name,
+	size_t item_len, const struct rseq_mempool_attr *attr);
+
+/*
+ * rseq_mempool_destroy: Destroy a per-cpu memory pool.
+ *
+ * Destroy a per-cpu memory pool, unmapping its memory and removing the
+ * pool entry from the global index. No pointers allocated from the
+ * pool should be used when it is destroyed. This includes rseq_percpu_ptr().
+ *
+ * Argument @pool is a pointer to the per-cpu pool to destroy.
+ *
+ * Return values: 0 on success, -1 on error, with errno set accordingly:
+ *
+ * ENOENT: Trying to free a pool which was not allocated.
+ *
+ * If the munmap_func callback fails, -1 is returned and errno is
+ * propagated from the callback. The default callback can return
+ * errno=EINVAL.
+ *
+ * This API is MT-safe.
+ */
+int rseq_mempool_destroy(struct rseq_mempool *pool);
+
+/*
+ * rseq_mempool_percpu_malloc: Allocate memory from a per-cpu pool.
+ *
+ * Allocate an item from a per-cpu @pool. The allocation will reserve
+ * an item of the size specified by @item_len (rounded to the next power of
+ * two) at pool creation. This effectively reserves space for this item
+ * on all CPUs.
+ *
+ * On success, return a "__rseq_percpu" encoded pointer to the pool
+ * item. This encoded pointer is meant to be passed to rseq_percpu_ptr()
+ * to be decoded to a valid address before being accessed.
+ *
+ * Return NULL (errno=ENOMEM) if there is not enough space left in the
+ * pool to allocate an item.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool);
+
+/*
+ * rseq_mempool_percpu_zmalloc: Allocate zero-initialized memory from a per-cpu pool.
+ *
+ * Allocate memory for an item within the pool, and zero-initialize its
+ * memory on all CPUs. See rseq_mempool_percpu_malloc for details.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool);
+
+/*
+ * rseq_mempool_percpu_malloc_init: Allocate initialized memory from a per-cpu pool.
+ *
+ * Allocate memory for an item within the pool, and initialize its
+ * memory on all CPUs with content from @init_ptr of length @init_len.
+ * See rseq_mempool_percpu_malloc for details.
+ *
+ * Return NULL (errno=ENOMEM) if there is not enough space left in the
+ * pool to allocate an item. Return NULL (errno=EINVAL) if init_len is
+ * larger than the pool item_len.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
+	void *init_ptr, size_t init_len);
+
+/*
+ * rseq_mempool_malloc: Allocate memory from a global pool.
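Putting the creation, allocation, and destruction entry points documented above together, a minimal lifecycle sketch (illustrative, not part of the upstream change; rseq_percpu_ptr() and rseq_mempool_percpu_free() are documented further below):

#include <rseq/mempool.h>

struct counters {
	intptr_t rq;
	intptr_t wq;
};

static int mempool_lifecycle_sketch(void)
{
	/* NULL attr: default attributes select a per-cpu mempool type. */
	struct rseq_mempool *pool = rseq_mempool_create("counters",
			sizeof(struct counters), NULL);
	struct counters __rseq_percpu *c;

	if (!pool)
		return -1;
	c = (struct counters __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
	if (c) {
		/* Decode the encoded pointer before touching CPU 0's copy. */
		rseq_percpu_ptr(c, 0)->rq++;
		rseq_mempool_percpu_free(c);
	}
	return rseq_mempool_destroy(pool);
}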
+ * + * Wrapper to allocate memory from a global pool, which can be + * used directly without per-cpu indexing. Would normally be used + * with pools created with max_nr_cpus=1. + */ +static inline +void *rseq_mempool_malloc(struct rseq_mempool *pool) +{ + return (void *) rseq_mempool_percpu_malloc(pool); +} + +/* + * rseq_mempool_zmalloc: Allocate zero-initialized memory from a global pool. + * + * Wrapper to allocate memory from a global pool, which can be + * used directly without per-cpu indexing. Would normally be used + * with pools created with max_nr_cpus=1. + */ +static inline +void *rseq_mempool_zmalloc(struct rseq_mempool *pool) +{ + return (void *) rseq_mempool_percpu_zmalloc(pool); +} + +/* + * rseq_mempool_malloc_init: Allocate initialized memory from a global pool. + * + * Wrapper to allocate memory from a global pool, which can be + * used directly without per-cpu indexing. Would normally be used + * with pools created with max_nr_cpus=1. + */ +static inline +void *rseq_mempool_malloc_init(struct rseq_mempool *pool, + void *init_ptr, size_t init_len) +{ + return (void *) rseq_mempool_percpu_malloc_init(pool, init_ptr, init_len); +} + +/* + * rseq_mempool_percpu_free: Free memory from a per-cpu pool. + * + * Free an item pointed to by @ptr from its per-cpu pool. + * + * The @ptr argument is a __rseq_percpu encoded pointer returned by + * either: + * + * - rseq_mempool_percpu_malloc(), + * - rseq_mempool_percpu_zmalloc(), + * - rseq_mempool_percpu_malloc_init(), + * - rseq_mempool_set_percpu_malloc(), + * - rseq_mempool_set_percpu_zmalloc(), + * - rseq_mempool_set_percpu_malloc_init(). + * + * The @stride optional argument to rseq_percpu_free() is a configurable + * stride, which must match the stride received by pool creation. + * If the argument is not present, use the default RSEQ_MEMPOOL_STRIDE. + * + * This API is MT-safe. + */ +void librseq_mempool_percpu_free(void __rseq_percpu *ptr, size_t stride); + +#define rseq_mempool_percpu_free(_ptr, _stride...) \ + librseq_mempool_percpu_free(_ptr, RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_MEMPOOL_STRIDE)) + +/* + * rseq_free: Free memory from a global pool. + * + * Free an item pointed to by @ptr from its global pool. Would normally + * be used with pools created with max_nr_cpus=1. + * + * The @ptr argument is a pointer returned by either: + * + * - rseq_mempool_malloc(), + * - rseq_mempool_zmalloc(), + * - rseq_mempool_malloc_init(), + * - rseq_mempool_set_malloc(), + * - rseq_mempool_set_zmalloc(), + * - rseq_mempool_set_malloc_init(). + * + * The @stride optional argument to rseq_free() is a configurable + * stride, which must match the stride received by pool creation. If + * the argument is not present, use the default RSEQ_MEMPOOL_STRIDE. + * The stride is needed even for a global pool to know the mapping + * address range. + * + * This API is MT-safe. + */ +#define rseq_mempool_free(_ptr, _stride...) \ + librseq_mempool_percpu_free((void __rseq_percpu *) _ptr, RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_MEMPOOL_STRIDE)) + +/* + * rseq_percpu_ptr: Offset a per-cpu pointer for a given CPU. + * + * Offset a per-cpu pointer @ptr to get the associated pointer for the + * given @cpu. The @ptr argument is a __rseq_percpu pointer returned by + * either: + * + * - rseq_mempool_percpu_malloc(), + * - rseq_mempool_percpu_zmalloc(), + * - rseq_mempool_percpu_malloc_init(), + * - rseq_mempool_set_percpu_malloc(), + * - rseq_mempool_set_percpu_zmalloc(), + * - rseq_mempool_set_percpu_malloc_init(). 
+ * + * The macro rseq_percpu_ptr() preserves the type of the @ptr parameter + * for the returned pointer, but removes the __rseq_percpu annotation. + * + * The macro rseq_percpu_ptr() takes an optional @stride argument. If + * the argument is not present, use the default RSEQ_MEMPOOL_STRIDE. + * This must match the stride used for pool creation. + * + * This API is MT-safe. + */ +#define rseq_percpu_ptr(_ptr, _cpu, _stride...) \ + ((__typeof__(*(_ptr)) *) ((uintptr_t) (_ptr) + \ + ((unsigned int) (_cpu) * \ + (uintptr_t) RSEQ_PARAM_SELECT_ARG1(_, ##_stride, RSEQ_MEMPOOL_STRIDE)))) + +/* + * rseq_mempool_set_create: Create a pool set. + * + * Create a set of pools. Its purpose is to offer a memory allocator API + * for variable-length items (e.g. variable length strings). When + * created, the pool set has no pool. Pools can be created and added to + * the set. One common approach would be to create pools for each + * relevant power of two allocation size useful for the application. + * Only one pool can be added to the pool set for each power of two + * allocation size. + * + * Returns a pool set pointer on success, else returns NULL with + * errno=ENOMEM (out of memory). + * + * This API is MT-safe. + */ +struct rseq_mempool_set *rseq_mempool_set_create(void); + +/* + * rseq_mempool_set_destroy: Destroy a pool set. + * + * Destroy a pool set and its associated resources. The pools that were + * added to the pool set are destroyed as well. + * + * Returns 0 on success, -1 on failure (or partial failure), with errno + * set by rseq_percpu_pool_destroy(). Using a pool set after destroy + * failure is undefined. + * + * This API is MT-safe. + */ +int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set); + +/* + * rseq_mempool_set_add_pool: Add a pool to a pool set. + * + * Add a @pool to the @pool_set. On success, its ownership is handed + * over to the pool set, so the caller should not destroy it explicitly. + * Only one pool can be added to the pool set for each power of two + * allocation size. + * + * Returns 0 on success, -1 on error with the following errno: + * - EBUSY: A pool already exists in the pool set for this power of two + * allocation size. + * + * This API is MT-safe. + */ +int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, + struct rseq_mempool *pool); + +/* + * rseq_mempool_set_percpu_malloc: Allocate memory from a per-cpu pool set. + * + * Allocate an item from a per-cpu @pool. The allocation will reserve + * an item of the size specified by @len (rounded to next power of + * two). This effectively reserves space for this item on all CPUs. + * + * The space reservation will search for the smallest pool within + * @pool_set which respects the following conditions: + * + * - it has an item size large enough to fit @len, + * - it has space available. + * + * On success, return a "__rseq_percpu" encoded pointer to the pool + * item. This encoded pointer is meant to be passed to rseq_percpu_ptr() + * to be decoded to a valid address before being accessed. + * + * Return NULL (errno=ENOMEM) if there is not enough space left in the + * pool to allocate an item. + * + * This API is MT-safe. + */ +void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len); + +/* + * rseq_mempool_set_percpu_zmalloc: Allocate zero-initialized memory from a per-cpu pool set. + * + * Allocate memory for an item within the pool, and zero-initialize its + * memory on all CPUs. See rseq_mempool_set_percpu_malloc for details. 
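A pool set sketch tying together set creation, pool registration, and variable-length allocation (illustrative; error unwinding is simplified, and a pool whose add fails is leaked here for brevity):

#include <rseq/mempool.h>

static struct rseq_mempool_set *string_pool_set_sketch(void)
{
	struct rseq_mempool_set *set = rseq_mempool_set_create();
	size_t len;

	if (!set)
		return NULL;
	/* One pool per power-of-two size class from 32 to 256 bytes. */
	for (len = 32; len <= 256; len <<= 1) {
		struct rseq_mempool *pool = rseq_mempool_create("strings",
				len, NULL);

		if (!pool || rseq_mempool_set_add_pool(set, pool)) {
			rseq_mempool_set_destroy(set);
			return NULL;
		}
	}
	/*
	 * rseq_mempool_set_percpu_malloc(set, 100) now picks the smallest
	 * fitting size class: the 128-byte pool.
	 */
	return set;
}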
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len);
+
+/*
+ * rseq_mempool_set_percpu_malloc_init: Allocate initialized memory from a per-cpu pool set.
+ *
+ * Allocate memory for an item within the pool, and initialize its
+ * memory on all CPUs with content from @init_ptr of length @len.
+ * See rseq_mempool_set_percpu_malloc for details.
+ *
+ * This API is MT-safe.
+ */
+void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
+	void *init_ptr, size_t len);
+
+/*
+ * rseq_mempool_set_malloc: Allocate memory from a global pool set.
+ *
+ * Wrapper to allocate memory from a global pool, which can be
+ * used directly without per-cpu indexing. Would normally be used
+ * with pools created with max_nr_cpus=1.
+ */
+static inline
+void *rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len)
+{
+	return (void *) rseq_mempool_set_percpu_malloc(pool_set, len);
+}
+
+/*
+ * rseq_mempool_set_zmalloc: Allocate zero-initialized memory from a global pool set.
+ *
+ * Wrapper to allocate memory from a global pool, which can be
+ * used directly without per-cpu indexing. Would normally be used
+ * with pools created with max_nr_cpus=1.
+ */
+static inline
+void *rseq_mempool_set_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
+{
+	return (void *) rseq_mempool_set_percpu_zmalloc(pool_set, len);
+}
+
+/*
+ * rseq_mempool_set_malloc_init: Allocate initialized memory from a global pool set.
+ *
+ * Wrapper to allocate memory from a global pool, which can be
+ * used directly without per-cpu indexing. Would normally be used
+ * with pools created with max_nr_cpus=1.
+ */
+static inline
+void *rseq_mempool_set_malloc_init(struct rseq_mempool_set *pool_set, void *init_ptr, size_t len)
+{
+	return (void *) rseq_mempool_set_percpu_malloc_init(pool_set, init_ptr, len);
+}
+
+/*
+ * rseq_mempool_attr_create: Create a pool attribute structure.
+ */
+struct rseq_mempool_attr *rseq_mempool_attr_create(void);
+
+/*
+ * rseq_mempool_attr_destroy: Destroy a pool attribute structure.
+ */
+void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr);
+
+/*
+ * rseq_mempool_attr_set_init: Set pool attribute structure memory init function.
+ *
+ * The @init_func callback is used to initialize memory after allocation
+ * for the pool. The @cpu argument of @init_func, if >= 0, is the cpu to
+ * which the range starting at @addr of length @len belongs. If cpu is
+ * -1, it means the range belongs to a global pool. The @init_func
+ * callback must return 0 on success, -1 on error with errno set. If
+ * @init_func returns failure, the allocation of the pool memory fails,
+ * which either causes the pool creation to fail or memory allocation to
+ * fail (for extensible memory pools).
+ *
+ * The @init_priv argument is a private data pointer passed to the
+ * @init_func callback.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
+	int (*init_func)(void *priv, void *addr, size_t len, int cpu),
+	void *init_priv);
+
+/*
+ * rseq_mempool_attr_set_robust: Set pool robust attribute.
+ *
+ * The robust pool attribute enables runtime validation of the pool:
+ *
+ * - Check for double-free of pointers.
+ *
+ * - Detect memory leaks on pool destruction.
+ *
+ * - Detect free-list corruption on pool destruction.
+ *
+ * - Detect poison value corruption on allocation and pool destruction.
+ *
+ * There is a marginal runtime overhead on malloc/free operations.
+ *
+ * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT
+ * bytes, plus one additional stride range for a separate free list,
+ * over the lifetime of the pool.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr);
+
+/*
+ * rseq_mempool_attr_set_percpu: Set pool type as percpu.
+ *
+ * A pool created with this type is a per-cpu memory pool. The reserved
+ * allocation size is @stride, and the maximum CPU value expected
+ * is (@max_nr_cpus - 1). A @stride of 0 uses the default
+ * RSEQ_MEMPOOL_STRIDE.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
+	size_t stride, int max_nr_cpus);
+
+/*
+ * rseq_mempool_attr_set_global: Set pool type as global.
+ *
+ * A pool created with this type is a global memory pool. The reserved
+ * allocation size is @stride. A @stride of 0 uses the default
+ * RSEQ_MEMPOOL_STRIDE.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr, size_t stride);
+
+/*
+ * rseq_mempool_attr_set_max_nr_ranges: Set upper-limit to range allocation.
+ *
+ * Set an upper-limit to range allocation. A @max_nr_ranges value of
+ * 0 means no limit (default).
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
+	unsigned long max_nr_ranges);
+
+/*
+ * rseq_mempool_attr_set_poison: Set pool poison value.
+ *
+ * Set a poison value to be written over freed pool entries. This can be
+ * used to anonymize freed memory, and for memory corruption checks
+ * with the robust attribute.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
+	uintptr_t poison);
+
+enum rseq_mempool_populate_policy {
+	/*
+	 * RSEQ_MEMPOOL_POPULATE_COW_INIT (default):
+	 * Rely on copy-on-write (COW) of per-cpu pages to populate
+	 * per-cpu pages from the initial values pages on first write.
+	 * Note that this type of pool cannot be accessed from
+	 * child processes across fork. It is however valid to
+	 * destroy a pool from a child process after a fork to free
+	 * its remaining resources.
+	 */
+	RSEQ_MEMPOOL_POPULATE_COW_INIT = 0,
+
+	/*
+	 * RSEQ_MEMPOOL_POPULATE_COW_ZERO:
+	 * Rely on copy-on-write (COW) of per-cpu pages to populate
+	 * per-cpu pages from the zero page on first write. As long
+	 * as the user only uses malloc, zmalloc, or malloc_init with
+	 * zeroed content to allocate items, it does not trigger COW
+	 * of per-cpu pages, leaving in place the zero page until an
+	 * active CPU writes to its per-cpu item. The recommended (and
+	 * default) poison value for this pool policy is 0 to prevent
+	 * useless COW page allocation.
+	 */
+	RSEQ_MEMPOOL_POPULATE_COW_ZERO = 1,
+};
+
+/*
+ * rseq_mempool_attr_set_populate_policy: Set pool page populate policy.
+ *
+ * Set page populate policy for the mempool.
+ *
+ * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
+ */
+int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
+	enum rseq_mempool_populate_policy policy);
+
+/*
+ * rseq_mempool_range_init_numa: NUMA initialization helper for memory range.
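A configuration sketch combining the attribute setters documented above (illustrative; the poison constant is arbitrary, and max_nr_cpus=0 is assumed to request auto-detection, matching the rseq_get_max_nr_cpus() description later in this patch):

#include <rseq/mempool.h>

static struct rseq_mempool *robust_pool_sketch(size_t item_len)
{
	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
	struct rseq_mempool *pool = NULL;

	if (!attr)
		return NULL;
	/* Per-cpu pool type, default stride, auto-detected max_nr_cpus. */
	if (rseq_mempool_attr_set_percpu(attr, 0, 0) ||
			rseq_mempool_attr_set_robust(attr) ||
			rseq_mempool_attr_set_poison(attr, 0xdeadbeefUL))
		goto end;
	pool = rseq_mempool_create("robust-pool", item_len, attr);
end:
	/* The caller keeps ownership of attr and may destroy it right away. */
	rseq_mempool_attr_destroy(attr);
	return pool;
}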
+ *
+ * Helper which can be used from mempool_attr @init_func to move a CPU
+ * memory range to the NUMA node associated with its topology.
+ *
+ * Returns 0 on success, -1 with errno set by move_pages(2) on error.
+ * Returns -1, errno=ENOSYS if NUMA support is not present.
+ */
+int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags);
+
+/*
+ * rseq_mempool_get_max_nr_cpus: Get the max_nr_cpus value configured for a pool.
+ *
+ * Returns a value >= 0 for a per-cpu pool.
+ * Returns -1, errno=EINVAL if the mempool is NULL or if the pool has a
+ * global pool type.
+ */
+int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RSEQ_MEMPOOL_H */
diff --git a/third_party/librseq/include/rseq/pseudocode.h b/third_party/librseq/include/rseq/pseudocode.h
new file mode 100644
index 000000000000..c6fb9494475e
--- /dev/null
+++ b/third_party/librseq/include/rseq/pseudocode.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: 2024 Mathieu Desnoyers */
+
+/*
+ * rseq/pseudocode.h
+ *
+ * This file contains the pseudo-code of rseq critical section helpers,
+ * to be used as a reference for architecture implementations.
+ */
+
+#ifndef _RSEQ_PSEUDOCODE_H
+#define _RSEQ_PSEUDOCODE_H
+
+/*
+ * Pseudo-code conventions:
+ *
+ * rX: Register X
+ * [var]: Register associated with C variable "var".
+ * [label]: Jump target associated with C label "label".
+ *
+ * load(rX, address): load from memory address to rX
+ * store(rX, address): store to memory address from rX
+ * cbne(rX, rY, target): compare-and-branch to target if rX != rY
+ * cbeq(rX, rY, target): compare-and-branch to target if rX == rY
+ * add(rX, rY): add rY to register rX
+ * memcpy(dest_address, src_address, len): copy len bytes from src_address to dest_address
+ *
+ * Critical section helper identifier convention:
+ * - Begin with an "rseq_" prefix,
+ * - Followed by their simplified pseudo-code,
+ * - Followed by __ and the type (or possibly types) on which the API
+ *   applies (similar to the approach taken for C++ mangling).
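To make the convention concrete, one helper name decomposed (an illustration, not additional API):

/*
 * rseq_  load_cbne_store  __ptr
 * prefix pseudo-code      type suffix: pointer-sized (intptr_t) operands
 *
 * i.e. "load *v; compare-and-branch to [ne] if it differs from expect;
 * otherwise store newv", instantiated for intptr_t.
 */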
+ */ + +/* + * rseq_load_cbne_store(v, expect, newv) + * + * Pseudo-code: + * load(r1, [v]) + * cbne(r1, [expect], [ne]) + * store([newv], [v]) + * + * Return values: + * success: 0 + * ne: 1 + * abort: -1 + */ + +/* + * rseq_load_add_store(v, count) + * + * Pseudo-code: + * load(r1, [v]) + * add(r1, [count]) + * store(r1, [v]) + * + * Return values: + * success: 0 + * abort: -1 + */ + +/* + * rseq_load_cbeq_store_add_load_store(v, expectnot, voffp, load) + * + * Pseudo-code: + * load(r1, [v]) + * cbeq(r1, [expectnot], [eq]) + * store(r1, [load]) + * add(r1, [voffp]) + * load(r2, r1) + * store(r2, [v]) + * + * Return values: + * success: 0 + * eq: 1 + * abort: -1 + */ + +/* + * rseq_load_add_load_load_add_store(ptr, off, inc) + * + * Pseudo-code: + * load(r1, [ptr]) + * add(r1, [off]) + * load(r2, r1) + * load(r3, r2) + * add(r3, [inc]) + * store(r3, r2) + * + * Return values: + * success: 0 + * abort: -1 + */ + +/* + * rseq_load_cbne_load_cbne_store(v, expect, v2, expect2, newv) + * + * Pseudo-code: + * load(r1, [v]) + * cbne(r1, [expect], [ne]) + * load(r2, [v2]) + * cbne(r2, [expect2], [ne]) + * store([newv], [v]) + * + * Return values: + * success: 0 + * ne: 1 + * abort: -1 + */ + +/* + * rseq_load_cbne_store_store(v, expect, v2, newv2, newv) + * + * Pseudo-code: + * load(r1, [v]) + * cbne(r1, [expect], [ne]) + * store([newv2], [v2]) // Store attempt + * store([newv], [v]) // Final store + * + * Return values: + * success: 0 + * ne: 1 + * abort: -1 + */ + +/* + * rseq_load_cbne_memcpy_store(v, expect, dst, src, len, newv) + * + * Pseudo-code: + * load(r1, [v]) + * cbne(r1, [expect], [ne]) + * memcpy([dst], [src], [len]) // Memory copy attempt + * store([newv], [v]) // Final store + * + * Return values: + * success: 0 + * ne: 1 + * abort: -1 + */ + +#endif /* _RSEQ_PSEUDOCODE_H */ diff --git a/third_party/librseq/include/rseq/rseq-bits-reset.h b/third_party/librseq/include/rseq/rseq-bits-reset.h deleted file mode 100644 index 1a45c2d2dbb5..000000000000 --- a/third_party/librseq/include/rseq/rseq-bits-reset.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * rseq-bits-reset.h - * - * (C) Copyright 2016-2022 - Mathieu Desnoyers - */ - -#undef RSEQ_TEMPLATE_IDENTIFIER -#undef RSEQ_TEMPLATE_CPU_ID_FIELD -#undef RSEQ_TEMPLATE_CPU_ID_OFFSET -#undef RSEQ_TEMPLATE_SUFFIX diff --git a/third_party/librseq/include/rseq/rseq-bits-template.h b/third_party/librseq/include/rseq/rseq-bits-template.h deleted file mode 100644 index d65c611c9177..000000000000 --- a/third_party/librseq/include/rseq/rseq-bits-template.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * rseq-bits-template.h - * - * (C) Copyright 2016-2022 - Mathieu Desnoyers - */ - -#ifdef RSEQ_TEMPLATE_CPU_ID -# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_CPU_ID_OFFSET -# define RSEQ_TEMPLATE_CPU_ID_FIELD cpu_id -# ifdef RSEQ_TEMPLATE_MO_RELEASE -# define RSEQ_TEMPLATE_SUFFIX _release_cpu_id -# elif defined (RSEQ_TEMPLATE_MO_RELAXED) -# define RSEQ_TEMPLATE_SUFFIX _relaxed_cpu_id -# else -# error "Never use directly; include instead." -# endif -#elif defined(RSEQ_TEMPLATE_MM_CID) -# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_MM_CID_OFFSET -# define RSEQ_TEMPLATE_CPU_ID_FIELD mm_cid -# ifdef RSEQ_TEMPLATE_MO_RELEASE -# define RSEQ_TEMPLATE_SUFFIX _release_mm_cid -# elif defined (RSEQ_TEMPLATE_MO_RELAXED) -# define RSEQ_TEMPLATE_SUFFIX _relaxed_mm_cid -# else -# error "Never use directly; include instead." 
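The load_cbeq_store_add_load_store pseudo-code above is the classic per-CPU list pop: branch out when the head equals NULL (expectnot), otherwise record the old head in *load and replace the head with its successor. A usage sketch against the generic dispatcher from rseq/rseq.h (illustrative; the heads array is hypothetical):

#include <stddef.h>
#include <rseq/rseq.h>

struct node {
	struct node *next;
};

static struct node *percpu_pop_sketch(struct node **heads)
{
	for (;;) {
		struct node *head;
		int cpu = rseq_cpu_start();
		int ret = rseq_load_cbeq_store_add_load_store__ptr(
				RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
				(intptr_t *)&heads[cpu],
				(intptr_t)NULL,			/* expectnot */
				offsetof(struct node, next),	/* voffp */
				(intptr_t *)&head,		/* load */
				cpu);

		if (ret == 0)
			return head;	/* heads[cpu] now points to head->next */
		if (ret > 0)
			return NULL;	/* head was NULL: empty list ("eq") */
		/* -1: aborted, retry */
	}
}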
-# endif -#elif defined (RSEQ_TEMPLATE_CPU_ID_NONE) -# ifdef RSEQ_TEMPLATE_MO_RELEASE -# define RSEQ_TEMPLATE_SUFFIX _release -# elif defined (RSEQ_TEMPLATE_MO_RELAXED) -# define RSEQ_TEMPLATE_SUFFIX _relaxed -# else -# error "Never use directly; include instead." -# endif -#else -# error "Never use directly; include instead." -#endif - -#define RSEQ_TEMPLATE_IDENTIFIER(x) RSEQ_COMBINE_TOKENS(x, RSEQ_TEMPLATE_SUFFIX) - diff --git a/third_party/librseq/include/rseq/rseq-thread-pointer.h b/third_party/librseq/include/rseq/rseq-thread-pointer.h deleted file mode 100644 index a64fb0f3add6..000000000000 --- a/third_party/librseq/include/rseq/rseq-thread-pointer.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* SPDX-FileCopyrightText: 2021 Mathieu Desnoyers */ - -/* - * rseq-thread-pointer.h - */ - -#ifndef _RSEQ_THREAD_POINTER -#define _RSEQ_THREAD_POINTER - -#if defined(__x86_64__) || defined(__i386__) -#include -#elif defined(__PPC__) -#include -#else -#include -#endif - -#endif diff --git a/third_party/librseq/include/rseq/rseq-x86.h b/third_party/librseq/include/rseq/rseq-x86.h deleted file mode 100644 index 9672cb8b379c..000000000000 --- a/third_party/librseq/include/rseq/rseq-x86.h +++ /dev/null @@ -1,234 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* SPDX-FileCopyrightText: 2016-2018 Mathieu Desnoyers */ - -/* - * rseq-x86.h - */ - -#ifndef RSEQ_H -#error "Never use directly; include instead." -#endif - -#include - -/* - * RSEQ_SIG is used with the following reserved undefined instructions, which - * trap in user-space: - * - * x86-32: 0f b9 3d 53 30 05 53 ud1 0x53053053,%edi - * x86-64: 0f b9 3d 53 30 05 53 ud1 0x53053053(%rip),%edi - */ -#define RSEQ_SIG 0x53053053 - -/* - * Due to a compiler optimization bug in gcc-8 with asm goto and TLS asm input - * operands, we cannot use "m" input operands, and rather pass the __rseq_abi - * address through a "r" input operand. - */ - -/* Offset of cpu_id, rseq_cs, and mm_cid fields in struct rseq. */ -#define RSEQ_CPU_ID_OFFSET 4 -#define RSEQ_CS_OFFSET 8 -#define RSEQ_MM_CID_OFFSET 24 - -#ifdef __x86_64__ - -#define RSEQ_ASM_TP_SEGMENT %%fs - -#define rseq_smp_mb() \ - __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") -#define rseq_smp_rmb() rseq_barrier() -#define rseq_smp_wmb() rseq_barrier() - -#define rseq_smp_load_acquire(p) \ -__extension__ ({ \ - rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ - rseq_barrier(); \ - ____p1; \ -}) - -#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() - -#define rseq_smp_store_release(p, v) \ -do { \ - rseq_barrier(); \ - RSEQ_WRITE_ONCE(*(p), v); \ -} while (0) - -#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ - start_ip, post_commit_offset, abort_ip) \ - ".pushsection __rseq_cs, \"aw\"\n\t" \ - ".balign 32\n\t" \ - __rseq_str(label) ":\n\t" \ - ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ - ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \ - ".popsection\n\t" \ - ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \ - ".quad " __rseq_str(label) "b\n\t" \ - ".popsection\n\t" - - -#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ - __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ - (post_commit_ip - start_ip), abort_ip) - -/* - * Exit points of a rseq critical section consist of all instructions outside - * of the critical section where a critical section can either branch to or - * reach through the normal course of its execution. 
The abort IP and the - * post-commit IP are already part of the __rseq_cs section and should not be - * explicitly defined as additional exit points. Knowing all exit points is - * useful to assist debuggers stepping over the critical section. - */ -#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ - ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \ - ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \ - ".popsection\n\t" - -#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ - RSEQ_INJECT_ASM(1) \ - "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \ - "movq %%rax, " __rseq_str(rseq_cs) "\n\t" \ - __rseq_str(label) ":\n\t" - -#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ - RSEQ_INJECT_ASM(2) \ - "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \ - "jnz " __rseq_str(label) "\n\t" - -#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ - ".pushsection __rseq_failure, \"ax\"\n\t" \ - /* Disassembler-friendly signature: ud1 (%rip),%edi. */ \ - ".byte 0x0f, 0xb9, 0x3d\n\t" \ - ".long " __rseq_str(RSEQ_SIG) "\n\t" \ - __rseq_str(label) ":\n\t" \ - teardown \ - "jmp %l[" __rseq_str(abort_label) "]\n\t" \ - ".popsection\n\t" - -#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ - ".pushsection __rseq_failure, \"ax\"\n\t" \ - __rseq_str(label) ":\n\t" \ - teardown \ - "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ - ".popsection\n\t" - -#elif defined(__i386__) - -#define RSEQ_ASM_TP_SEGMENT %%gs - -#define rseq_smp_mb() \ - __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") -#define rseq_smp_rmb() \ - __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") -#define rseq_smp_wmb() \ - __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") - -#define rseq_smp_load_acquire(p) \ -__extension__ ({ \ - rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ - rseq_smp_mb(); \ - ____p1; \ -}) - -#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() - -#define rseq_smp_store_release(p, v) \ -do { \ - rseq_smp_mb(); \ - RSEQ_WRITE_ONCE(*p, v); \ -} while (0) - -/* - * Use eax as scratch register and take memory operands as input to - * lessen register pressure. Especially needed when compiling in O0. - */ -#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ - start_ip, post_commit_offset, abort_ip) \ - ".pushsection __rseq_cs, \"aw\"\n\t" \ - ".balign 32\n\t" \ - __rseq_str(label) ":\n\t" \ - ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ - ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ - ".popsection\n\t" \ - ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \ - ".long " __rseq_str(label) "b, 0x0\n\t" \ - ".popsection\n\t" - -#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ - __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ - (post_commit_ip - start_ip), abort_ip) - -/* - * Exit points of a rseq critical section consist of all instructions outside - * of the critical section where a critical section can either branch to or - * reach through the normal course of its execution. The abort IP and the - * post-commit IP are already part of the __rseq_cs section and should not be - * explicitly defined as additional exit points. Knowing all exit points is - * useful to assist debuggers stepping over the critical section. 
- */ -#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ - ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \ - ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) ", 0x0\n\t" \ - ".popsection\n\t" - -#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ - RSEQ_INJECT_ASM(1) \ - "movl $" __rseq_str(cs_label) ", " __rseq_str(rseq_cs) "\n\t" \ - __rseq_str(label) ":\n\t" - -#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ - RSEQ_INJECT_ASM(2) \ - "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \ - "jnz " __rseq_str(label) "\n\t" - -#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ - ".pushsection __rseq_failure, \"ax\"\n\t" \ - /* Disassembler-friendly signature: ud1 ,%edi. */ \ - ".byte 0x0f, 0xb9, 0x3d\n\t" \ - ".long " __rseq_str(RSEQ_SIG) "\n\t" \ - __rseq_str(label) ":\n\t" \ - teardown \ - "jmp %l[" __rseq_str(abort_label) "]\n\t" \ - ".popsection\n\t" - -#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ - ".pushsection __rseq_failure, \"ax\"\n\t" \ - __rseq_str(label) ":\n\t" \ - teardown \ - "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ - ".popsection\n\t" - -#endif - -/* Per-cpu-id indexing. */ - -#define RSEQ_TEMPLATE_CPU_ID -#define RSEQ_TEMPLATE_MO_RELAXED -#include "rseq-x86-bits.h" -#undef RSEQ_TEMPLATE_MO_RELAXED - -#define RSEQ_TEMPLATE_MO_RELEASE -#include "rseq-x86-bits.h" -#undef RSEQ_TEMPLATE_MO_RELEASE -#undef RSEQ_TEMPLATE_CPU_ID - -/* Per-mm-cid indexing. */ - -#define RSEQ_TEMPLATE_MM_CID -#define RSEQ_TEMPLATE_MO_RELAXED -#include "rseq-x86-bits.h" -#undef RSEQ_TEMPLATE_MO_RELAXED - -#define RSEQ_TEMPLATE_MO_RELEASE -#include "rseq-x86-bits.h" -#undef RSEQ_TEMPLATE_MO_RELEASE -#undef RSEQ_TEMPLATE_MM_CID - -/* APIs which are not based on cpu ids. */ - -#define RSEQ_TEMPLATE_CPU_ID_NONE -#define RSEQ_TEMPLATE_MO_RELAXED -#include "rseq-x86-bits.h" -#undef RSEQ_TEMPLATE_MO_RELAXED -#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/third_party/librseq/include/rseq/rseq.h b/third_party/librseq/include/rseq/rseq.h index 080508e2988d..d8c7ef3f3816 100644 --- a/third_party/librseq/include/rseq/rseq.h +++ b/third_party/librseq/include/rseq/rseq.h @@ -2,11 +2,11 @@ /* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */ /* - * rseq.h + * rseq/rseq.h */ -#ifndef RSEQ_H -#define RSEQ_H +#ifndef _RSEQ_RSEQ_H +#define _RSEQ_RSEQ_H #include #include @@ -18,42 +18,31 @@ #include #include #include -#include -#include - -#ifndef rseq_sizeof_field -#define rseq_sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#endif - -#ifndef rseq_offsetofend -#define rseq_offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + rseq_sizeof_field(TYPE, MEMBER)) -#endif -/* - * Empty code injection macros, override when testing. - * It is important to consider that the ASM injection macros need to be - * fully reentrant (e.g. do not modify the stack). 
- */ -#ifndef RSEQ_INJECT_ASM -#define RSEQ_INJECT_ASM(n) -#endif - -#ifndef RSEQ_INJECT_C -#define RSEQ_INJECT_C(n) -#endif +#include +#include +#include +#include +#include -#ifndef RSEQ_INJECT_INPUT -#define RSEQ_INJECT_INPUT -#endif +enum rseq_mo { + RSEQ_MO_RELAXED = 0, + RSEQ_MO_CONSUME = 1, /* Unused */ + RSEQ_MO_ACQUIRE = 2, /* Unused */ + RSEQ_MO_RELEASE = 3, + RSEQ_MO_ACQ_REL = 4, /* Unused */ + RSEQ_MO_SEQ_CST = 5, /* Unused */ +}; -#ifndef RSEQ_INJECT_CLOBBER -#define RSEQ_INJECT_CLOBBER -#endif +enum rseq_percpu_mode { + RSEQ_PERCPU_CPU_ID = 0, + RSEQ_PERCPU_MM_CID = 1, +}; -#ifndef RSEQ_INJECT_FAILED -#define RSEQ_INJECT_FAILED -#endif +enum rseq_available_query { + RSEQ_AVAILABLE_QUERY_KERNEL = 0, + RSEQ_AVAILABLE_QUERY_LIBC = 1, +}; /* * User code can define RSEQ_GET_ABI_OVERRIDE to override the @@ -62,8 +51,6 @@ */ #ifndef RSEQ_GET_ABI_OVERRIDE -# include - # ifdef __cplusplus extern "C" { # endif @@ -72,7 +59,18 @@ extern "C" { extern ptrdiff_t rseq_offset; /* - * Size of the registered rseq area. 0 if the registration was + * The rseq ABI is composed of extensible feature fields. The extensions + * are done by appending additional fields at the end of the structure. + * The rseq_size defines the size of the active feature set which can be + * used by the application for the current rseq registration. Features + * starting at offset >= rseq_size are inactive and should not be used. + * + * The rseq_size is the intersection between the available allocation + * size for the rseq area and the feature size supported by the kernel. + */ + +/* + * Size of the active rseq feature set. 0 if the registration was * unsuccessful. */ extern unsigned int rseq_size; @@ -81,12 +79,10 @@ extern unsigned int rseq_size; extern unsigned int rseq_flags; /* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. + * Returns a pointer to the rseq area. */ -extern unsigned int rseq_feature_size; - -static inline struct rseq_abi *rseq_get_abi(void) +static inline __attribute__((always_inline)) +struct rseq_abi *rseq_get_abi(void) { return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); } @@ -97,58 +93,12 @@ static inline struct rseq_abi *rseq_get_abi(void) #endif /* RSEQ_GET_ABI_OVERRIDE */ -enum rseq_mo { - RSEQ_MO_RELAXED = 0, - RSEQ_MO_CONSUME = 1, /* Unused */ - RSEQ_MO_ACQUIRE = 2, /* Unused */ - RSEQ_MO_RELEASE = 3, - RSEQ_MO_ACQ_REL = 4, /* Unused */ - RSEQ_MO_SEQ_CST = 5, /* Unused */ -}; -enum rseq_percpu_mode { - RSEQ_PERCPU_CPU_ID = 0, - RSEQ_PERCPU_MM_CID = 1, -}; +/* + * Architecture specific. + */ +#include -#define rseq_likely(x) __builtin_expect(!!(x), 1) -#define rseq_unlikely(x) __builtin_expect(!!(x), 0) -#define rseq_barrier() __asm__ __volatile__("" : : : "memory") - -#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) -#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); }) -#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x) - -#define __rseq_str_1(x) #x -#define __rseq_str(x) __rseq_str_1(x) - -#define rseq_log(fmt, ...) \ - fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \ - ## __VA_ARGS__, __func__) - -#define rseq_bug(fmt, ...) 
\ - do { \ - rseq_log(fmt, ## __VA_ARGS__); \ - abort(); \ - } while (0) - -#if defined(__x86_64__) || defined(__i386__) -#include -#elif defined(__ARMEL__) || defined(__ARMEB__) -#include -#elif defined (__AARCH64EL__) -#include -#elif defined(__PPC__) -#include -#elif defined(__mips__) -#include -#elif defined(__s390__) -#include -#elif defined(__riscv) -#include -#else -#error unsupported target -#endif #ifdef __cplusplus extern "C" { @@ -178,21 +128,25 @@ int32_t rseq_fallback_current_cpu(void); */ int32_t rseq_fallback_current_node(void); -enum rseq_available_query { - RSEQ_AVAILABLE_QUERY_KERNEL = 0, - RSEQ_AVAILABLE_QUERY_LIBC = 1, -}; - /* * Returns true if rseq is supported. */ bool rseq_available(unsigned int query); +/* + * rseq_get_max_nr_cpus: Get the max_nr_cpus auto-detected value. + * + * Returns the max_nr_cpus auto-detected at pool creation when invoked + * with @nr_max_cpus=0 argument. + */ +int rseq_get_max_nr_cpus(void); + /* * Values returned can be either the current CPU number, -1 (rseq is * uninitialized), or -2 (rseq initialization has failed). */ -static inline int32_t rseq_current_cpu_raw(void) +static inline __attribute__((always_inline)) +int32_t rseq_current_cpu_raw(void) { return RSEQ_READ_ONCE(rseq_get_abi()->cpu_id); } @@ -208,12 +162,14 @@ static inline int32_t rseq_current_cpu_raw(void) * return value of rseq_current_cpu_raw() if the rseq asm sequence * does not need to be invoked. */ -static inline uint32_t rseq_cpu_start(void) +static inline __attribute__((always_inline)) +uint32_t rseq_cpu_start(void) { return RSEQ_READ_ONCE(rseq_get_abi()->cpu_id_start); } -static inline uint32_t rseq_current_cpu(void) +static inline __attribute__((always_inline)) +uint32_t rseq_current_cpu(void) { int32_t cpu; @@ -223,31 +179,36 @@ static inline uint32_t rseq_current_cpu(void) return cpu; } -static inline bool rseq_node_id_available(void) +static inline __attribute__((always_inline)) +bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= (int) rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= (int) rseq_offsetofend(struct rseq_abi, node_id); } /* * Current NUMA node number. */ -static inline uint32_t rseq_current_node_id(void) +static inline __attribute__((always_inline)) +uint32_t rseq_current_node_id(void) { assert(rseq_node_id_available()); return RSEQ_READ_ONCE(rseq_get_abi()->node_id); } -static inline bool rseq_mm_cid_available(void) +static inline __attribute__((always_inline)) +bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= (int) rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= (int) rseq_offsetofend(struct rseq_abi, mm_cid); } -static inline uint32_t rseq_current_mm_cid(void) +static inline __attribute__((always_inline)) +uint32_t rseq_current_mm_cid(void) { return RSEQ_READ_ONCE(rseq_get_abi()->mm_cid); } -static inline void rseq_clear_rseq_cs(void) +static inline __attribute__((always_inline)) +void rseq_clear_rseq_cs(void) { RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); } @@ -263,13 +224,20 @@ static inline void rseq_clear_rseq_cs(void) * holding the struct rseq_cs or reclaim of the code pointed to by struct * rseq_cs start_ip and post_commit_offset fields. */ -static inline void rseq_prepare_unload(void) +static inline __attribute__((always_inline)) +void rseq_prepare_unload(void) { rseq_clear_rseq_cs(); } +/* + * Refer to rseq/pseudocode.h for documentation and pseudo-code of the + * rseq critical section helpers. 
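
The hunk above drops the separate rseq_feature_size and makes rseq_size the single value describing which extensible ABI fields are active: a field is usable iff rseq_size >= rseq_offsetofend(struct rseq_abi, field), which is exactly what rseq_node_id_available() and rseq_mm_cid_available() test. A usage sketch against this public header (assuming librseq's constructor has already registered the thread, as src/rseq.c arranges):

#include <rseq/rseq.h>
#include <stdio.h>

int main(void)
{
	if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
		fputs("rseq unsupported by this kernel\n", stderr);
		return 1;
	}
	printf("cpu %u\n", rseq_current_cpu());
	/* Extensible fields must be gated on the active feature set. */
	if (rseq_node_id_available())
		printf("numa node %u\n", rseq_current_node_id());
	if (rseq_mm_cid_available())
		printf("mm_cid %u\n", rseq_current_mm_cid());
	return 0;
}
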
+ */ +#include "rseq/pseudocode.h" + static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_cbne_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { @@ -277,20 +245,16 @@ int rseq_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, return -1; switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_storev_relaxed_cpu_id(v, expect, newv, cpu); + return rseq_load_cbne_store__ptr_relaxed_cpu_id(v, expect, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_storev_relaxed_mm_cid(v, expect, newv, cpu); + return rseq_load_cbne_store__ptr_relaxed_mm_cid(v, expect, newv, cpu); default: return -1; } } -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_cbeq_store_add_load_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t expectnot, long voffp, intptr_t *load, int cpu) { @@ -298,46 +262,42 @@ int rseq_cmpnev_storeoffp_load(enum rseq_mo rseq_mo, enum rseq_percpu_mode percp return -1; switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpnev_storeoffp_load_relaxed_cpu_id(v, expectnot, voffp, load, cpu); + return rseq_load_cbeq_store_add_load_store__ptr_relaxed_cpu_id(v, expectnot, voffp, load, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpnev_storeoffp_load_relaxed_mm_cid(v, expectnot, voffp, load, cpu); + return rseq_load_cbeq_store_add_load_store__ptr_relaxed_mm_cid(v, expectnot, voffp, load, cpu); default: return -1; } } static inline __attribute__((always_inline)) -int rseq_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_add_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t count, int cpu) { if (rseq_mo != RSEQ_MO_RELAXED) return -1; switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_addv_relaxed_cpu_id(v, count, cpu); + return rseq_load_add_store__ptr_relaxed_cpu_id(v, count, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_addv_relaxed_mm_cid(v, count, cpu); + return rseq_load_add_store__ptr_relaxed_mm_cid(v, count, cpu); default: return -1; } } -#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV -/* - * pval = *(ptr+off) - * *pval += inc; - */ +#ifdef rseq_arch_has_load_add_load_load_add_store static inline __attribute__((always_inline)) -int rseq_offset_deref_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_add_load_load_add_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *ptr, long off, intptr_t inc, int cpu) { if (rseq_mo != RSEQ_MO_RELAXED) return -1; switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_offset_deref_addv_relaxed_cpu_id(ptr, off, inc, cpu); + return rseq_load_add_load_load_add_store__ptr_relaxed_cpu_id(ptr, off, inc, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_offset_deref_addv_relaxed_mm_cid(ptr, off, inc, cpu); + return rseq_load_add_load_load_add_store__ptr_relaxed_mm_cid(ptr, off, inc, cpu); default: return -1; } @@ -345,7 +305,7 @@ int rseq_offset_deref_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mo #endif static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int 
rseq_load_cbne_store_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, intptr_t newv, int cpu) @@ -354,18 +314,18 @@ int rseq_cmpeqv_trystorev_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode per case RSEQ_MO_RELAXED: switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_trystorev_storev_relaxed_cpu_id(v, expect, v2, newv2, newv, cpu); + return rseq_load_cbne_store_store__ptr_relaxed_cpu_id(v, expect, v2, newv2, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_trystorev_storev_relaxed_mm_cid(v, expect, v2, newv2, newv, cpu); + return rseq_load_cbne_store_store__ptr_relaxed_mm_cid(v, expect, v2, newv2, newv, cpu); default: return -1; } case RSEQ_MO_RELEASE: switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_trystorev_storev_release_cpu_id(v, expect, v2, newv2, newv, cpu); + return rseq_load_cbne_store_store__ptr_release_cpu_id(v, expect, v2, newv2, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_trystorev_storev_release_mm_cid(v, expect, v2, newv2, newv, cpu); + return rseq_load_cbne_store_store__ptr_release_mm_cid(v, expect, v2, newv2, newv, cpu); default: return -1; } @@ -379,7 +339,7 @@ int rseq_cmpeqv_trystorev_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode per } static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_cbne_load_cbne_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t expect2, intptr_t newv, int cpu) @@ -388,16 +348,16 @@ int rseq_cmpeqv_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu return -1; switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_cmpeqv_storev_relaxed_cpu_id(v, expect, v2, expect2, newv, cpu); + return rseq_load_cbne_load_cbne_store__ptr_relaxed_cpu_id(v, expect, v2, expect2, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_cmpeqv_storev_relaxed_mm_cid(v, expect, v2, expect2, newv, cpu); + return rseq_load_cbne_load_cbne_store__ptr_relaxed_mm_cid(v, expect, v2, expect2, newv, cpu); default: return -1; } } static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, +int rseq_load_cbne_memcpy_store__ptr(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, intptr_t *v, intptr_t expect, void *dst, void *src, size_t len, intptr_t newv, int cpu) @@ -406,18 +366,18 @@ int rseq_cmpeqv_trymemcpy_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode per case RSEQ_MO_RELAXED: switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_trymemcpy_storev_relaxed_cpu_id(v, expect, dst, src, len, newv, cpu); + return rseq_load_cbne_memcpy_store__ptr_relaxed_cpu_id(v, expect, dst, src, len, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_trymemcpy_storev_relaxed_mm_cid(v, expect, dst, src, len, newv, cpu); + return rseq_load_cbne_memcpy_store__ptr_relaxed_mm_cid(v, expect, dst, src, len, newv, cpu); default: return -1; } case RSEQ_MO_RELEASE: switch (percpu_mode) { case RSEQ_PERCPU_CPU_ID: - return rseq_cmpeqv_trymemcpy_storev_release_cpu_id(v, expect, dst, src, len, newv, cpu); + return rseq_load_cbne_memcpy_store__ptr_release_cpu_id(v, expect, dst, src, len, newv, cpu); case RSEQ_PERCPU_MM_CID: - return rseq_cmpeqv_trymemcpy_storev_release_mm_cid(v, expect, dst, src, len, newv, cpu); + return 
rseq_load_cbne_memcpy_store__ptr_release_mm_cid(v, expect, dst, src, len, newv, cpu); default: return -1; } @@ -434,4 +394,4 @@ int rseq_cmpeqv_trymemcpy_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode per } #endif -#endif /* RSEQ_H_ */ +#endif /* _RSEQ_RSEQ_H */ diff --git a/third_party/librseq/include/rseq/thread-pointer.h b/third_party/librseq/include/rseq/thread-pointer.h new file mode 100644 index 000000000000..264d4b8eea69 --- /dev/null +++ b/third_party/librseq/include/rseq/thread-pointer.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2021 Mathieu Desnoyers */ + +/* + * rseq/thread-pointer.h + */ + +#ifndef _RSEQ_THREAD_POINTER_H +#define _RSEQ_THREAD_POINTER_H + +#if (defined(__amd64__) \ + || defined(__amd64) \ + || defined(__x86_64__) \ + || defined(__x86_64) \ + || defined(__i386__) \ + || defined(__i386)) + +#include + +#elif (defined(__powerpc64__) \ + || defined(__ppc64__) \ + || defined(__powerpc__) \ + || defined(__powerpc) \ + || defined(__ppc__)) + +#include + +#elif defined(__riscv) + +#include + +#else + +#include + +#endif + +#endif /* _RSEQ_THREAD_POINTER_H */ diff --git a/third_party/librseq/include/rseq/utils.h b/third_party/librseq/include/rseq/utils.h new file mode 100644 index 000000000000..066a9da16385 --- /dev/null +++ b/third_party/librseq/include/rseq/utils.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: 2016-2022 Mathieu Desnoyers */ + +/* + * rseq/utils.h + */ + +#ifndef _RSEQ_UTILS_H +#define _RSEQ_UTILS_H + +#include +#include + +#ifndef rseq_sizeof_field +#define rseq_sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef rseq_offsetofend +#define rseq_offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + rseq_sizeof_field(TYPE, MEMBER)) +#endif + +#define __rseq_str_1(x) #x +#define __rseq_str(x) __rseq_str_1(x) + +#define rseq_log(fmt, ...) \ + fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \ + ## __VA_ARGS__, __func__) + +#define rseq_bug(fmt, ...) \ + do { \ + rseq_log(fmt, ## __VA_ARGS__); \ + abort(); \ + } while (0) + +#endif /* _RSEQ_UTILS_H */ diff --git a/third_party/librseq/src/config.h b/third_party/librseq/src/config.h index 135bf90bbc15..b034347bc274 100644 --- a/third_party/librseq/src/config.h +++ b/third_party/librseq/src/config.h @@ -1,7 +1,7 @@ /* include/config.h. Generated from config.h.in by configure. */ /* include/config.h.in. Generated from configure.ac by autoheader. */ -/* Define to 1 if you have the `atexit' function. */ +/* Define to 1 if you have the 'atexit' function. */ #define HAVE_ATEXIT 1 /* define if the compiler supports basic C++11 syntax */ @@ -10,25 +10,28 @@ /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 -/* Define to 1 if you have the `fork' function. */ +/* Define to 1 if you have the 'fork' function. */ #define HAVE_FORK 1 -/* Define to 1 if you have the `getpagesize' function. */ +/* Define to 1 if you have the 'getpagesize' function. */ #define HAVE_GETPAGESIZE 1 /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 +/* Define to 1 if libnuma is available. */ +/* #undef HAVE_LIBNUMA */ + /* Define to 1 if you have the header file. */ #define HAVE_LIMITS_H 1 -/* Define to 1 if you have the `memset' function. */ +/* Define to 1 if you have the 'memset' function. */ #define HAVE_MEMSET 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_MINIX_CONFIG_H */ -/* Define to 1 if you have a working `mmap' system call. 
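
The renamed rseq/rseq.h helpers above keep one dispatch shape: the caller picks a memory ordering (enum rseq_mo) and an indexing mode (enum rseq_percpu_mode), then retries when the sequence aborts or the CPU comparison fails. A minimal per-CPU counter sketch against that API (the static array is a hypothetical stand-in for real per-CPU storage, and the loop assumes rseq registration succeeded):

#include <rseq/rseq.h>
#include <stdint.h>

#define NR_CPUS_MAX 4096			/* illustrative bound */
static intptr_t counters[NR_CPUS_MAX];		/* hypothetical per-CPU slots */

static void percpu_inc(void)
{
	int ret;

	do {
		uint32_t cpu = rseq_cpu_start();	/* always a valid index */

		ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED,
				RSEQ_PERCPU_CPU_ID,
				&counters[cpu], 1, (int) cpu);
	} while (rseq_unlikely(ret));	/* abort or cpu mismatch: retry */
}
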
*/ +/* Define to 1 if you have a working 'mmap' system call. */ #define HAVE_MMAP 1 /* Define if you have POSIX threads libraries and header files. */ @@ -52,7 +55,7 @@ /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 -/* Define to 1 if you have the `strerror' function. */ +/* Define to 1 if you have the 'strerror' function. */ #define HAVE_STRERROR 1 /* Define to 1 if you have the header file. */ @@ -79,7 +82,7 @@ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 -/* Define to 1 if you have the `vfork' function. */ +/* Define to 1 if you have the 'vfork' function. */ #define HAVE_VFORK 1 /* Define to 1 if you have the header file. */ @@ -88,13 +91,13 @@ /* Define to 1 if you have the header file. */ #define HAVE_WCHAR_H 1 -/* Define to 1 if `fork' works. */ +/* Define to 1 if 'fork' works. */ #define HAVE_WORKING_FORK 1 -/* Define to 1 if `vfork' works. */ +/* Define to 1 if 'vfork' works. */ #define HAVE_WORKING_VFORK 1 -/* Define to 1 if the system has the type `_Bool'. */ +/* Define to 1 if the system has the type '_Bool'. */ #define HAVE__BOOL 1 /* define if your compiler has __attribute__ */ @@ -128,12 +131,12 @@ your system. */ /* #undef PTHREAD_CREATE_JOINABLE */ -/* Define to 1 if all of the C90 standard headers exist (not just the ones +/* Define to 1 if all of the C89 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #define STDC_HEADERS 1 -/* Enable extensions on AIX 3, Interix. */ +/* Enable extensions on AIX, Interix, z/OS. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif @@ -194,11 +197,15 @@ #ifndef __STDC_WANT_IEC_60559_DFP_EXT__ # define __STDC_WANT_IEC_60559_DFP_EXT__ 1 #endif +/* Enable extensions specified by C23 Annex F. */ +#ifndef __STDC_WANT_IEC_60559_EXT__ +# define __STDC_WANT_IEC_60559_EXT__ 1 +#endif /* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 #endif -/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ +/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 #endif @@ -227,9 +234,12 @@ /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ -/* Define for large files, on AIX-style hosts. */ +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ /* #undef _LARGE_FILES */ +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + /* Define for Solaris 2.5.1 so the uint32_t typedef from , , or is not used. If the typedef were allowed, the #define below would cause a syntax error. */ @@ -240,7 +250,10 @@ #define below would cause a syntax error. */ /* #undef _UINT64_T */ -/* Define to `__inline__' or `__inline' if that's what the C compiler +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ + +/* Define to '__inline__' or '__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus /* #undef inline */ @@ -254,13 +267,13 @@ such a type exists and the standard includes do not define it. */ /* #undef int64_t */ -/* Define to `long int' if does not define. */ +/* Define to 'long int' if does not define. 
*/ /* #undef off_t */ /* Define as a signed integer type capable of holding a process identifier. */ /* #undef pid_t */ -/* Define to `unsigned int' if does not define. */ +/* Define as 'unsigned int' if doesn't define. */ /* #undef size_t */ /* Define to __typeof__ if your compiler spells it that way. */ @@ -274,5 +287,5 @@ such a type exists and the standard includes do not define it. */ /* #undef uint64_t */ -/* Define as `fork' if `vfork' does not work. */ +/* Define as 'fork' if 'vfork' does not work. */ /* #undef vfork */ diff --git a/third_party/librseq/src/list.h b/third_party/librseq/src/list.h new file mode 100644 index 000000000000..39a456b2d4cf --- /dev/null +++ b/third_party/librseq/src/list.h @@ -0,0 +1,198 @@ +// SPDX-FileCopyrightText: 2002 Free Software Foundation, Inc. +// SPDX-FileCopyrightText: 2009 Pierre-Marc Fournier +// SPDX-FileCopyrightText: 2010 Mathieu Desnoyers +// +// SPDX-License-Identifier: LGPL-2.1-or-later + +/* + * (originally part of the GNU C Library) + * Contributed by Ulrich Drepper , 2002. + */ + +#ifndef _LIST_H +#define _LIST_H 1 + +/* + * container_of - Get the address of an object containing a field. + * + * @ptr: pointer to the field. + * @type: type of the object. + * @member: name of the field within the object. + */ +#define container_of(ptr, type, member) \ + __extension__ \ + ({ \ + const __typeof__(((type *) NULL)->member) * __ptr = (ptr); \ + (type *)((char *)__ptr - offsetof(type, member)); \ + }) + +/* + * The definitions of this file are adopted from those which can be + * found in the Linux kernel headers to enable people familiar with the + * latter find their way in these sources as well. + */ + +/* Basic type for the double-link list. */ +struct list_head { + struct list_head *next, *prev; +}; + +/* Define a variable with the head and tail of the list. */ +#define LIST_HEAD(name) \ + struct list_head name = { &(name), &(name) } + +/* Initialize a new list head. */ +#define INIT_LIST_HEAD(ptr) \ + (ptr)->next = (ptr)->prev = (ptr) + +#define LIST_HEAD_INIT(name) { .next = &(name), .prev = &(name) } + +/* Add new element at the head of the list. */ +static inline +void list_add(struct list_head *newp, struct list_head *head) +{ + head->next->prev = newp; + newp->next = head->next; + newp->prev = head; + head->next = newp; +} + +/* Add new element at the tail of the list. */ +static inline +void list_add_tail(struct list_head *newp, struct list_head *head) +{ + head->prev->next = newp; + newp->next = head; + newp->prev = head->prev; + head->prev = newp; +} + +/* Remove element from list. */ +static inline +void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +/* Remove element from list. */ +static inline +void list_del(struct list_head *elem) +{ + __list_del(elem->prev, elem->next); +} + +/* Remove element from list, initializing the element's list pointers. */ +static inline +void list_del_init(struct list_head *elem) +{ + list_del(elem); + INIT_LIST_HEAD(elem); +} + +/* Delete from list, add to another list as head. */ +static inline +void list_move(struct list_head *elem, struct list_head *head) +{ + __list_del(elem->prev, elem->next); + list_add(elem, head); +} + +/* Replace an old entry. */ +static inline +void list_replace(struct list_head *old, struct list_head *_new) +{ + _new->next = old->next; + _new->prev = old->prev; + _new->prev->next = _new; + _new->next->prev = _new; +} + +/* Join two lists. 
*/ +static inline +void list_splice(struct list_head *add, struct list_head *head) +{ + /* Do nothing if the list which gets added is empty. */ + if (add != add->next) { + add->next->prev = head; + add->prev->next = head->next; + head->next->prev = add->prev; + head->next = add->next; + } +} + +/* Get typed element from list at a given position. */ +#define list_entry(ptr, type, member) container_of(ptr, type, member) + +/* Get first entry from a list. */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/* Iterate forward over the elements of the list. */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; (pos) != (head); pos = (pos)->next) + +/* + * Iterate forward over the elements list. The list elements can be + * removed from the list while doing this. + */ +#define list_for_each_safe(pos, p, head) \ + for (pos = (head)->next, p = (pos)->next; \ + (pos) != (head); \ + pos = (p), p = (pos)->next) + +/* Iterate backward over the elements of the list. */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; (pos) != (head); pos = (pos)->prev) + +/* + * Iterate backwards over the elements list. The list elements can be + * removed from the list while doing this. + */ +#define list_for_each_prev_safe(pos, p, head) \ + for (pos = (head)->prev, p = (pos)->prev; \ + (pos) != (head); \ + pos = (p), p = (pos)->prev) + +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, __typeof__(*(pos)), member); \ + &(pos)->member != (head); \ + pos = list_entry((pos)->member.next, __typeof__(*(pos)), member)) + +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, __typeof__(*(pos)), member); \ + &(pos)->member != (head); \ + pos = list_entry((pos)->member.prev, __typeof__(*(pos)), member)) + +#define list_for_each_entry_safe(pos, p, head, member) \ + for (pos = list_entry((head)->next, __typeof__(*(pos)), member), \ + p = list_entry((pos)->member.next, __typeof__(*(pos)), member); \ + &(pos)->member != (head); \ + pos = (p), p = list_entry((pos)->member.next, __typeof__(*(pos)), member)) + +/* + * Same as list_for_each_entry_safe, but starts from "pos" which should + * point to an entry within the list. + */ +#define list_for_each_entry_safe_from(pos, p, head, member) \ + for (p = list_entry((pos)->member.next, __typeof__(*(pos)), member); \ + &(pos)->member != (head); \ + pos = (p), p = list_entry((pos)->member.next, __typeof__(*(pos)), member)) + +static inline +int list_empty(struct list_head *head) +{ + return head == head->next; +} + +static inline +void list_replace_init(struct list_head *old, struct list_head *_new) +{ + struct list_head *head = old->next; + + list_del(old); + list_add_tail(_new, head); + INIT_LIST_HEAD(old); +} + +#endif /* _LIST_H */ diff --git a/third_party/librseq/src/rseq-mempool.c b/third_party/librseq/src/rseq-mempool.c new file mode 100644 index 000000000000..061d38f25e10 --- /dev/null +++ b/third_party/librseq/src/rseq-mempool.c @@ -0,0 +1,1398 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers +// SPDX-FileCopyrightText: 2024 Olivier Dion + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBNUMA +# include +# include +#endif + +#include "rseq-utils.h" +#include "list.h" +#include + +/* + * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator. 
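
The list.h just added is the kernel-style intrusive list: the linkage lives inside the user's struct and container_of() recovers the enclosing object; rseq-mempool.c keeps its ranges on exactly such a list. A minimal usage sketch with a hypothetical element type:

#include <stddef.h>
#include <stdio.h>
#include "list.h"

struct item {
	int value;
	struct list_head node;	/* embedded linkage */
};

int main(void)
{
	LIST_HEAD(items);	/* an empty list: head points to itself */
	struct item a = { .value = 1 }, b = { .value = 2 };
	struct item *pos, *tmp;

	list_add_tail(&a.node, &items);
	list_add_tail(&b.node, &items);
	list_for_each_entry(pos, &items, node)
		printf("%d\n", pos->value);
	/* The _safe variant allows removal while iterating. */
	list_for_each_entry_safe(pos, tmp, &items, node)
		list_del(&pos->node);
	return list_empty(&items) ? 0 : 1;
}
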
+ * + * The rseq per-CPU memory allocator allows the application to request + * memory pools of CPU-Local memory, each containing objects of a + * given size (rounded to the next power of 2), reserving a given virtual + * address size per CPU, for a given maximum number of CPUs. + * + * The per-CPU memory allocator is analogous to TLS (Thread-Local + * Storage) memory: where TLS provides per-thread memory, the per-CPU + * memory allocator provides CPU-Local Storage. + */ + +#define POOL_SET_NR_ENTRIES RSEQ_BITS_PER_LONG + +#define POOL_HEADER_NR_PAGES 2 + +/* + * Smallest allocation should hold enough space for a free list pointer. + */ +#if RSEQ_BITS_PER_LONG == 64 +# define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */ +#else +# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */ +#endif + +#define BIT_PER_ULONG (8 * sizeof(unsigned long)) + +#define MOVE_PAGES_BATCH_SIZE 4096 + +#define RANGE_HEADER_OFFSET sizeof(struct rseq_mempool_range) + +#if RSEQ_BITS_PER_LONG == 64 +# define DEFAULT_COW_INIT_POISON_VALUE 0x5555555555555555ULL +#else +# define DEFAULT_COW_INIT_POISON_VALUE 0x55555555UL +#endif + +/* + * Define the default COW_ZERO poison value as zero to prevent useless + * COW page allocation when writing poison values when freeing items. + */ +#define DEFAULT_COW_ZERO_POISON_VALUE 0x0 + +struct free_list_node; + +struct free_list_node { + struct free_list_node *next; +}; + +enum mempool_type { + MEMPOOL_TYPE_PERCPU = 0, /* Default */ + MEMPOOL_TYPE_GLOBAL = 1, +}; + +struct rseq_mempool_attr { + bool init_set; + int (*init_func)(void *priv, void *addr, size_t len, int cpu); + void *init_priv; + + bool robust_set; + + enum mempool_type type; + size_t stride; + int max_nr_cpus; + + unsigned long max_nr_ranges; + + bool poison_set; + uintptr_t poison; + + enum rseq_mempool_populate_policy populate_policy; +}; + +struct rseq_mempool_range; + +struct rseq_mempool_range { + struct list_head node; /* Linked list of ranges. */ + struct rseq_mempool *pool; /* Backward reference to container pool. */ + + /* + * Memory layout of a mempool range: + * - Canary header page (for detection of destroy-after-fork of + * COW_INIT pool), + * - Header page (contains struct rseq_mempool_range at the + * very end), + * - Base of the per-cpu data, starting with CPU 0. + * Aliases with free-list for non-robust COW_ZERO pool. + * - CPU 1, + * ... + * - CPU max_nr_cpus - 1 + * - init values (only allocated for COW_INIT pool). + * Aliases with free-list for non-robust COW_INIT pool. + * - free list (for robust pool). + * + * The free list aliases the CPU 0 memory area for non-robust + * COW_ZERO pools. It aliases with init values for non-robust + * COW_INIT pools. It is located immediately after the init + * values for robust pools. + */ + void *header; + void *base; + /* + * The init values contain malloc_init/zmalloc values. + * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_COW_ZERO. + */ + void *init; + size_t next_unused; + + /* Pool range mmap/munmap */ + void *mmap_addr; + size_t mmap_len; + + /* Track alloc/free. */ + unsigned long *alloc_bitmap; +}; + +struct rseq_mempool { + struct list_head range_list; /* Head of ranges linked-list. */ + unsigned long nr_ranges; + + size_t item_len; + int item_order; + + /* + * COW_INIT non-robust pools: + * The free list chains freed items on the init + * values address range. + * + * COW_ZERO non-robust pools: + * The free list chains freed items on the CPU 0 + * address range.
We should rethink this + * decision if false sharing between malloc/free + * from other CPUs and data accesses from CPU 0 + * becomes an issue. + * + * Robust pools: The free list chains freed items in the + * address range dedicated for the free list. + * + * This is a NULL-terminated singly-linked list. + */ + struct free_list_node *free_list_head; + + /* This lock protects allocation/free within the pool. */ + pthread_mutex_t lock; + + struct rseq_mempool_attr attr; + char *name; +}; + +/* + * Pool set entries are indexed by item_len rounded to the next power of + * 2. A pool set can contain NULL pool entries, in which case the next + * large enough entry will be used for allocation. + */ +struct rseq_mempool_set { + /* This lock protects add vs malloc/zmalloc within the pool set. */ + pthread_mutex_t lock; + struct rseq_mempool *entries[POOL_SET_NR_ENTRIES]; +}; + +static +const char *get_pool_name(const struct rseq_mempool *pool) +{ + return pool->name ? : ""; +} + +static +void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu, + uintptr_t item_offset, size_t stride) +{ + return range->base + (stride * cpu) + item_offset; +} + +static +void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range, + uintptr_t item_offset) +{ + if (!range->init) + return NULL; + return range->init + item_offset; +} + +static +void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool, + struct free_list_node *node) +{ + void __rseq_percpu *p = (void __rseq_percpu *) node; + + if (pool->attr.robust_set) { + /* Skip cpus. */ + p -= pool->attr.max_nr_cpus * pool->attr.stride; + /* Skip init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p -= pool->attr.stride; + + } else { + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p -= pool->attr.max_nr_cpus * pool->attr.stride; + } + return p; +} + +static +struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool, + void __rseq_percpu *p) +{ + if (pool->attr.robust_set) { + /* Skip cpus. */ + p += pool->attr.max_nr_cpus * pool->attr.stride; + /* Skip init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p += pool->attr.stride; + + } else { + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p += pool->attr.max_nr_cpus * pool->attr.stride; + } + return (struct free_list_node *) p; +} + +static +intptr_t rseq_cmp_item(void *p, size_t item_len, intptr_t cmp_value, intptr_t *unexpected_value) +{ + size_t offset; + intptr_t res = 0; + + for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) { + intptr_t v = *((intptr_t *) (p + offset)); + + if ((res = v - cmp_value) != 0) { + if (unexpected_value) + *unexpected_value = v; + break; + } + } + return res; +} + +static +void rseq_percpu_zero_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset) +{ + char *init_p = NULL; + int i; + + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + bzero(init_p, pool->item_len); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If item is already zeroed, either because the + * init range update has propagated or because the + * content is already zeroed (e.g. zero page), don't + * write to the page. 
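
All addressing in this allocator reduces to one invariant: a range reserves attr.stride bytes per CPU, contiguously and stride-aligned, so CPU c's copy of an item lives at base + c * stride + item_offset, and any item pointer can be decomposed again by masking. A worked sketch (the stride value is hypothetical; the library default RSEQ_MEMPOOL_STRIDE may differ):

#include <stddef.h>
#include <stdint.h>

/* Mirrors __rseq_pool_range_percpu_ptr() above. */
static inline void *percpu_item(void *base, int cpu,
				uintptr_t item_offset, size_t stride)
{
	return (char *) base + (stride * cpu) + item_offset;
}

/*
 * With stride = 0x100000 and a stride-aligned base 0x7f0000000000:
 *   CPU 0, offset 0x1340 -> 0x7f0000001340
 *   CPU 3, offset 0x1340 -> 0x7f0000301340
 * Freeing inverts this by masking, as librseq_mempool_percpu_free()
 * does later in this file:
 *   base        = ptr & ~(stride - 1)
 *   item_offset = ptr &  (stride - 1)
 */
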
This eliminates useless COW over + * the zero page just for overwriting it with zeroes. + * + * This means zmalloc() in COW_ZERO policy pools does + * not trigger COW for CPUs which are not actively + * writing to the pool. This is, however, not the case for + * malloc_init() in populate-all pools if it populates + * non-zero content. + */ + if (!rseq_cmp_item(p, pool->item_len, 0, NULL)) + continue; + bzero(p, pool->item_len); + } +} + +static +void rseq_percpu_init_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset, + void *init_ptr, size_t init_len) +{ + char *init_p = NULL; + int i; + + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + memcpy(init_p, init_ptr, init_len); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If the update propagated through a shared mapping, + * or the item already has the correct content, skip + * writing it into the cpu item to eliminate useless + * COW of the page. + */ + if (!memcmp(init_ptr, p, init_len)) + continue; + memcpy(p, init_ptr, init_len); + } +} + +static +void rseq_poison_item(void *p, size_t item_len, uintptr_t poison) +{ + size_t offset; + + for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) + *((uintptr_t *) (p + offset)) = poison; +} + +static +void rseq_percpu_poison_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset) +{ + uintptr_t poison = pool->attr.poison; + char *init_p = NULL; + int i; + + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + rseq_poison_item(init_p, pool->item_len, poison); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If the update propagated through a shared mapping, + * or the item already has the correct content, skip + * writing it into the cpu item to eliminate useless + * COW of the page. + * + * It is recommended to use zero as poison value for + * COW_ZERO pools to eliminate COW due to writing + * poison to CPU memory still backed by the zero page. + */ + if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0) + continue; + rseq_poison_item(p, pool->item_len, poison); + } +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset, + void *p, size_t item_len, uintptr_t poison) +{ + intptr_t unexpected_value; + + if (rseq_cmp_item(p, item_len, poison, &unexpected_value) == 0) + return; + + fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n", + __func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0)); + abort(); +} + +/* Always inline for __builtin_return_address(0).
*/ +static inline __attribute__((always_inline)) +void rseq_percpu_check_poison_item(const struct rseq_mempool *pool, + const struct rseq_mempool_range *range, uintptr_t item_offset) +{ + uintptr_t poison = pool->attr.poison; + char *init_p; + int i; + + if (!pool->attr.robust_set) + return; + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison); + } +} + +#ifdef HAVE_LIBNUMA +int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags) +{ + unsigned long nr_pages, page_len; + int status[MOVE_PAGES_BATCH_SIZE]; + int nodes[MOVE_PAGES_BATCH_SIZE]; + void *pages[MOVE_PAGES_BATCH_SIZE]; + long ret; + + if (!numa_flags) { + errno = EINVAL; + return -1; + } + page_len = rseq_get_page_len(); + nr_pages = len >> rseq_get_count_order_ulong(page_len); + + nodes[0] = numa_node_of_cpu(cpu); + if (nodes[0] < 0) + return -1; + + for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) { + nodes[k] = nodes[0]; + } + + for (unsigned long page = 0; page < nr_pages;) { + + size_t max_k = RSEQ_ARRAY_SIZE(pages); + size_t left = nr_pages - page; + + if (left < max_k) { + max_k = left; + } + + for (size_t k = 0; k < max_k; ++k, ++page) { + pages[k] = addr + (page * page_len); + status[k] = -EPERM; + } + + ret = move_pages(0, max_k, pages, nodes, status, numa_flags); + + if (ret < 0) + return ret; + + if (ret > 0) { + fprintf(stderr, "%lu pages were not migrated\n", ret); + for (size_t k = 0; k < max_k; ++k) { + if (status[k] < 0) + fprintf(stderr, + "Error while moving page %p to numa node %d: %u\n", + pages[k], nodes[k], -status[k]); + } + } + } + return 0; +} +#else +int rseq_mempool_range_init_numa(void *addr __attribute__((unused)), + size_t len __attribute__((unused)), + int cpu __attribute__((unused)), + int numa_flags __attribute__((unused))) +{ + errno = ENOSYS; + return -1; +} +#endif + +static +int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range) +{ + size_t count; + + count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; + + /* + * Not being able to create the validation bitmap is an error + * that needs to be reported. + */ + range->alloc_bitmap = calloc(count, sizeof(unsigned long)); + if (!range->alloc_bitmap) + return -1; + return 0; +} + +static +bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr) +{ + struct rseq_mempool_range *range; + void *addr = (void *) _addr; + + list_for_each_entry(range, &pool->range_list, node) { + if (addr >= range->base && addr < range->base + range->next_unused) + return true; + } + return false; +} + +/* Always inline for __builtin_return_address(0). 
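
rseq_mempool_range_init_numa() above batches move_pages(2) calls so each CPU's slice of a range migrates to that CPU's NUMA node (when built with HAVE_LIBNUMA). It is meant to be wired in through the per-range init callback, which the allocator invokes once per (range, cpu); a hedged sketch, assuming MPOL_MF_MOVE as the numa_flags value:

#include <rseq/mempool.h>
#include <numaif.h>		/* MPOL_MF_MOVE */

static int init_numa_cb(void *priv __attribute__((unused)),
			void *addr, size_t len, int cpu)
{
	return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
}

/*
 * Then: rseq_mempool_attr_set_init(attr, init_numa_cb, NULL);
 * Note that set_init() switches the pool to the COW_INIT populate
 * policy (see rseq_mempool_attr_set_init() near the end of this file).
 */
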
*/ +static inline __attribute__((always_inline)) +void check_free_list(const struct rseq_mempool *pool, bool mapping_accessible) +{ + size_t total_item = 0, total_never_allocated = 0, total_freed = 0, + max_list_traversal = 0, traversal_iteration = 0; + struct rseq_mempool_range *range; + + if (!pool->attr.robust_set || !mapping_accessible) + return; + + list_for_each_entry(range, &pool->range_list, node) { + total_item += pool->attr.stride >> pool->item_order; + total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order; + } + max_list_traversal = total_item - total_never_allocated; + + for (struct free_list_node *node = pool->free_list_head, *prev = NULL; + node; + prev = node, + node = node->next) { + + if (traversal_iteration >= max_list_traversal) { + fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n", + __func__, get_pool_name(pool), pool, __builtin_return_address(0)); + abort(); + } + + /* Node is out of range. */ + if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) { + if (prev) + fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n", + __func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0)); + else + fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n", + __func__, node, get_pool_name(pool), pool, __builtin_return_address(0)); + abort(); + } + + traversal_iteration++; + total_freed++; + } + + if (total_never_allocated + total_freed != total_item) { + fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n", + __func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0)); + abort(); + } +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void check_range_poison(const struct rseq_mempool *pool, + const struct rseq_mempool_range *range) +{ + size_t item_offset; + + for (item_offset = 0; item_offset < range->next_unused; + item_offset += pool->item_len) + rseq_percpu_check_poison_item(pool, range, item_offset); +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void check_pool_poison(const struct rseq_mempool *pool, bool mapping_accessible) +{ + struct rseq_mempool_range *range; + + if (!pool->attr.robust_set || !mapping_accessible) + return; + list_for_each_entry(range, &pool->range_list, node) + check_range_poison(pool, range); +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range) +{ + unsigned long *bitmap = range->alloc_bitmap; + size_t count, total_leaks = 0; + + if (!bitmap) + return; + + count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; + + /* Assert that all items in the pool were freed. */ + for (size_t k = 0; k < count; ++k) + total_leaks += rseq_hweight_ulong(bitmap[k]); + if (total_leaks) { + fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n", + __func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0)); + abort(); + } + + free(bitmap); + range->alloc_bitmap = NULL; +} + +/* Always inline for __builtin_return_address(0). 
*/ +static inline __attribute__((always_inline)) +int rseq_mempool_range_destroy(struct rseq_mempool *pool, + struct rseq_mempool_range *range, + bool mapping_accessible) +{ + destroy_alloc_bitmap(pool, range); + if (!mapping_accessible) { + /* + * Only the header pages are populated in the child + * process. + */ + return munmap(range->header, POOL_HEADER_NR_PAGES * rseq_get_page_len()); + } + return munmap(range->mmap_addr, range->mmap_len); +} + +/* + * Allocate a memory mapping aligned on @alignment, with an optional + * @pre_header before the mapping. + */ +static +void *aligned_mmap_anonymous(size_t page_size, size_t len, size_t alignment, + void **pre_header, size_t pre_header_len) +{ + size_t minimum_page_count, page_count, extra, total_allocate = 0; + int page_order; + void *ptr; + + if (len < page_size || alignment < page_size || + !is_pow2(alignment) || (len & (alignment - 1))) { + errno = EINVAL; + return NULL; + } + page_order = rseq_get_count_order_ulong(page_size); + if (page_order < 0) { + errno = EINVAL; + return NULL; + } + if (pre_header_len && (pre_header_len & (page_size - 1))) { + errno = EINVAL; + return NULL; + } + + minimum_page_count = (pre_header_len + len) >> page_order; + page_count = (pre_header_len + len + alignment - page_size) >> page_order; + + assert(page_count >= minimum_page_count); + + ptr = mmap(NULL, page_count << page_order, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == MAP_FAILED) { + ptr = NULL; + goto alloc_error; + } + + total_allocate = page_count << page_order; + + if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) { + /* Pointer is already aligned. ptr points to pre_header. */ + goto out; + } + + /* Unmap extra before. */ + extra = offset_align((uintptr_t) ptr + pre_header_len, alignment); + assert(!(extra & (page_size - 1))); + if (munmap(ptr, extra)) { + perror("munmap"); + abort(); + } + total_allocate -= extra; + ptr += extra; /* ptr points to pre_header */ + page_count -= extra >> page_order; +out: + assert(page_count >= minimum_page_count); + + if (page_count > minimum_page_count) { + void *extra_ptr; + + /* Unmap extra after. */ + extra_ptr = ptr + (minimum_page_count << page_order); + extra = (page_count - minimum_page_count) << page_order; + if (munmap(extra_ptr, extra)) { + perror("munmap"); + abort(); + } + total_allocate -= extra; + } + + assert(!(((uintptr_t)ptr + pre_header_len) & (alignment - 1))); + assert(total_allocate == len + pre_header_len); + +alloc_error: + if (ptr) { + if (pre_header) + *pre_header = ptr; + ptr += pre_header_len; + } + return ptr; +} + +static +int rseq_memfd_create_init(const char *poolname, size_t init_len) +{ + int fd; + char buf[249]; /* Limit is 249 bytes. */ + const char *name; + + if (poolname) { + snprintf(buf, sizeof(buf), "%s:rseq-mempool", poolname); + name = buf; + } else { + name = ":rseq-mempool"; + } + + fd = memfd_create(name, MFD_CLOEXEC); + if (fd < 0) { + perror("memfd_create"); + goto end; + } + if (ftruncate(fd, (off_t) init_len)) { + if (close(fd)) + perror("close"); + fd = -1; + goto end; + } +end: + return fd; +} + +static +void rseq_memfd_close(int fd) +{ + if (fd < 0) + return; + if (close(fd)) + perror("close"); +} + +static +struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool) +{ + struct rseq_mempool_range *range; + unsigned long page_size; + void *header; + void *base; + size_t range_len; /* Range len excludes header. 
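
aligned_mmap_anonymous() above obtains arbitrary alignment from plain mmap() by over-mapping alignment - page_size extra bytes and then munmap()ing the slack before and after the aligned window. A worked example with hypothetical numbers:

/*
 * page_size = 0x1000, len = alignment = 0x100000, pre_header_len = 0x2000:
 *   minimum_page_count = (0x2000 + 0x100000) >> 12 = 258 pages
 *   page_count = (0x2000 + 0x100000 + 0x100000 - 0x1000) >> 12 = 513 pages
 * Suppose mmap() returns 0x7f1234543000. Then ptr + pre_header_len =
 * 0x7f1234545000, whose next 1 MiB boundary is 0x7f1234600000, so
 *   extra = 0x7f1234600000 - 0x7f1234545000 = 0xbb000 (187 pages, front trim)
 * leaving ptr = 0x7f12345fe000 (the pre_header) with the aligned window
 * at 0x7f1234600000 and 513 - 187 = 326 pages still mapped; the tail
 * beyond the 258 needed pages is then unmapped as well.
 */
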
*/ + size_t header_len; + int memfd = -1; + + if (pool->attr.max_nr_ranges && + pool->nr_ranges >= pool->attr.max_nr_ranges) { + errno = ENOMEM; + return NULL; + } + page_size = rseq_get_page_len(); + + header_len = POOL_HEADER_NR_PAGES * page_size; + range_len = pool->attr.stride * pool->attr.max_nr_cpus; + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + range_len += pool->attr.stride; /* init values */ + if (pool->attr.robust_set) + range_len += pool->attr.stride; /* dedicated free list */ + base = aligned_mmap_anonymous(page_size, range_len, + pool->attr.stride, &header, header_len); + if (!base) + return NULL; + range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET); + range->pool = pool; + range->header = header; + range->base = base; + range->mmap_addr = header; + range->mmap_len = header_len + range_len; + + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) { + range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus); + /* Populate init values pages from memfd */ + memfd = rseq_memfd_create_init(pool->name, pool->attr.stride); + if (memfd < 0) + goto error_alloc; + if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, memfd, 0) != (void *) range->init) + goto error_alloc; + assert(pool->attr.type == MEMPOOL_TYPE_PERCPU); + /* + * Map per-cpu memory as private COW mappings of init values. + */ + { + int cpu; + + for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) { + void *p = base + (pool->attr.stride * cpu); + size_t len = pool->attr.stride; + + if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, + memfd, 0) != (void *) p) + goto error_alloc; + } + } + /* + * The init values shared mapping should not be shared + * with the children processes across fork. Prevent the + * whole mapping from being used across fork. + */ + if (madvise(base, range_len, MADV_DONTFORK)) + goto error_alloc; + + /* + * Write 0x1 in first byte of header first page, which + * will be WIPEONFORK (and thus cleared) in children + * processes. Used to find out if pool destroy is called + * from a child process after fork. + */ + *((char *) header) = 0x1; + if (madvise(header, page_size, MADV_WIPEONFORK)) + goto error_alloc; + + /* + * The second header page contains the struct + * rseq_mempool_range, which is needed by pool destroy. + * Leave this anonymous page populated (COW) in child + * processes. 
+ */ + rseq_memfd_close(memfd); + memfd = -1; + } + + if (pool->attr.robust_set) { + if (create_alloc_bitmap(pool, range)) + goto error_alloc; + } + if (pool->attr.init_set) { + switch (pool->attr.type) { + case MEMPOOL_TYPE_GLOBAL: + if (pool->attr.init_func(pool->attr.init_priv, + base, pool->attr.stride, -1)) { + goto error_alloc; + } + break; + case MEMPOOL_TYPE_PERCPU: + { + int cpu; + for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) { + if (pool->attr.init_func(pool->attr.init_priv, + base + (pool->attr.stride * cpu), + pool->attr.stride, cpu)) { + goto error_alloc; + } + } + break; + } + default: + abort(); + } + } + pool->nr_ranges++; + return range; + +error_alloc: + rseq_memfd_close(memfd); + (void) rseq_mempool_range_destroy(pool, range, true); + return NULL; +} + +static +bool pool_mappings_accessible(struct rseq_mempool *pool) +{ + struct rseq_mempool_range *range; + size_t page_size; + char *addr; + + if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_COW_INIT) + return true; + if (list_empty(&pool->range_list)) + return true; + range = list_first_entry(&pool->range_list, struct rseq_mempool_range, node); + page_size = rseq_get_page_len(); + /* + * Header first page is one page before the page containing the + * range structure. + */ + addr = (char *) ((uintptr_t) range & ~(page_size - 1)) - page_size; + /* + * Look for 0x1 first byte marker in header first page. + */ + if (*addr != 0x1) + return false; + return true; +} + +int rseq_mempool_destroy(struct rseq_mempool *pool) +{ + struct rseq_mempool_range *range, *tmp_range; + bool mapping_accessible; + int ret = 0; + + if (!pool) + return 0; + + /* + * Validate that the pool mappings are accessible before doing + * free list/poison validation and unmapping ranges. This allows + * calling pool destroy in child process after a fork for COW_INIT + * pools to free pool resources. + */ + mapping_accessible = pool_mappings_accessible(pool); + + check_free_list(pool, mapping_accessible); + check_pool_poison(pool, mapping_accessible); + + /* Iteration safe against removal. */ + list_for_each_entry_safe(range, tmp_range, &pool->range_list, node) { + list_del(&range->node); + if (rseq_mempool_range_destroy(pool, range, mapping_accessible)) { + /* Keep list coherent in case of partial failure. */ + list_add(&range->node, &pool->range_list); + goto end; + } + } + pthread_mutex_destroy(&pool->lock); + free(pool->name); + free(pool); +end: + return ret; +} + +struct rseq_mempool *rseq_mempool_create(const char *pool_name, + size_t item_len, const struct rseq_mempool_attr *_attr) +{ + struct rseq_mempool_attr attr = {}; + struct rseq_mempool_range *range; + struct rseq_mempool *pool; + int order; + + /* Make sure each item is large enough to contain free list pointers. */ + if (item_len < sizeof(void *)) + item_len = sizeof(void *); + + /* Align item_len on next power of two. */ + order = rseq_get_count_order_ulong(item_len); + if (order < 0) { + errno = EINVAL; + return NULL; + } + item_len = 1UL << order; + + if (_attr) + memcpy(&attr, _attr, sizeof(attr)); + + /* + * Validate that the pool populate policy requested is known. 
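
All of the COW_INIT machinery above (the memfd-backed shared init mapping, the per-CPU MAP_PRIVATE COW mappings of it, MADV_DONTFORK on the range, and the 0x1 WIPEONFORK canary byte) is opted into through the populate-policy attribute. A hedged creation sketch using the attribute API defined later in this file (struct counter is a hypothetical item type):

#include <rseq/mempool.h>
#include <stdint.h>

struct counter { intptr_t v; };		/* hypothetical item type */

static struct rseq_mempool *make_pool(void)
{
	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
	struct rseq_mempool *pool = NULL;

	if (!attr)
		return NULL;
	/* Per-CPU pages become COW copies of the shared init values. */
	if (!rseq_mempool_attr_set_populate_policy(attr,
			RSEQ_MEMPOOL_POPULATE_COW_INIT))
		pool = rseq_mempool_create("counters", sizeof(struct counter), attr);
	rseq_mempool_attr_destroy(attr);
	return pool;
}
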
+ */ + switch (attr.populate_policy) { + case RSEQ_MEMPOOL_POPULATE_COW_INIT: + break; + case RSEQ_MEMPOOL_POPULATE_COW_ZERO: + break; + default: + errno = EINVAL; + return NULL; + } + + switch (attr.type) { + case MEMPOOL_TYPE_PERCPU: + if (attr.max_nr_cpus < 0) { + errno = EINVAL; + return NULL; + } + if (attr.max_nr_cpus == 0) { + /* Auto-detect */ + attr.max_nr_cpus = rseq_get_max_nr_cpus(); + if (attr.max_nr_cpus == 0) { + errno = EINVAL; + return NULL; + } + } + break; + case MEMPOOL_TYPE_GLOBAL: + /* Override populate policy for global type. */ + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + attr.populate_policy = RSEQ_MEMPOOL_POPULATE_COW_ZERO; + /* Use a 1-cpu pool for global mempool type. */ + attr.max_nr_cpus = 1; + break; + } + if (!attr.stride) + attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */ + if (attr.robust_set && !attr.poison_set) { + attr.poison_set = true; + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + attr.poison = DEFAULT_COW_INIT_POISON_VALUE; + else + attr.poison = DEFAULT_COW_ZERO_POISON_VALUE; + } + if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() || + !is_pow2(attr.stride)) { + errno = EINVAL; + return NULL; + } + + pool = calloc(1, sizeof(struct rseq_mempool)); + if (!pool) + return NULL; + + memcpy(&pool->attr, &attr, sizeof(attr)); + pthread_mutex_init(&pool->lock, NULL); + pool->item_len = item_len; + pool->item_order = order; + INIT_LIST_HEAD(&pool->range_list); + + range = rseq_mempool_range_create(pool); + if (!range) + goto error_alloc; + list_add(&range->node, &pool->range_list); + + if (pool_name) { + pool->name = strdup(pool_name); + if (!pool->name) + goto error_alloc; + } + return pool; + +error_alloc: + rseq_mempool_destroy(pool); + errno = ENOMEM; + return NULL; +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset) +{ + unsigned long *bitmap = range->alloc_bitmap; + size_t item_index = item_offset >> pool->item_order; + unsigned long mask; + size_t k; + + if (!bitmap) + return; + + k = item_index / BIT_PER_ULONG; + mask = 1ULL << (item_index % BIT_PER_ULONG); + + /* Print error if bit is already set. */ + if (bitmap[k] & mask) { + fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n", + __func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0)); + abort(); + } + bitmap[k] |= mask; +} + +static +void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, + bool zeroed, void *init_ptr, size_t init_len) +{ + struct rseq_mempool_range *range; + struct free_list_node *node; + uintptr_t item_offset; + void __rseq_percpu *addr; + + if (init_len > pool->item_len) { + errno = EINVAL; + return NULL; + } + pthread_mutex_lock(&pool->lock); + /* Get first entry from free list. */ + node = pool->free_list_head; + if (node != NULL) { + void *range_base, *ptr; + + ptr = __rseq_free_list_to_percpu_ptr(pool, node); + range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1))); + range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET); + /* Remove node from free list (update head). 
*/ + pool->free_list_head = node->next; + item_offset = (uintptr_t) (ptr - range_base); + rseq_percpu_check_poison_item(pool, range, item_offset); + addr = __rseq_free_list_to_percpu_ptr(pool, node); + goto end; + } + /* + * If there are no ranges, or if the most recent range (first in + * list) does not have any room left, create a new range and + * prepend it to the list head. + */ + if (list_empty(&pool->range_list)) + goto create_range; + range = list_first_entry(&pool->range_list, struct rseq_mempool_range, node); + if (range->next_unused + pool->item_len > pool->attr.stride) + goto create_range; + else + goto room_left; +create_range: + range = rseq_mempool_range_create(pool); + if (!range) { + errno = ENOMEM; + addr = NULL; + goto end; + } + /* Add range to head of list. */ + list_add(&range->node, &pool->range_list); +room_left: + /* First range in list has room left. */ + item_offset = range->next_unused; + addr = (void __rseq_percpu *) (range->base + item_offset); + range->next_unused += pool->item_len; +end: + if (addr) + set_alloc_slot(pool, range, item_offset); + pthread_mutex_unlock(&pool->lock); + if (addr) { + if (zeroed) + rseq_percpu_zero_item(pool, range, item_offset); + else if (init_ptr) { + rseq_percpu_init_item(pool, range, item_offset, + init_ptr, init_len); + } + } + return addr; +} + +void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool) +{ + return __rseq_percpu_malloc(pool, false, NULL, 0); +} + +void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool) +{ + return __rseq_percpu_malloc(pool, true, NULL, 0); +} + +void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool, + void *init_ptr, size_t len) +{ + return __rseq_percpu_malloc(pool, false, init_ptr, len); +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset) +{ + unsigned long *bitmap = range->alloc_bitmap; + size_t item_index = item_offset >> pool->item_order; + unsigned long mask; + size_t k; + + if (!bitmap) + return; + + k = item_index / BIT_PER_ULONG; + mask = 1ULL << (item_index % BIT_PER_ULONG); + + /* Print error if bit is not set. */ + if (!(bitmap[k] & mask)) { + fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n", + __func__, get_pool_name(pool), pool, item_offset, + (void *) __builtin_return_address(0)); + abort(); + } + bitmap[k] &= ~mask; +} + +void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride) +{ + uintptr_t ptr = (uintptr_t) _ptr; + void *range_base = (void *) (ptr & (~(stride - 1))); + struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET); + struct rseq_mempool *pool = range->pool; + uintptr_t item_offset = ptr & (stride - 1); + struct free_list_node *head, *item; + + pthread_mutex_lock(&pool->lock); + clear_alloc_slot(pool, range, item_offset); + /* Add ptr to head of free list */ + head = pool->free_list_head; + if (pool->attr.poison_set) + rseq_percpu_poison_item(pool, range, item_offset); + item = __rseq_percpu_to_free_list_ptr(pool, _ptr); + /* + * Setting the next pointer will overwrite the first uintptr_t + * poison for either CPU 0 (COW_ZERO, non-robust), or init data + * (COW_INIT, non-robust). 
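
From the application's side, __rseq_percpu_malloc() above and librseq_mempool_percpu_free() here behave like malloc/free with per-CPU semantics: allocation returns one per-CPU handle, and the stride mask recovers the owning range on free. A sketch; rseq_percpu_ptr() and the rseq_mempool_percpu_free() wrapper are assumed from the public rseq/mempool.h header added earlier in this patch:

#include <rseq/mempool.h>
#include <stdint.h>

struct counter { intptr_t v; };		/* hypothetical item type */

static void demo(struct rseq_mempool *pool)
{
	struct counter __rseq_percpu *c;
	struct counter *cpu0;

	c = (struct counter __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
	if (!c)
		return;
	cpu0 = rseq_percpu_ptr(c, 0);	/* CPU 0's copy of the item */
	cpu0->v = 42;
	rseq_mempool_percpu_free(c);
}
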
+ */ + item->next = head; + pool->free_list_head = item; + pthread_mutex_unlock(&pool->lock); +} + +struct rseq_mempool_set *rseq_mempool_set_create(void) +{ + struct rseq_mempool_set *pool_set; + + pool_set = calloc(1, sizeof(struct rseq_mempool_set)); + if (!pool_set) + return NULL; + pthread_mutex_init(&pool_set->lock, NULL); + return pool_set; +} + +int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set) +{ + int order, ret; + + for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) { + struct rseq_mempool *pool = pool_set->entries[order]; + + if (!pool) + continue; + ret = rseq_mempool_destroy(pool); + if (ret) + return ret; + pool_set->entries[order] = NULL; + } + pthread_mutex_destroy(&pool_set->lock); + free(pool_set); + return 0; +} + +/* Ownership of pool is handed over to pool set on success. */ +int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool) +{ + size_t item_order = pool->item_order; + int ret = 0; + + pthread_mutex_lock(&pool_set->lock); + if (pool_set->entries[item_order]) { + errno = EBUSY; + ret = -1; + goto end; + } + pool_set->entries[pool->item_order] = pool; +end: + pthread_mutex_unlock(&pool_set->lock); + return ret; +} + +static +void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, + void *init_ptr, size_t len, bool zeroed) +{ + int order, min_order = POOL_SET_MIN_ENTRY; + struct rseq_mempool *pool; + void __rseq_percpu *addr; + + order = rseq_get_count_order_ulong(len); + if (order > POOL_SET_MIN_ENTRY) + min_order = order; +again: + pthread_mutex_lock(&pool_set->lock); + /* First smallest present pool where @len fits. */ + for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) { + pool = pool_set->entries[order]; + + if (!pool) + continue; + if (pool->item_len >= len) + goto found; + } + pool = NULL; +found: + pthread_mutex_unlock(&pool_set->lock); + if (pool) { + addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len); + if (addr == NULL && errno == ENOMEM) { + /* + * If the allocation failed, try again with a + * larger pool. + */ + min_order = order + 1; + goto again; + } + } else { + /* Not found. 
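
__rseq_mempool_set_malloc() below picks the smallest present pool whose item_len fits and, on ENOMEM, retries with the next larger pool. A usage sketch (sizes and names are illustrative; error handling is elided, so a real caller must check each create):

#include <rseq/mempool.h>

static void pool_set_demo(void)
{
	struct rseq_mempool_set *set = rseq_mempool_set_create();
	void __rseq_percpu *obj;

	if (!set)
		return;
	/* Ownership of each pool moves to the set on success. */
	rseq_mempool_set_add_pool(set, rseq_mempool_create("set-8", 8, NULL));
	rseq_mempool_set_add_pool(set, rseq_mempool_create("set-64", 64, NULL));
	/* 24 bytes rounds up to order 5; no 32-byte pool, so 64 serves it. */
	obj = rseq_mempool_set_percpu_zmalloc(set, 24);
	(void) obj;
	rseq_mempool_set_destroy(set);
}
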
*/ + errno = ENOMEM; + addr = NULL; + } + return addr; +} + +void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len) +{ + return __rseq_mempool_set_malloc(pool_set, NULL, len, false); +} + +void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len) +{ + return __rseq_mempool_set_malloc(pool_set, NULL, len, true); +} + +void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set, + void *init_ptr, size_t len) +{ + return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false); +} + +struct rseq_mempool_attr *rseq_mempool_attr_create(void) +{ + return calloc(1, sizeof(struct rseq_mempool_attr)); +} + +void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr) +{ + free(attr); +} + +int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr, + int (*init_func)(void *priv, void *addr, size_t len, int cpu), + void *init_priv) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->init_set = true; + attr->init_func = init_func; + attr->init_priv = init_priv; + attr->populate_policy = RSEQ_MEMPOOL_POPULATE_COW_INIT; + return 0; +} + +int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->robust_set = true; + return 0; +} + +int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr, + size_t stride, int max_nr_cpus) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->type = MEMPOOL_TYPE_PERCPU; + attr->stride = stride; + attr->max_nr_cpus = max_nr_cpus; + return 0; +} + +int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr, + size_t stride) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->type = MEMPOOL_TYPE_GLOBAL; + attr->stride = stride; + attr->max_nr_cpus = 0; + return 0; +} + +int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr, + unsigned long max_nr_ranges) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->max_nr_ranges = max_nr_ranges; + return 0; +} + +int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr, + uintptr_t poison) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->poison_set = true; + attr->poison = poison; + return 0; +} + +int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr, + enum rseq_mempool_populate_policy policy) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->populate_policy = policy; + return 0; +} + +int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool) +{ + if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) { + errno = EINVAL; + return -1; + } + return mempool->attr.max_nr_cpus; +} diff --git a/third_party/librseq/src/rseq-utils.h b/third_party/librseq/src/rseq-utils.h new file mode 100644 index 000000000000..4c1ea33f66f4 --- /dev/null +++ b/third_party/librseq/src/rseq-utils.h @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers + +#ifndef _RSEQ_COMMON_UTILS_H +#define _RSEQ_COMMON_UTILS_H + +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> + +#define RSEQ_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define __rseq_align_mask(v, mask) (((v) + (mask)) & ~(mask)) +#define rseq_align(v, align) __rseq_align_mask(v, (__typeof__(v)) (align) - 1) + +static inline +unsigned int rseq_fls_u64(uint64_t x) +{ + unsigned int r = 64; + + if (!x) + return 0; + + if (!(x & 0xFFFFFFFF00000000ULL)) { + x <<= 32; + r -= 32; + } + if (!(x & 0xFFFF000000000000ULL)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF00000000000000ULL))
{ + x <<= 8; + r -= 8; + } + if (!(x & 0xF000000000000000ULL)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC000000000000000ULL)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x8000000000000000ULL)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline +unsigned int rseq_fls_u32(uint32_t x) +{ + unsigned int r = 32; + + if (!x) + return 0; + if (!(x & 0xFFFF0000U)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF000000U)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF0000000U)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC0000000U)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000U)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline +unsigned int rseq_fls_ulong(unsigned long x) +{ +#if RSEQ_BITS_PER_LONG == 32 + return rseq_fls_u32(x); +#else + return rseq_fls_u64(x); +#endif +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. + */ +static inline +int rseq_get_count_order_ulong(unsigned long x) +{ + if (!x) + return -1; + + return rseq_fls_ulong(x - 1); +} + +#define RSEQ_DEFAULT_PAGE_SIZE 4096 + +static inline +unsigned long rseq_get_page_len(void) +{ + long page_len = sysconf(_SC_PAGE_SIZE); + + if (page_len < 0) + page_len = RSEQ_DEFAULT_PAGE_SIZE; + return (unsigned long) page_len; +} + +static inline +int rseq_hweight_ulong(unsigned long v) +{ + return __builtin_popcountl(v); +} + +static inline +bool is_pow2(uint64_t x) +{ + return !(x & (x - 1)); +} + +/* + * Calculate the offset needed to align p on alignment towards higher + * addresses. Alignment must be a power of 2. + */ +static inline +off_t offset_align(uintptr_t p, size_t alignment) +{ + return (alignment - p) & (alignment - 1); +} + +#endif /* _RSEQ_COMMON_UTILS_H */ diff --git a/third_party/librseq/src/rseq.c b/third_party/librseq/src/rseq.c index 090c00f48ea4..c6848770c93d 100644 --- a/third_party/librseq/src/rseq.c +++ b/third_party/librseq/src/rseq.c @@ -1,4 +1,3 @@ -#include "config.h" // SPDX-License-Identifier: MIT // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers @@ -22,6 +21,7 @@ #include #include + +#include "smp.h" #ifndef AT_RSEQ_FEATURE_SIZE # define AT_RSEQ_FEATURE_SIZE 27 @@ -45,7 +45,7 @@ static const unsigned int *libc_rseq_flags_p; ptrdiff_t rseq_offset; /* - * Size of the registered rseq area. 0 if the registration was + * Size of the active rseq feature set. 0 if the registration was * unsuccessful. */ unsigned int rseq_size = -1U; @@ -53,14 +53,7 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; -static int rseq_reg_success; /* At least one rseq registration has succeded. */ /* Allocate a large area for the TLS. */ #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 @@ -71,6 +64,12 @@ static int rseq_reg_success; /* At least one rseq registration has succeded. */ /* Original struct rseq allocation size is 32 bytes. */ #define ORIG_RSEQ_ALLOC_SIZE 32 +/* + * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the + * allocated size of the rseq_abi structure is at least + * RSEQ_THREAD_AREA_ALLOC_SIZE bytes, to hold extra space for yet unknown + * kernel rseq extensions.
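+ * For example, should a future kernel report, say, a 64-byte feature + * size through getauxval(AT_RSEQ_FEATURE_SIZE), the area below could + * already hold it without growing this allocation.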
+ */ static __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, @@ -82,7 +81,7 @@ static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } -static int sys_getcpu(unsigned *cpu, unsigned *node) +static int sys_getcpu(unsigned int *cpu, unsigned int *node) { return syscall(__NR_getcpu, cpu, node, NULL); } @@ -115,6 +114,43 @@ bool rseq_available(unsigned int query) return false; } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20). + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -125,16 +161,29 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { - if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* + * After at least one thread has registered successfully + * (rseq_size > 0), the registration of other threads should + * never fail. + */ + if (RSEQ_READ_ONCE(rseq_size) > 0) { /* Incoherent success/failure within process. */ abort(); } return -1; } assert(rseq_current_cpu_raw() >= 0); - RSEQ_WRITE_ONCE(rseq_reg_success, 1); + + /* + * The first thread to register sets the rseq_size to mimic the libc + * behavior. + */ + if (RSEQ_READ_ONCE(rseq_size) == 0) { + RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); + } + return 0; } @@ -146,65 +195,105 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - +/* + * Initialize the public symbols for the rseq offset, size, feature size and + * flags prior to registering threads.
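+ * Initialization follows the double-checked locking idiom, roughly + * (sketch): + * + * if (!load_acquire(&init_done)) { + * lock(&init_lock); + * if (!init_done) { ... init ...; store_release(&init_done, 1); } + * unlock(&init_lock); + * }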
If glibc owns the registration, get the + values from its public symbols. + */ static void rseq_init(void) { - if (RSEQ_READ_ONCE(init_done)) + /* + * Ensure initialization is only done once. Use load-acquire to + * observe the initialization performed by a concurrently + * running thread. + */ + if (rseq_smp_load_acquire(&init_done)) return; + /* + * Take the mutex and check the initialization flag again, so + * that only one thread performs the initialization. + */ pthread_mutex_lock(&init_lock); if (init_done) goto unlock; - RSEQ_WRITE_ONCE(init_done, 1); + + /* + * Check for glibc rseq support: if the 3 public symbols are found and + * the rseq_size is not zero, glibc owns the registration. + */ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special case. glibc 2.40 exposes a 20-byte + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a + * 32-byte area. Also treat 20 as a special case. + * + * These special cases are handled by using the following + * value as the active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: /* Fallthrough. */ + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } goto unlock; } + + /* librseq owns the registration */ rseq_ownership = 1; - if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) { - rseq_size = 0; - rseq_feature_size = 0; - goto unlock; - } + + /* Calculate the offset of the rseq area from the thread pointer. */ rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer(); + + /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; + + /* + * Set the size to 0 until at least one thread registers to mimic the + * libc behavior. + */ + rseq_size = 0; + /* + * Set init_done with store-release, to make sure concurrently + * running threads observe the initialized state.
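+ * This store-release pairs with the rseq_smp_load_acquire() of + * init_done at the top of this function: a thread taking the fast + * path without the mutex is then guaranteed to also observe the + * rseq_offset, rseq_size and rseq_flags values written above.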
+ */ + rseq_smp_store_release(&init_done, 1); unlock: pthread_mutex_unlock(&init_lock); } @@ -216,7 +305,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } @@ -244,3 +332,8 @@ int32_t rseq_fallback_current_node(void) } return (int32_t) node_id; } + +int rseq_get_max_nr_cpus(void) +{ + return get_possible_cpus_array_len(); +} diff --git a/third_party/librseq/src/smp.c b/third_party/librseq/src/smp.c new file mode 100644 index 000000000000..a162c2b471a1 --- /dev/null +++ b/third_party/librseq/src/smp.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2011-2012 Mathieu Desnoyers + * Copyright (C) 2019 Michael Jeanson + */ + +#define _LGPL_SOURCE +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rseq/rseq.h> + +#include "smp.h" + +#define __max(a,b) ((a)>(b)?(a):(b)) + +#define RSEQ_CPUMASK_SIZE 4096 + +static int possible_cpus_array_len_cache; + +static +int _get_max_cpuid_from_sysfs(const char *path) +{ + long max_cpuid = -1; + + DIR *cpudir; + struct dirent *entry; + + assert(path); + + cpudir = opendir(path); + if (cpudir == NULL) + goto end; + + /* + * Iterate on all directories named "cpu" followed by an integer. + */ + while ((entry = readdir(cpudir))) { + if (entry->d_type == DT_DIR && + strncmp(entry->d_name, "cpu", 3) == 0) { + + char *endptr; + long cpu_id; + + cpu_id = strtol(entry->d_name + 3, &endptr, 10); + if ((cpu_id < LONG_MAX) && (endptr != entry->d_name + 3) + && (*endptr == '\0')) { + if (cpu_id > max_cpuid) + max_cpuid = cpu_id; + } + } + } + + if (closedir(cpudir)) + perror("closedir"); + + /* + * If the max CPU id is out of bounds, set it to -1 so it results in a + * CPU num of 0. + */ + if (max_cpuid < 0 || max_cpuid > INT_MAX) + max_cpuid = -1; + +end: + return max_cpuid; +} + +/* + * Get the highest CPU id from sysfs. + * + * Iterate on all the folders in "/sys/devices/system/cpu" that start with + * "cpu" followed by an integer, and keep the highest CPU id encountered + * during this iteration. The caller can add 1 to this id to get a number of + * CPUs. + * + * Returns the highest CPU id, or -1 on error. + */ +static +int get_max_cpuid_from_sysfs(void) +{ + return _get_max_cpuid_from_sysfs("/sys/devices/system/cpu"); } + +/* + * As a fallback to parsing the CPU mask in "/sys/devices/system/cpu/possible", + * iterate on all the folders in "/sys/devices/system/cpu" that start with + * "cpu" followed by an integer, keep the highest CPU id encountered during + * this iteration and add 1 to get a number of CPUs. + * + * Then get the value from sysconf(_SC_NPROCESSORS_CONF) as a fallback and + * return the higher of the two. + * + * On Linux, using the value from sysconf can be unreliable since the way it + * counts CPUs varies between C libraries and even between versions of the same + * library. If we used it directly, getcpu() could return a value greater than + * this sysconf, in which case the arrays indexed by processor would overflow. + * + * As another example, the musl libc implementation of the _SC_NPROCESSORS_CONF + * sysconf does not return the number of configured CPUs in the system but + * relies on the cpu affinity mask of the current task. + * + * Returns 0 or less on error. + */ +static +int get_num_possible_cpus_fallback(void) +{ + /* + * Get the sysconf value as a last resort. Keep the highest number. + */ + return __max(sysconf(_SC_NPROCESSORS_CONF), get_max_cpuid_from_sysfs() + 1); +} + +/* + * Get a CPU mask string from sysfs.
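+ * The sysfs files read here use the kernel's cpulist format, e.g. + * "0-7" or "0,4-7", typically followed by a newline.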
+ * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * path: file path to read the mask from. + * + * Returns the number of bytes read or -1 on error. + */ +static +int get_cpu_mask_from_sysfs(char *buf, size_t max_bytes, const char *path) +{ + ssize_t bytes_read = 0; + size_t total_bytes_read = 0; + int fd = -1, ret = -1; + + assert(path); + + if (buf == NULL) + goto end; + + fd = open(path, O_RDONLY); + if (fd < 0) + goto end; + + do { + bytes_read = read(fd, buf + total_bytes_read, + max_bytes - total_bytes_read); + + if (bytes_read < 0) { + if (errno == EINTR) { + continue; /* retry operation */ + } else { + goto end; + } + } + + total_bytes_read += bytes_read; + assert(total_bytes_read <= max_bytes); + } while (max_bytes > total_bytes_read && bytes_read != 0); + + /* + * Make sure the mask read is a null-terminated string. + */ + if (total_bytes_read < max_bytes) + buf[total_bytes_read] = '\0'; + else + buf[max_bytes - 1] = '\0'; + + if (total_bytes_read > INT_MAX) + goto end; + ret = (int) total_bytes_read; +end: + if (fd >= 0 && close(fd) < 0) + perror("close"); + return ret; +} + +/* + * Get the CPU possible mask string from sysfs. + * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * + * Returns the number of bytes read or -1 on error. + */ +static +int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes) +{ + return get_cpu_mask_from_sysfs(buf, max_bytes, + "/sys/devices/system/cpu/possible"); +} + +/* + * Get the highest CPU id from a CPU mask. + * + * pmask: the mask to parse. + * len: the length of the mask, excluding the terminating '\0'. + * + * Returns the highest CPU id from the mask or -1 on error. + */ +static +int get_max_cpuid_from_mask(const char *pmask, size_t len) +{ + ssize_t i; + unsigned long cpu_index; + char *endptr; + + /* We need at least one char to read. */ + if (len < 1) + goto error; + + /* Start from the end to read the last CPU index. */ + for (i = len - 1; i > 0; i--) { + /* Break when we hit the first separator. */ + if ((pmask[i] == ',') || (pmask[i] == '-')) { + i++; + break; + } + } + + cpu_index = strtoul(&pmask[i], &endptr, 10); + + if ((&pmask[i] != endptr) && (cpu_index < INT_MAX)) + return (int) cpu_index; + +error: + return -1; +} + +static void update_possible_cpus_array_len_cache(void) +{ + char buf[RSEQ_CPUMASK_SIZE]; + int ret; + + /* Get the possible cpu mask from sysfs, falling back to sysconf. */ + ret = get_possible_cpu_mask_from_sysfs(buf, RSEQ_CPUMASK_SIZE); + if (ret <= 0) + goto fallback; + + /* Parse the possible cpu mask; on failure fall back to sysconf. */ + ret = get_max_cpuid_from_mask(buf, ret); + if (ret >= 0) { + /* Add 1 to convert from max cpuid to an array len. */ + ret++; + goto end; + } + +fallback: + /* Fall back to sysconf. */ + ret = get_num_possible_cpus_fallback(); + +end: + /* If all methods failed, don't store the value. */ + if (ret < 1) + return; + + possible_cpus_array_len_cache = ret; +} + +/* + * Returns the length of an array that could contain a per-CPU element for each + * possible CPU id for the lifetime of the process. + * + * We currently assume CPU ids are contiguous up to the maximum CPU id. + * + * If the cache is not yet initialized, get the value from + * "/sys/devices/system/cpu/possible" or fall back to sysconf, and cache it. + * + * If all methods fail, don't populate the cache and return 0.
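+ * + * Example use (illustrative only), sizing a per-CPU counter array: + * + * int nr = get_possible_cpus_array_len(); + * uint64_t *counts = nr > 0 ? calloc(nr, sizeof(*counts)) : NULL;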
+ */ +int get_possible_cpus_array_len(void) +{ + if (rseq_unlikely(!possible_cpus_array_len_cache)) + update_possible_cpus_array_len_cache(); + + return possible_cpus_array_len_cache; +} diff --git a/third_party/librseq/src/smp.h b/third_party/librseq/src/smp.h new file mode 100644 index 000000000000..6f9240f66580 --- /dev/null +++ b/third_party/librseq/src/smp.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2011-2012 Mathieu Desnoyers + * Copyright (C) 2019 Michael Jeanson + */ + +#ifndef _RSEQ_SMP_H +#define _RSEQ_SMP_H + +int get_possible_cpus_array_len(void) __attribute__((visibility("hidden"))); + +#endif /* _RSEQ_SMP_H */
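To show how the pieces above fit together, here is a minimal usage sketch of the per-CPU pool API touched by this patch. rseq_mempool_percpu_zmalloc(), librseq_mempool_percpu_free(), rseq_mempool_destroy(), rseq_current_cpu_raw() and rseq_get_max_nr_cpus() appear in the hunks above; the rseq_mempool_create() signature and the rseq_percpu_ptr() / rseq_mempool_percpu_free() convenience wrappers are assumed from include/rseq/mempool.h as added by this patch, so treat the sketch as illustrative rather than authoritative.

    #include <stdio.h>
    #include <stdlib.h>

    #include <rseq/rseq.h>
    #include <rseq/mempool.h>

    struct counter {
    	long hits;
    };

    int main(void)
    {
    	struct rseq_mempool *pool;
    	struct counter __rseq_percpu *c;
    	int cpu;

    	if (rseq_register_current_thread())
    		abort();
    	/* NULL attr: default per-CPU pool configuration (assumed default). */
    	pool = rseq_mempool_create("counters", sizeof(struct counter), NULL);
    	if (!pool)
    		abort();
    	/* One zero-initialized item per possible CPU. */
    	c = (struct counter __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
    	if (!c)
    		abort();
    	cpu = rseq_current_cpu_raw();
    	/* Plain access for brevity; real code updates items inside an rseq critical section. */
    	rseq_percpu_ptr(c, cpu)->hits++;
    	printf("cpu %d of %d possible\n", cpu, rseq_get_max_nr_cpus());
    	rseq_mempool_percpu_free(c);
    	rseq_mempool_destroy(pool);
    	return 0;
    }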