Skip to content
This repository has been archived by the owner on Mar 5, 2024. It is now read-only.

Commit

Permalink
oaknut: Implement DualCodeBlock and related support
Browse files Browse the repository at this point in the history
  • Loading branch information
merryhime committed Jan 28, 2024
1 parent 9f131cf commit 783965c
Show file tree
Hide file tree
Showing 9 changed files with 380 additions and 129 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ endif()
# Source project files
set(header_files
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/dual_code_block.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/id_registers.hpp
Expand Down
40 changes: 37 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
{
using namespace oaknut::util;

EmittedFunction result = code.ptr<EmittedFunction>();
EmittedFunction result = code.xptr<EmittedFunction>();

code.MOV(W0, value);
code.RET();
Expand All @@ -32,7 +32,7 @@ EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
int main()
{
oaknut::CodeBlock mem{4096};
oaknut::CodeGenerator code{mem.ptr()};
oaknut::CodeGenerator code{mem.ptr(), mem.ptr()};

mem.unprotect();

Expand All @@ -47,12 +47,45 @@ int main()
}
```
`CodeGenerator` takes two pointers. The first pointer is the memory address to write to, and the second pointer is the memory address that the code will be executed from. This allows you to write to a buffer before copying it to its final destination for execution, or to use dual-mapped memory blocks to avoid the overhead of changing memory protection.
Below is an example of using the oaknut-provided utility header for dual-mapped memory blocks:
```cpp
#include <cstdio>
#include <oaknut/dual_code_block.hpp>
#include <oaknut/oaknut.hpp>
using EmittedFunction = ;
int main()
{
using namespace oaknut::util;
oaknut::DualCodeBlock mem{4096};
oaknut::CodeGenerator code{mem.wptr(), mem.xptr()};
const auto result = code.xptr<int (*)()>();
code.MOV(W0, value);
code.RET();
mem.invalidate_all();
std::printf("%i\n", fn()); // Output: 42
return 0;
}
```

### Emit to `std::vector`

If you wish to merely emit code into memory without executing it, or if you are developing a cross-compiler that is not running on an ARM64 device, you can use `oaknut::VectorCodeGenerator` instead.

Provide `oaknut::VectorCodeGenerator` with a reference to a `std::vector<std::uint32_t>` and it will append to that vector.

The second pointer argument represents the destination address the code will eventually be executed from.

Simple example:

```cpp
Expand All @@ -64,7 +97,7 @@ Simple example:
int main()
{
std::vector<std::uint32_t> vec;
oaknut::VectorCodeGenerator code{vec};
oaknut::VectorCodeGenerator code{vec, (uint32_t*)0x1000};

code.MOV(W0, 42);
code.RET();
Expand All @@ -81,6 +114,7 @@ int main()
| ------ | --------------------- | -------- |
| `<oaknut/oaknut.hpp>` | Yes | Provides `CodeGenerator` and `VectorCodeGenerator` for code emission, as well as the `oaknut::util` namespace. |
| `<oaknut/code_block.hpp>` | No | Utility header that provides `CodeBlock`, allocates, alters permissions of, and invalidates executable memory. |
| `<oaknut/dual_code_block.hpp>` | No | Utility header that provides `DualCodeBlock`, which allocates two mirrored memory blocks (with RW and RX permissions respectively). |
| `<oaknut/oaknut_exception.hpp>` | Yes | Provides `OaknutException` which is thrown on an error. |
| `<oaknut/feature_detection/cpu_feature.hpp>` | Yes | Utility header that provides `CpuFeatures` which can be used to describe AArch64 features. |
| `<oaknut/feature_detection/feature_detection.hpp>` | No | Utility header that provides `detect_features` and `read_id_registers` for determining available AArch64 features. |
Expand Down
153 changes: 153 additions & 0 deletions include/oaknut/dual_code_block.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT

#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>

#if defined(_WIN32)
# define NOMINMAX
# include <windows.h>
#elif defined(__APPLE__)
# include <mach/mach.h>
# include <mach/vm_map.h>

# include <TargetConditionals.h>
# include <libkern/OSCacheControl.h>
# include <pthread.h>
# include <sys/mman.h>
# include <unistd.h>
#else
# if !defined(_GNU_SOURCE)
# define _GNU_SOURCE
# endif
# include <sys/mman.h>
# include <unistd.h>
#endif

namespace oaknut {

/// Owns one block of memory that is reachable through two pointers: wptr()
/// for writing code and xptr() for executing it.
///
/// - Windows: a single read/write/execute allocation; both pointers are equal.
/// - Apple: an anonymous RW mapping plus a vm_remap'd view switched to R-X.
/// - Otherwise: a memfd mapped twice with MAP_SHARED, once RW and once R-X.
///
/// The object is neither copyable nor movable.
class DualCodeBlock {
public:
    /// Allocates `size` bytes and sets up both views.
    ///
    /// @param size Size of the block in bytes.
    /// @throws std::bad_alloc if any allocation or mapping step fails. Whatever
    ///         had been acquired up to that point is released before throwing,
    ///         because the destructor does not run for a failed constructor.
    explicit DualCodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_wmem = m_xmem = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
        if (m_wmem == nullptr)
            throw std::bad_alloc{};
#elif defined(__APPLE__)
        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (wmem == MAP_FAILED)
            throw std::bad_alloc{};
        m_wmem = (std::uint32_t*)wmem;

        vm_prot_t cur_prot, max_prot;
        kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE);
        if (ret != KERN_SUCCESS) {
            // Do not trust whatever vm_remap left in the out-parameter.
            m_xmem = nullptr;
            release();
            throw std::bad_alloc{};
        }

        // A failed mprotect would leave the "executable" view non-executable.
        if (mprotect(m_xmem, size, PROT_READ | PROT_EXEC) != 0) {
            release();
            throw std::bad_alloc{};
        }
#else
        fd = memfd_create("oaknut_dual_code_block", 0);
        if (fd < 0)
            throw std::bad_alloc{};

        if (ftruncate(fd, size) != 0) {
            release();
            throw std::bad_alloc{};
        }

        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        void* const xmem = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

        // Never store MAP_FAILED in the members: release() keys off nullptr.
        m_wmem = (wmem == MAP_FAILED) ? nullptr : (std::uint32_t*)wmem;
        m_xmem = (xmem == MAP_FAILED) ? nullptr : (std::uint32_t*)xmem;

        if (m_wmem == nullptr || m_xmem == nullptr) {
            release();
            throw std::bad_alloc{};
        }
#endif
    }

    /// Releases both views (and the backing memfd where one is used).
    ~DualCodeBlock()
    {
        release();
    }

    DualCodeBlock(const DualCodeBlock&) = delete;
    DualCodeBlock& operator=(const DualCodeBlock&) = delete;
    DualCodeBlock(DualCodeBlock&&) = delete;
    DualCodeBlock& operator=(DualCodeBlock&&) = delete;

    /// Pointer to executable mirror of memory (permissions: R-X)
    std::uint32_t* xptr() const
    {
        return m_xmem;
    }

    /// Pointer to writeable mirror of memory (permissions: RW-)
    std::uint32_t* wptr() const
    {
        return m_wmem;
    }

    /// Makes previously written code in [mem, mem + size) visible to
    /// instruction fetch.
    ///
    /// Invalidate should be used with executable memory pointers.
    ///
    /// @param mem  Start of the range.
    /// @param size Length of the range in bytes.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
        FlushInstructionCache(GetCurrentProcess(), mem, size);
#elif defined(__aarch64__)
        // The smallest line sizes seen so far are kept across calls.
        // NOTE(review): presumably this guards against cores that report
        // different line sizes in CTR_EL0 - confirm.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // CTR_EL0[3:0] and CTR_EL0[19:16] hold log2 of the line size in
        // 4-byte words, hence 4 << field gives bytes.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Clean every data cache line that overlaps the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Then invalidate every instruction cache line that overlaps it.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#else
        // Hosts that are not AArch64 cannot assemble the sequence above; use
        // the compiler builtin so that this header still builds there.
        __builtin___clear_cache((char*)mem, (char*)mem + size);
#endif
    }

    /// Invalidates the whole block through its executable view.
    void invalidate_all()
    {
        invalidate(m_xmem, m_size);
    }

private:
    /// Frees every resource that is currently held and resets the members.
    /// Safe to call on a partially constructed object.
    void release() noexcept
    {
#if defined(_WIN32)
        if (m_xmem != nullptr)
            VirtualFree((void*)m_xmem, 0, MEM_RELEASE);
#elif defined(__APPLE__)
        if (m_xmem != nullptr)
            vm_deallocate(mach_task_self(), (vm_address_t)m_xmem, m_size);
        if (m_wmem != nullptr)
            munmap(m_wmem, m_size);
#else
        if (m_wmem != nullptr)
            munmap(m_wmem, m_size);
        if (m_xmem != nullptr)
            munmap(m_xmem, m_size);
        if (fd >= 0)
            close(fd);
        fd = -1;
#endif
        m_wmem = nullptr;
        m_xmem = nullptr;
    }

protected:
#if !defined(_WIN32) && !defined(__APPLE__)
    int fd = -1;
#endif
    std::uint32_t* m_xmem = nullptr;
    std::uint32_t* m_wmem = nullptr;
    std::size_t m_size = 0;
};

} // namespace oaknut
37 changes: 15 additions & 22 deletions include/oaknut/impl/arm64_encode_helpers.inc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ std::uint32_t encode(AddrOffset<size, align> v)
{
static_assert(std::popcount(splat) == size - align);

const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
const std::ptrdiff_t diff = target - current_addr;
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
const std::ptrdiff_t diff = target_offset - current_offset;
return pdep<splat>(AddrOffset<size, align>::encode(diff));
};

Expand All @@ -122,19 +122,16 @@ std::uint32_t encode(AddrOffset<size, align> v)
return pdep<splat>(encoding);
},
[&](Label* label) -> std::uint32_t {
if (label->m_addr) {
return encode_fn(Policy::current_address(), *label->m_addr);
if (label->m_offset) {
return encode_fn(Policy::offset(), *label->m_offset);
}

label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&]([[maybe_unused]] const void* p) -> std::uint32_t {
if constexpr (Policy::has_absolute_addresses) {
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
} else {
throw OaknutException{ExceptionType::RequiresAbsoluteAddressesContext};
}
[&](const void* p) -> std::uint32_t {
const std::ptrdiff_t diff = reinterpret_cast<std::uintptr_t>(p) - Policy::template xptr<std::uintptr_t>();
return pdep<splat>(AddrOffset<size, align>::encode(diff));
},
},
v.m_payload);
Expand All @@ -145,25 +142,21 @@ std::uint32_t encode(PageOffset<size, shift_amount> v)
{
static_assert(std::popcount(splat) == size);

const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
return pdep<splat>(PageOffset<size, shift_amount>::encode(current_addr, target));
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
return pdep<splat>(PageOffset<size, shift_amount>::encode(std::bit_cast<std::uintptr_t>(current_offset), std::bit_cast<std::uintptr_t>(target_offset)));
};

return std::visit(detail::overloaded{
[&](Label* label) -> std::uint32_t {
if (label->m_addr) {
return encode_fn(Policy::current_address(), *label->m_addr);
if (label->m_offset) {
return encode_fn(Policy::offset(), *label->m_offset);
}

label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&]([[maybe_unused]] const void* p) -> std::uint32_t {
if constexpr (Policy::has_absolute_addresses) {
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
} else {
throw OaknutException{ExceptionType::RequiresAbsoluteAddressesContext};
}
[&](const void* p) -> std::uint32_t {
return pdep<splat>(PageOffset<size, shift_amount>::encode(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::ptrdiff_t>(p)));
},
},
v.m_payload);
Expand Down
Loading

0 comments on commit 783965c

Please sign in to comment.