From bbbc7fb1f2d23e043b4ccf5c7aa59bbff82246a6 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sat, 18 Nov 2023 15:18:04 +0200 Subject: [PATCH] KVM support Co-authored-by: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Co-authored-by: hazelwiss --- CMakeLists.txt | 5 +- include/cpu.hpp | 2 +- include/cpu_kvm.hpp | 235 ++++++++++++++++++++++++++++++++++ include/memory.hpp | 11 +- perf.data | Bin 0 -> 30108 bytes src/core/CPU/cpu_kvm.cpp | 268 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 517 insertions(+), 4 deletions(-) create mode 100644 include/cpu_kvm.hpp create mode 100644 perf.data create mode 100644 src/core/CPU/cpu_kvm.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index df0e2bb84..e68a2a359 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,7 @@ option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON) option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON) option(ENABLE_QT_GUI "Enable the Qt GUI. 
If not selected then the emulator uses a minimal SDL-based UI instead" OFF) +option(USE_KVM "Use KVM instead of Dynarmic" OFF) option(BUILD_HYDRA_CORE "Build a Hydra core" OFF) option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF) @@ -155,12 +156,14 @@ else() set(HOST_ARM64 FALSE) endif() -if(HOST_X64 OR HOST_ARM64) +if(NOT USE_KVM AND (HOST_X64 OR HOST_ARM64)) set(DYNARMIC_TESTS OFF) #set(DYNARMIC_NO_BUNDLED_FMT ON) set(DYNARMIC_FRONTENDS "A32" CACHE STRING "") add_subdirectory(third_party/dynarmic) add_compile_definitions(CPU_DYNARMIC) +elseif(USE_KVM AND HOST_ARM64) + add_compile_definitions(CPU_KVM) else() message(FATAL_ERROR "Currently unsupported CPU architecture") endif() diff --git a/include/cpu.hpp b/include/cpu.hpp index 14800e197..7c8a3f65a 100644 --- a/include/cpu.hpp +++ b/include/cpu.hpp @@ -3,7 +3,7 @@ #ifdef CPU_DYNARMIC #include "cpu_dynarmic.hpp" #elif defined(CPU_KVM) -#error KVM CPU is not implemented yet +#include "cpu_kvm.hpp" #else #error No CPU core implemented :( #endif \ No newline at end of file diff --git a/include/cpu_kvm.hpp b/include/cpu_kvm.hpp new file mode 100644 index 000000000..5e3b2491a --- /dev/null +++ b/include/cpu_kvm.hpp @@ -0,0 +1,235 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "helpers.hpp" +#include "kernel.hpp" +#include "memory.hpp" + +#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +struct MmuTables { + u32 level1[4096]; + u32 level2SectionTables[256]; +}; + +constexpr u32 hypervisorCodeAddress = 0xD0000000; +constexpr u32 hypervisorDataAddress = 0xE0000000; +constexpr u32 hypervisorCodeSize = hypervisorDataAddress - hypervisorCodeAddress; +constexpr u32 hypervisorDataSize = hypervisorCodeSize; +constexpr u32 mmuTableOffset = hypervisorDataSize - sizeof(MmuTables); +constexpr u32 mmuTableAddress = hypervisorDataAddress + mmuTableOffset; +constexpr u32 exitCodeOffset = 0; // at start of hypervisor data segment 
+constexpr u32 customEntryOffset = 0x100000; // arbitrary, far enough that the exit code won't ever overlap with this
+constexpr u32 guestStateOffset = 0x200000; // also arbitrary, store the guest state here upon exit
+
+// Register snapshot kept on the host side; CPU::regs()/fprs()/getCPSR()/getFPSCR() read from it.
+// NOTE(review): `std::array` below has no template arguments as written, so it cannot compile;
+// restore the element type and extent from the original header.
+struct GuestState
+{
+    std::array regs;
+    std::array fprs;
+    u32 cpsr;
+    u32 fpscr;
+    // u32 tlsBase?
+    // u64 ticks?
+};
+
+/// Owns the KVM handles (/dev/kvm, VM and vCPU descriptors), the guest memory slots and the
+/// two anonymous "hypervisor" mappings that the guest sees at hypervisorCodeAddress and
+/// hypervisorDataAddress.
+///
+/// NOTE(review): nothing here closes the descriptors, unmaps the regions or frees regList;
+/// confirm that is acceptable for the lifetime of the emulator.
+struct Environment {
+    /// Opens /dev/kvm, creates the VM, registers every guest memory slot, creates the vCPU,
+    /// maps its shared kvm_run area, initializes it with 32-bit EL1 + PSCI 0.2 and fetches
+    /// its register list. Every failure goes through Helpers::panic.
+    Environment(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {
+        u32 currentMemorySlot = 0;
+
+        kvmDescriptor = open("/dev/kvm", O_RDWR);
+        if (kvmDescriptor < 0) {
+            Helpers::panic("Failed to open /dev/kvm");
+        }
+
+        vmDescriptor = ioctl(kvmDescriptor, KVM_CREATE_VM, 0);
+        if (vmDescriptor < 0) {
+            Helpers::panic("Failed to create KVM VM");
+        }
+
+        if (ioctl(vmDescriptor, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL1_32BIT) <= 0) {
+            Helpers::panic("CPU doesn't support EL1 32-bit mode, KVM won't work on this CPU");
+        }
+
+        // TODO: allocate these with mmap instead of malloc
+        kvm_userspace_memory_region vramRegionDesc = {
+            .slot = currentMemorySlot++,
+            .flags = 0,
+            .guest_phys_addr = PhysicalAddrs::VRAM,
+            .memory_size = PhysicalAddrs::VRAMSize,
+            .userspace_addr = reinterpret_cast<uint64_t>(mem.getVRAM())};
+        if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &vramRegionDesc) < 0) {
+            Helpers::panic("Failed to set VRAM memory region");
+        }
+
+        kvm_userspace_memory_region dspRegionDesc = {
+            .slot = currentMemorySlot++,
+            .flags = 0,
+            .guest_phys_addr = PhysicalAddrs::DSPMem,
+            .memory_size = PhysicalAddrs::DSPMemSize,
+            .userspace_addr = reinterpret_cast<uint64_t>(mem.getDSPMem())};
+        if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &dspRegionDesc) < 0) {
+            Helpers::panic("Failed to set DSP memory region");
+        }
+
+        // NOTE(review): the slot is twice FCRAMSize; confirm the host buffer returned by
+        // getFCRAM() really is that large.
+        kvm_userspace_memory_region fcramRegionDesc = {
+            .slot = currentMemorySlot++,
+            .flags = 0,
+            .guest_phys_addr = PhysicalAddrs::FCRAM,
+            .memory_size = PhysicalAddrs::FCRAMSize * 2,
+            .userspace_addr = reinterpret_cast<uint64_t>(mem.getFCRAM())};
+        if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &fcramRegionDesc) < 0) {
+            Helpers::panic("Failed to set FCRAM memory region");
+        }
+
+        // Hypervisor code region: the host writes code here, the guest maps it read-only.
+        hypervisorCodeRegion = mmap(nullptr, hypervisorCodeSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+        if (hypervisorCodeRegion == MAP_FAILED) {
+            Helpers::panic("Failed to allocate memory for hypervisor code");
+        }
+
+        kvm_userspace_memory_region hypervisorCodeRegionDesc = {
+            .slot = currentMemorySlot++,
+            .flags = KVM_MEM_READONLY, // We want writes here to cause VM exits
+            .guest_phys_addr = hypervisorCodeAddress,
+            .memory_size = hypervisorCodeSize,
+            .userspace_addr = reinterpret_cast<uint64_t>(hypervisorCodeRegion)
+        };
+
+        if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &hypervisorCodeRegionDesc) < 0) {
+            Helpers::panic("Failed to set up hypervisor code memory region\n");
+            return;
+        }
+
+        // Hypervisor data region: ordinary read/write guest memory (holds the MMU tables).
+        hypervisorDataRegion = mmap(nullptr, hypervisorDataSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (hypervisorDataRegion == MAP_FAILED) {
+            Helpers::panic("Failed to allocate memory for hypervisor data");
+        }
+
+        kvm_userspace_memory_region hypervisorDataRegionDesc = {
+            .slot = currentMemorySlot++,
+            .flags = 0,
+            .guest_phys_addr = hypervisorDataAddress,
+            .memory_size = hypervisorDataSize,
+            .userspace_addr = reinterpret_cast<uint64_t>(hypervisorDataRegion)
+        };
+
+        if (ioctl(vmDescriptor, KVM_SET_USER_MEMORY_REGION, &hypervisorDataRegionDesc) < 0) {
+            Helpers::panic("Failed to set up hypervisor data memory region\n");
+            return;
+        }
+
+        cpuDescriptor = ioctl(vmDescriptor, KVM_CREATE_VCPU, 0);
+        if (cpuDescriptor < 0) {
+            Helpers::panic("Failed to create VCPU");
+        }
+
+        int mmapSize = ioctl(kvmDescriptor, KVM_GET_VCPU_MMAP_SIZE, 0);
+        if (mmapSize < 0) {
+            Helpers::panic("Failed to get KVM shared memory size");
+        }
+
+        runInfo = static_cast<kvm_run*>(mmap(nullptr, mmapSize, PROT_READ | PROT_WRITE, MAP_SHARED, cpuDescriptor, 0));
+        if (runInfo == MAP_FAILED) {
+            Helpers::panic("Failed to map KVM shared memory");
+        }
+
+        kvm_vcpu_init initParams{};
+        if (ioctl(vmDescriptor, KVM_ARM_PREFERRED_TARGET, &initParams) < 0) {
+            Helpers::panic("Failed to fetch initialization parameters for vCPU");
+        }
+        initParams.features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
+        initParams.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
+
+        // The ioctl takes a pointer to the struct; the struct was previously passed by value.
+        if (ioctl(cpuDescriptor, KVM_ARM_VCPU_INIT, &initParams) < 0) {
+            Helpers::panic("Failed to initialize vCPU");
+        }
+
+        // First call with n = 0 only asks for the register count: per the KVM API it is
+        // expected to fail with E2BIG after writing the required count into n, so its
+        // return value is deliberately ignored.
+        kvm_reg_list tempRegList;
+        tempRegList.n = 0;
+        ioctl(cpuDescriptor, KVM_GET_REG_LIST, &tempRegList);
+
+        regList = static_cast<kvm_reg_list*>(malloc(sizeof(kvm_reg_list) + tempRegList.n * sizeof(u64)));
+        if (regList == nullptr) {
+            Helpers::panic("Failed to allocate register list");
+        }
+        regList->n = tempRegList.n;
+        if (ioctl(cpuDescriptor, KVM_GET_REG_LIST, regList) < 0) {
+            Helpers::panic("Failed to get register list");
+        }
+    }
+
+    /// Writes `pc` (zero-extended to 64 bits) into the vCPU's core `regs.pc` register.
+    /// A failure is only logged, not treated as fatal.
+    void setPC(u32 pc) {
+        u64 val = static_cast<u64>(pc);
+        kvm_one_reg reg;
+
+        reg.id = AARCH64_CORE_REG(regs.pc);
+        reg.addr = reinterpret_cast<u64>(&val);
+
+        if (ioctl(cpuDescriptor, KVM_SET_ONE_REG, &reg) < 0) [[unlikely]] {
+            printf("SetPC failed\n");
+        }
+    }
+
+    /// Issues one KVM_RUN on the vCPU; panics if the ioctl fails.
+    void run() {
+        if (ioctl(cpuDescriptor, KVM_RUN, 0) < 0) {
+            Helpers::panic("Failed to run vCPU");
+        } else {
+            printf("KVM run succeeded\n");
+        }
+    }
+
+    /// Copies `code` into the hypervisor code region, `offset` bytes from its start.
+    /// Panics if the blob would not fit entirely inside the region.
+    void mapHypervisorCode(const std::vector<u8>& code, u32 offset)
+    {
+        // Include the offset in the bounds check: checking only code.size() let a blob placed
+        // near the end of the region run past it.
+        if (offset > hypervisorCodeSize || code.size() > hypervisorCodeSize - offset) {
+            Helpers::panic("Launch code is too big");
+        }
+        if (code.empty()) {
+            return; // nothing to copy; also avoids handing memcpy a possibly-null data()
+        }
+        memcpy(static_cast<u8*>(hypervisorCodeRegion) + offset, code.data(), code.size());
+    }
+
+    Memory& mem;
+    Kernel& kernel;
+    kvm_run* runInfo = nullptr;
+    kvm_reg_list* regList = nullptr;
+    void* hypervisorCodeRegion = nullptr;
+    void* hypervisorDataRegion = nullptr;
+    int kvmDescriptor = -1;
+    int vmDescriptor = -1;
+    int cpuDescriptor = -1;
+};
+
+class CPU {
+    Memory& mem;
+    Environment env;
+    GuestState state;
+
+  public:
+    static constexpr u64 ticksPerSec = 268111856;
+
+    CPU(Memory& mem, Kernel& kernel);
+    void reset() {}
+
+    void setReg(int index, u32 value) {}
+    u32 getReg(int index) {return 0;}
+
+    std::span regs() { return state.regs; }
+    std::span fprs() { return state.fprs; }
+
+    void setCPSR(u32 value) { state.cpsr = value; }
+    u32 getCPSR() {
return state.cpsr; } + void setFPSCR(u32 value) { state.fpscr = value; } + u32 getFPSCR() { return state.fpscr; } + void setTLSBase(u32 value) {} + + u64 getTicks() {return 0;} + u64& getTicksRef() {static u64 dummy; return dummy;} + + void clearCache() {} + + void runFrame() {} + + // TODO: remove + void romLoaded(); +}; + +#undef AARCH64_CORE_REG \ No newline at end of file diff --git a/include/memory.hpp b/include/memory.hpp index 33ccbae58..3ddd3df8d 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -17,12 +17,18 @@ namespace PhysicalAddrs { enum : u32 { VRAM = 0x18000000, - VRAMEnd = VRAM + 0x005FFFFF, + VRAMSize = 0x00600000, + VRAMEnd = VRAM + VRAMSize - 1, FCRAM = 0x20000000, FCRAMEnd = FCRAM + 0x07FFFFFF, DSP_RAM = 0x1FF00000, - DSP_RAM_End = DSP_RAM + 0x0007FFFF + DSP_RAM_End = DSP_RAM + 0x0007FFFF, + FCRAMSize = 0x08000000, + FCRAMEnd = FCRAM + FCRAMSize - 1, + DSPMem = 0x1FF00000, + DSPMemSize = 0x00080000, + DSPMemEnd = DSPMem + DSPMemSize - 1 }; } @@ -284,6 +290,7 @@ class Memory { u8* getDSPDataMem() { return &dspRam[DSP_DATA_MEMORY_OFFSET]; } u8* getDSPCodeMem() { return &dspRam[DSP_CODE_MEMORY_OFFSET]; } u32 getUsedUserMem() { return usedUserMemory; } + u8* getVRAM() { return vram; } void setVRAM(u8* pointer) { vram = pointer; } void setDSPMem(u8* pointer) { dspRam = pointer; } diff --git a/perf.data b/perf.data new file mode 100644 index 0000000000000000000000000000000000000000..0a51cd023d39b4c504a70fa9a54f633427f14cc3 GIT binary patch literal 30108 zcmeHQdze(kmA@d!OR-f@(Ws3e4CwT9_dIA&@bXd-kO>No;@Z=XJ2TDn%YF<48XW}{ zg}7s4VxpK3S5Q%~6~$FiM~#u77}kw`#H{ORVl+PSL3D-v*g93`-0pknzW4USFZ0J1 zU-!9n>s0;fJnCING|pal?wonE-EEQxzd%~mPdw~Ey0^cG@1BGVI&$FJD}SeZrHP)1 zv6GPg0O`p{PeFPrQajSoNXH-@i}ZA)op^OIb`8?Fm2#g0tjKrt!hmf7*oZ_io1K7KNR=w`W`q+_zTA*G-FkYx${_(f_ zkmY+JJFwJdqzgFpNaSY&2M*+>-r`=e?7+c;2a!!BXkKYHe`7oB1WX}+2AxA7`Y<>L z3*_emMN}e6>E!>1V>(evr*<(2(}_|#wU@z|PL$F~KGHu?O1EP^tgb|q(nn+ZFde0I 
zYLA$#5=y6bO8O&8>C}F)Tak!TI<@P=uwO1EgV+K79=8Q|S3HX%3H9NqV~=n@TR=Pd4#j$dA-<)4cV zSE&5AZqo8EP6IX zu>^HM2QCqy&)OOJnn(P<6&e7)g{s!1lZl#e$Pb@*I-FD-$(Y0GaMw9%IX~$!aQsE5 zf*z@TvZ}HC-C6wZT7ln%EvAt)E&9yY%>;`!w+MiL(%>+U%>0>nm(L!_Xs{&|<`!`=71Y8MjqrWL7a8OCZhwROgN%0Q5jgM- z_-iIM@r0*0ibvr8ll6EuvFJ9&Nz2Mm_*tGtzadeo!*fd=DOFsPM z@?=Vh1cgV|PUe*$`Cod`1i$BLAwSjJ&TGNHK>qvU=J>wk@@OEMM0^MDS_yyX*u}>B zw;SV+ikjocT3ZFS$5BtUC;T7AOz@jO5c((i$q!gTApaI&erX*RoKOE9v4lUARH6Z> zg`0Vx%dOhK@o5u$X|(WvyYMg8RjC2+Z?z!*x$~ya1ZbB#>wzTyhQXH@>)+Y)g7AMF zfk_Vx8}Lv3(%k=J@iZFW#bg)NC;ZD6nc!!7h5py0{IbXo-+*6j0Y4INb-8PW3)FcW zeCQ_px~(Soy`2J|^xvq(-ybpezev0`?2F)t;1qoT;eR7F8S8&zreR6K2a70 z{M3x2xMG&h6ysCA_SY1Q1me0H5dQA}%!l8pwoj>hy1*y9s}czOfj)Ep5BNB`+cC+h zmg|~$X&$@h;M&`S-S?v1?KmGmxyQfC9#~i^2wy+x=OO3K1EF0GHtwsqSG*PVjAePW z^N*bIHZ$wDV*Z2e&8Ee5TEY#6l5t-u&?bBi%uf0~<)WA|zRid29p%>PyV??dQx+KY zL-;lKYw^XpAv<4wgiXkRclWgaJ`40j`5LE+IO$EF?@`P*{@!;q`3}Q)p?q60-}}F+ zJ3AL|+ub5wDBok4?@w<`(&P(ZobnZ#KX2@J<21lK9rc6SE;#?6e{ymBBTv)vUxolQ zKhHdS&7xmr{IgRbB41wbKr4}bGB&QI%R zGkIt2FOGkO^tq;d+V>)PH!OSkkz9HA4bbx2KNana_=798^6sCyS5y9=SH9p|Y9g2L z-!149KsS%u&-qDTLoq%P?M-^x(NDY1yYk$t?cf)*UNzIx>)zt{7bsf(Yju7x;fhv` zoo`yI<$w5A;TKFqwrimXI!^6#UC;GG z`x<1Y$0qb>>igsgD>eGtI^^qP8T_H^%$`|dT**Kx3o`%aUr=BF5c73alhTl$Nx8TD>M0HQ1k`0u}BLGC11I;`bYtr7l|*Vh{3 zcR%%_W`5Hw$R(m@w*=v_&pq5k{XdR0`jk)X72aFLMmG^{f8x`osiZwu|*N>76LzzhG^gs8;uM&;q3Y$jWPs z4Q2)T0=K=^@UuQkS(Wid`V5mzVwg%90-jow%MST%xQxAb-T7{K+}3 zBK{JK`d5blwxa{R0O|jy&zRuH9~Ss_#3hRO3oYU^|HmC24zJ>mk+07q|7tZ|vLQa{ zohafrS;UVg6t<(sy+HLrkcjl~Q6^tKy3}-O{T87IraK;pvIw+;xW#(F)~awt(Gd}T zebvf5ddR`I8RK_Y#P3w+Ktc_KpZKK-zWh924@v76i-ss`$Kg-R+5vNbnouH?^s)!0 z0HMUJ{x|ynHU-u(1;^pcM-T9Mfc-hKgvwk%MF{&g6zFt`&49_N?S=B$?l7!PmS@r9u#uNF{2$RBY=O41--L|$Ce#@4p>h!Ai_UQ`OFx9 z{U(9mjJS5Bi~#;a*5*IngfFPXqQU^MsZIDFt^M2>fA9U+?$O_Mc0VU%u74%mQq3?Y z!)ldkPxzyr-D`~RMEpj~sl)#_i}L#-!PYq6))i9#!f$*13uFAf8&LoG<=AfSjwQUl6@QO% zz!=||5&E|uEd(b&K>d4uVlICukWTYS$E137;3NCrcKiQo@P(IU_a`4`3u4%xTrhX1 zx<3i;Zkr?GG+xvGWbQo_*kgbEu6OzVIR> 
z{A0qFtbf3}wVOrUOizzK?&Uaif6&N3Ku@3TDUSb^ziIjRkiK%;AC%vn(elsOl|47o z*B{QzroTIxpO;ywKk#4rkK*|E9MJM#Z%}_I|EK{|H1+v1BU(gm|H13c`;XROqC@l` zU~hQe^>dM*?nCO^6YxNLTKVUT)pJ64H|$R$UML^v7V=%{m~amCJAeo8=AI$ql#kcb zp51@u{T%W`o-fA_IT7;HeN6U>&3tm!Evfc+O)S->Bx)MjuC%Xq)d~c+n^NO*Vn05P zU)1B3wo`&j%I+z>5fe7LM@rj{%^w6fJOLoM!XK{_5NDoUo zg2~vwWN`+ZI!uF;uN_03hCV6y=$^DuI|lxbKQ4~{nEhJ*3<^W-SRVAeI*vm5FAO_^ z*PH4y;_o5@@$cHGJ!gC8{3A8|yQ2NM2;e=MbHT2E7k15{gfdoy5%90s+xPJoM)x^e zB4W3X9dW4}-elL&{cIY2AA%phS>X5Z<65?{0}_b<_)9I!x7l(#aK;&8w#}|D@));{>3rU0+PMATOWW>f~LwaAY3boc6o>S%FLa2F0^tYo1_S zhMLvX_@c3BR=)a|ryAqfWO05Wp+U28P5BflgcGW1(%?`#vu7^Q+?zhZPBDz#XGCYkT>; z-wuVdV5BGDUw>b5{O4`f@-IaIvUBb{ir;zKs^uSil+aV|JOuc}I)>fvY9Gr*1i9h8 z)GgLQc)e3y$KL87_JD=^@#&z_A+{1H z;qBOLfBf9=cX)rmFSoGra{U6Sv`xEeh zUwFQIS@jEe*YS;r6W%PYhQX5kZ2ln8^<|m=>P3=7x+JaA#dql9N5~>S@i$_eeoxh; zOJApp_v-jR#P~XF_^W0}Ja~j+rR}pt{8fyvo+ILy3nKj$<41^QD(yvkufm^|rdkA} z4f$7LyqWT2Tt_=&>t~kH_lkmarC5T2A%fyAA6_1^q#-- z(W?QY#^QR&2$(QlYTka&)5q`o^76>Tdk&2H!wDlxxNg%XB)E^Y~+kSe^21ojuac}A|^8Fsn3Kn6kd1uMOyYBy=5325) zlfJF(w8wVdx@f!}Bj25*VlckC;m#jk_J@n#+%xU4#qNveWFDWs{Q?a}f%8@~44E;A zRk|5x;dCgYfnBwjT6 z<8r6k?B^*_`y4pTs#-X~J~Nh3sxF>C!5*4a?Q+-GFJfGUgUpgx_H-z!@Dk2!IMY>M zcVR&cFrY9G^d3q=>x{|0H)L3 zd+5I)eBXa_0{UrYKNgf!2+IV!BQpE=6N;}r z9%I*?SfqpiW30?Rfn>^;Quy#xLMdV|C8wNtduvI>NZ?OUPH3{LtS$)rm6rokXRkkH zK8s=*@&p>o_+V~! 
z^)-OAmw?aOwnlx%&` zZsqQi_hq|PO13^}w~}@yBsKJDyOk+heE=&mar%qY6&Spcw17CSuU5`<1A;D z17(!EU(%OlD|g+UD;w>>R>E#>XL&naV7I#J@@8FqSvF6ZXYz0-26O~pw9i!uySdzM zPdQtZP^WUwBk8wU^fw=&-^2i04CXMMVe8!Y#XLh9zA*a>j$vQB;%!^*PlSpdETN2Y{q_It z%ao`KGK=>m`@(6ju8NkUbe5Tl#eLpLI-Cl{!-|*v^{zyysKyLBDRY|0`+#`T?`2me zE!SVqSgCp>*}*AO1nbpW6>lt^il>Xa{!poUL{b4SyKWqcS-1q#o^ZN@bVih^tvbtWmLp2aNXLQYGZ$buq_rYzR^HEDxnX4x5!(% zRw|_r)_*XGN)Iy2S`k0>Ef2?h!K61FikDKGiq#{vED=hTJY4erv=aJ=rXyZoFv#3A zDF&G$J2TKnnbT!13(S{UXBt*~rF8<8vJ0PolrYXRHSi#_?@HJu#dUEl`mu8FT>c;bl-dLU literal 0 HcmV?d00001 diff --git a/src/core/CPU/cpu_kvm.cpp b/src/core/CPU/cpu_kvm.cpp new file mode 100644 index 000000000..6bde71fd6 --- /dev/null +++ b/src/core/CPU/cpu_kvm.cpp @@ -0,0 +1,268 @@ +// #ifdef CPU_KVM +#include "cpu_kvm.hpp" + +MmuTables* mmuTables = nullptr; + +// ARMv6 MMU supports up to two levels of address lookup with 4KiB pages. +// The top level is called the level 1 table. It contains 4096 entries of 4 bytes each (16KiB total). +// The bottom level is called level 2, which contains 256 entries of 4 bytes each (1KiB total). +// The level 1 table supports 3 kind of entries: Pages, Sections and Supersections each corresponding to a page size. +// Pages are for 4KiB pages, Sections are for 1MiB pages and Supersections are for 16MiB pages. + +// Sections and supersections don't use the level 2 table at all. 
+// This is because with a 32 bit vaddr and 4 KiB pages, the offset is 12 bits,
+// the level 2 index is 8 bits and the level 1 index is 12 bits -> 12 + 8 + 12 = 32 for the vaddr
+// However for sections, the offset is 20 bits, so you can only use
+// the level 1 table (up to 4096 entries) because 20 for offset + 12 for level 1 -> 20 + 12 = 32 for the vaddr
+// For supersections, you need a 24 bit offset, so the level 1 table actually has up to 256 entries because
+// you're left with 8 bits -> 24 + 8 = 32 for the vaddr
+
+// Level 2 entries
+// Bits:  31-12  11  10  9    8-6       5-4  3  2  1  0
+// Value: BADDR  nG  S   APX  TEX[2:0]  AP   C  B  1  XN
+
+// Access permission table:
+/*
+    APX  AP  Privileged  Unprivileged  Description
+    0    00  No access   No access     Permission fault
+    0    01  Read/Write  No access     Privileged Access only
+    0    10  Read/Write  Read          No user-mode write
+    0    11  Read/Write  Read/Write    Full access
+    1    00  -           -             Reserved
+    1    01  Read        No access     Privileged Read only
+    1    10  Read        Read          Read only
+    1    11  -           -             Reserved
+*/
+
+// Bit masks for the access-permission fields of a level 2 entry, matching the layout above:
+// APX is bit 9, AP[0] is bit 4 and AP[1] is bit 5.
+constexpr u32 APX = 1 << 9;
+constexpr u32 AP0 = 1 << 4;
+constexpr u32 AP1 = 1 << 5;
+
+// Attribute bits OR-ed into a level 2 entry by level2Entry(). Each value is the bit position
+// of the corresponding field in the layout above (XN = bit 0, B = bit 2, C = bit 3, S = bit 10);
+// the AP_* values are rows of the access permission table with APX = 0.
+enum Level2Flags : u32
+{
+    Level2Flags_ExecuteNever = 1 << 0,
+    Level2Flags_Bufferable = 1 << 2,
+    Level2Flags_Cacheable = 1 << 3,
+    Level2Flags_Shared = 1 << 10,
+    Level2Flags_AP_NoUserModeWrite = AP1,
+    Level2Flags_AP_FullAccess = AP1 | AP0,
+};
+
+// Generated by passing the following code to godbolt:
+// Thanks libn3ds
+/*
+    // FCSE PID Register (FCSE PID = 0)
+    // Note: This must be 0 before disabling the MMU otherwise UB
+    __asm__ volatile ("mcr p15, 0, %0, c13, c0, 0" : : "r"(0));
+
+    // Context ID Register (ASID = 0, PROCID = 0)
+    __asm__ volatile ("mcr p15, 0, %0, c13, c0, 1" : : "r"(0));
+
+    // // TTBR0 address shared page table walk and outer cachable write-through, no allocate on write
+    uint32_t ttbr0 = mmuTableAddress | 0x12;
+    __asm__ volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r" (ttbr0) : "memory");
+
+    // Use the 16 KiB L1 table only
+    __asm__ volatile ("mcr p15, 0, %0, c2, c0, 2" : : "r"(0));
+
+    // Domain 0 = client, remaining domains all = no access
+    __asm__ volatile("mcr p15, 0, %0, c3, c0, 0" : : "r"(1));
+
+    uint32_t* d = (uint32_t*)hypervisorCodeAddress;
+    *d = hypervisorCodeAddress;
+*/
+// ARM (A32) machine code, one little-endian 32-bit word per row, run before the page tables
+// are filled in. The last row is not an instruction: it is the literal (mmuTableAddress | 0x12)
+// that `ldr r2, [pc, #0x14]` loads. That ldr sits at byte offset 0x0C, PC reads as 0x14 there,
+// and 0x14 + 0x14 = 0x28 is the offset of the last row, so inserting or removing a row
+// requires updating the ldr offset.
+// The final two instructions store to 0xD0000000 (the value of hypervisorCodeAddress); the
+// address is hard-coded in the encoding rather than derived from the constant.
+// NOTE(review): with the "16 KiB L1 table" setting above, TTBR0 presumably needs a
+// 16 KiB-aligned table base - confirm that mmuTableAddress satisfies this.
+constexpr u8 mmuCodeBefore[] = {
+    0x00, 0x30, 0xb0, 0xe3, // movs r3, #0
+    0x10, 0x3f, 0x0d, 0xee, // mcr p15, #0, r3, c13, c0, #0
+    0x30, 0x3f, 0x0d, 0xee, // mcr p15, #0, r3, c13, c0, #1
+    0x14, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x14]
+    0x10, 0x2f, 0x02, 0xee, // mcr p15, #0, r2, c2, c0, #0
+    0x50, 0x3f, 0x02, 0xee, // mcr p15, #0, r3, c2, c0, #2
+    0x01, 0x30, 0xb0, 0xe3, // movs r3, #1
+    0x10, 0x3f, 0x03, 0xee, // mcr p15, #0, r3, c3, c0, #0
+    0x0d, 0x32, 0xa0, 0xe3, // mov r3, #-0x30000000 TODO: instead jump to exit code
+    0x00, 0x30, 0x83, 0xe5, // str r3, [r3]
+    (mmuTableAddress & 0xFF) | 0x12, (mmuTableAddress >> 8) & 0xFF, (mmuTableAddress >> 16) & 0xFF, (mmuTableAddress >> 24) & 0xFF,
+};
+
+// Generated by passing the following code to godbolt:
+// Thanks libn3ds
+/*
+    // Invalidate TLB
+    __asm__ volatile("mcr p15, 0, %0, c8, c7, 0" : : "r"(0));
+    __asm__ volatile("dsb");
+
+    // Get ACR
+    uint32_t reg;
+    __asm__ volatile("mrc p15, 0, %0, c1, c0, 1" : "=r"(reg));
+    // Enable Return stack, Dynamic branch prediction, Static branch prediction,
+    // Instruction folding and SMP mode: the CPU is taking part in coherency
+    reg |= 0x2F;
+    __asm__ volatile("mcr p15, 0, %0, c1, c0, 1" : : "r"(reg));
+
+    // Get CR
+    __asm__ volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(reg));
+    // Enable MMU, D-Cache, Program flow prediction,
+    // I-Cache, high exception vectors, Unaligned data access,
+    // subpage AP bits disabled
+    reg |= 0xC03805;
+    __asm__ volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(reg));
+
+    // Invalidate both caches
+    __asm__ volatile("mcr p15, 0, %0, c7, c7, 0" : : "r" (0) : "memory");
+    __asm__ volatile("dsb");
+    __asm__ volatile("isb");
+
+    uint32_t* d = (uint32_t*)hypervisorCodeAddress;
+    *d = hypervisorCodeAddress;
+*/
+// ARM (A32) machine code, one little-endian 32-bit word per row, run after the page tables
+// have been filled in. The final two instructions store to 0xD0000000 (the value of
+// hypervisorCodeAddress), hard-coded in the encoding.
+constexpr u8 mmuCodeAfter[] = {
+    0x00, 0x00, 0xb0, 0xe3, // movs r0, #0
+    0x17, 0x0f, 0x08, 0xee, // mcr p15, #0, r0, c8, c7, #0
+    0x4f, 0xf0, 0x7f, 0xf5, // dsb sy
+    0x30, 0x3f, 0x11, 0xee, // mrc p15, #0, r3, c1, c0, #1
+    0x2f, 0x30, 0x83, 0xe3, // orr r3, r3, #0x2f
+    0x30, 0x3f, 0x01, 0xee, // mcr p15, #0, r3, c1, c0, #1
+    0x10, 0x2f, 0x11, 0xee, // mrc p15, #0, r2, c1, c0, #0
+    0x05, 0x38, 0x03, 0xe3, // movw r3, #0x3805
+    0xc0, 0x30, 0x40, 0xe3, // movt r3, #0xc0
+    0x02, 0x30, 0x93, 0xe1, // orrs r3, r3, r2
+    0x10, 0x3f, 0x01, 0xee, // mcr p15, #0, r3, c1, c0, #0
+    0x17, 0x0f, 0x07, 0xee, // mcr p15, #0, r0, c7, c7, #0
+    0x4f, 0xf0, 0x7f, 0xf5, // dsb sy
+    0x6f, 0xf0, 0x7f, 0xf5, // isb sy
+    0x0d, 0x32, 0xa0, 0xe3, // mov r3, #-0x30000000 TODO: instead jump to exit code
+    0x00, 0x30, 0x83, 0xe5, // str r3, [r3]
+};
+
+// Store the CPU state and exit the VM, then return from SVC
+// Generated from the following ARM32 assembly
+/*
+    push {r0}
+    ldr r0, GuestStateAddr + 4
+    stmfd r0, {r1-r12, sp, lr, pc}^
+    pop {r0}
+
+    push {r1}
+    ldr r1, GuestStateAddr
+    str r0, [r1]
+
+    // Exit the VM
+    ldr r1, CodeAddr
+    str r1, [r1]
+
+    pop {r1}
+
+    CodeAddr:
+        .word 0xD0000000
+    GuestStateAddr:
+        .word 0xE0200000
+*/
+// NOTE(review): the table is still empty (the assembly above has not been assembled into it),
+// and an empty initializer for an array of unknown bound is only accepted as a compiler
+// extension - fill it in or give it an explicit size.
+constexpr u8 svcHandlerCode[] = {
+};
+
+/// Level 1, page table entry
+/// Bits:  31-10  9    8-5     4    3   2    1  0
+/// Value: BADDR  IMP  Domain  SBZ  NS  PXN  0  1
+/// We don't use domains, so we can set it to 0
+///
+/// Returns `level2Address` with bit 0 set; panics if the address is not 1 KiB aligned.
+u32 pageTableEntry(u32 level2Address)
+{
+    // Level 2 tables have 256 entries of 4 bytes each, so they must be aligned to 1KiB
+    if ((level2Address & 0x3FF) != 0) {
+        Helpers::panic("level2Address is not aligned to 1KiB");
+    }
+
+    return level2Address | 0b1;
+}
+
+/// Builds a level 2 entry: the 4 KiB-aligned part of `physicalAddress`, bit 1 set,
+/// OR-ed with the attribute bits in `flags`.
+u32 level2Entry(u32 physicalAddress, Level2Flags flags)
+{
+    return (physicalAddress & 0xFFFFF000) | 0b10 | flags;
+}
+
+/// Maps `pageCount` consecutive 4 KiB pages starting at `virtualAddress` onto
+/// `physicalAddress` with the given attributes, then points the level 1 entry covering
+/// `virtualAddress` at the level 2 table. Panics if either address is not 4 KiB aligned.
+/// Requires the global `mmuTables` to have been set (romLoaded does this before calling).
+///
+/// NOTE(review): there is a single 256-entry level 2 table, so every call writes into the
+/// same table and only the level 1 entry for the first megabyte of the range is set -
+/// confirm this is intended for the text/rodata/data mappings done in romLoaded.
+void mapPageTables(u32 virtualAddress, u32 physicalAddress, u8 pageCount, Level2Flags flags)
+{
+    // Alignment is decided by the low 12 bits. The checks used to mask with 0xFFFFF000,
+    // which rejected every address >= 0x1000 and accepted misaligned ones below it.
+    constexpr u32 pageOffsetMask = 0xFFF;
+
+    if ((virtualAddress & pageOffsetMask) != 0) {
+        Helpers::panic("virtualAddress is not aligned to 4KiB");
+    }
+
+    if ((physicalAddress & pageOffsetMask) != 0) {
+        Helpers::panic("physicalAddress is not aligned to 4KiB");
+    }
+
+    for (u32 i = 0; i < pageCount * 4096; i += 4096)
+    {
+        // Bits 19:12 of the virtual address select the entry inside the level 2 table
+        u8 level2Index = ((virtualAddress + i) >> 12) & 0xFF;
+        mmuTables->level2SectionTables[level2Index] = level2Entry(physicalAddress + i, flags);
+    }
+
+    // Level 1 entries hold guest addresses, so use the table's address as seen from the VM
+    u32 level2TableAddressVm = mmuTableAddress + offsetof(MmuTables, level2SectionTables);
+    mmuTables->level1[virtualAddress >> 20] = pageTableEntry(level2TableAddressVm);
+}
+
+/// Binds the memory reference and constructs the KVM environment; all KVM setup happens in
+/// the Environment constructor.
+CPU::CPU(Memory& mem, Kernel& kernel)
+: mem(mem), env(mem, kernel)
+{
+}
+
+/// Runs the pre-MMU setup code in the guest, builds identity mappings for the loaded
+/// executable's text, rodata and data segments, then runs the post-MMU setup code.
+void CPU::romLoaded()
+{
+    NCCH* ncch = mem.getCXI();
+    if (!ncch) {
+        // TODO: what to do here?
+        Helpers::panic("Alber has decided to panic!");
+    }
+
+    // Map the VM exit code which stores all registers to shared hypervisor memory
+    // and exits the VM by writing to read-only memory.
+    // We map it at the start of hypervisorCodeAddress.
+    // NOTE(review): `vmExitCode` is not defined anywhere in this file; `svcHandlerCode`
+    // above looks like the blob that was meant here - confirm and rename one of them.
+    env.mapHypervisorCode(std::vector<u8>(vmExitCode, vmExitCode + sizeof(vmExitCode)), 0);
+
+    printf("Debug: Running pre mmu table code\n");
+    env.mapHypervisorCode(std::vector<u8>(mmuCodeBefore, mmuCodeBefore + sizeof(mmuCodeBefore)), customEntryOffset);
+    env.setPC(hypervisorCodeAddress + customEntryOffset);
+    env.run();
+
+    const auto& text = ncch->text;
+    const auto& rodata = ncch->rodata;
+    const auto& data = ncch->data;
+
+    // The tables live at mmuTableOffset inside the host mapping of the hypervisor data region
+    mmuTables = reinterpret_cast<MmuTables*>(static_cast<u8*>(env.hypervisorDataRegion) + mmuTableOffset);
+    // %p needs a void* and %08x an unsigned int: the offsetof() sum is a size_t on the host
+    printf(
+        "Debug: level2sectionTables is at %p in host, %08x in guest\n",
+        static_cast<void*>(mmuTables->level2SectionTables),
+        static_cast<u32>(mmuTableAddress + offsetof(MmuTables, level2SectionTables))
+    );
+
+    // Attributes shared by all three segments
+    constexpr u32 commonFlags = Level2Flags_Shared | Level2Flags_Bufferable | Level2Flags_Cacheable;
+
+    // text: no user-mode write, executable
+    mapPageTables(
+        text.address,
+        text.address,
+        text.pageCount,
+        static_cast<Level2Flags>(commonFlags | Level2Flags_AP_NoUserModeWrite)
+    );
+    // rodata: no user-mode write, never executable
+    mapPageTables(
+        rodata.address,
+        rodata.address,
+        rodata.pageCount,
+        static_cast<Level2Flags>(commonFlags | Level2Flags_AP_NoUserModeWrite | Level2Flags_ExecuteNever)
+    );
+    // data: full access
+    mapPageTables(
+        data.address,
+        data.address,
+        data.pageCount,
+        static_cast<Level2Flags>(commonFlags | Level2Flags_AP_FullAccess)
+    );
+
+    printf("Debug: Running post mmu table code\n");
+    env.mapHypervisorCode(std::vector<u8>(mmuCodeAfter, mmuCodeAfter + sizeof(mmuCodeAfter)), customEntryOffset);
+    env.setPC(hypervisorCodeAddress + customEntryOffset);
+    env.run();
+    printf("Done\n");
+}
+
+// #endif