diff --git a/Cargo.toml b/Cargo.toml index f22a057..effe364 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ rust-version = "1.71" [dependencies] bitflags = "2.4.2" -kvm-bindings = "0.7.0" -kvm-ioctls = "0.16.0" +kvm-bindings = { version = "0.9.1", features = ["fam-wrappers"] } +kvm-ioctls = "0.18" +libc = "0.2.155" uuid = "1.8.0" vmm-sys-util = "0.12.1" diff --git a/src/launch/linux.rs b/src/launch/linux.rs index c8ee663..208b561 100644 --- a/src/launch/linux.rs +++ b/src/launch/linux.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 +use std::marker::PhantomData; + pub const NR_CPUID_CONFIGS: usize = 12; /// Trust Domain eXtensions sub-ioctl() commands @@ -8,13 +10,15 @@ pub enum CmdId { GetCapabilities, InitVm, InitVcpu, + InitMemRegion, + FinalizeVm, } /// Contains information for the sub-ioctl() command to be run. This is /// equivalent to `struct kvm_tdx_cmd` in the kernel. #[derive(Default)] #[repr(C)] -pub struct Cmd { +pub struct Cmd<'a, T: 'a> { /// TDX command identifier pub id: u32, @@ -31,6 +35,21 @@ pub struct Cmd { /// Reserved. 
pub _unused: u64, + + _phantom: PhantomData<&'a T>, +} + +impl<'a, T: 'a> Cmd<'a, T> { + pub fn from(id: CmdId, data: &'a T) -> Self { + Self { + id: id as u32, + flags: 0, + data: data as *const T as _, + error: 0, + _unused: 0, + _phantom: PhantomData, + } + } } #[derive(Debug)] @@ -159,18 +178,6 @@ impl Default for Capabilities { } } -impl From<&Capabilities> for Cmd { - fn from(caps: &Capabilities) -> Self { - Self { - id: CmdId::GetCapabilities as u32, - flags: 0, - data: caps as *const Capabilities as _, - error: 0, - _unused: 0, - } - } -} - /// TDX specific VM initialization information #[derive(Debug)] #[repr(C)] @@ -227,14 +234,15 @@ impl Default for InitVm { } } -impl From<&InitVm> for Cmd { - fn from(init_vm: &InitVm) -> Self { - Self { - id: CmdId::InitVm as u32, - flags: 0, - data: init_vm as *const InitVm as _, - error: 0, - _unused: 0, - } - } +#[repr(C)] +#[derive(Debug)] +pub struct TdxInitMemRegion { + /// Host physical address of the target page to be added to the TD + pub source_addr: u64, + + /// Guest physical address to be mapped + pub gpa: u64, + + /// Number of pages to be mapped + pub nr_pages: u64, } diff --git a/src/launch/mod.rs b/src/launch/mod.rs index 10cf7fb..ba67ab3 100644 --- a/src/launch/mod.rs +++ b/src/launch/mod.rs @@ -3,11 +3,10 @@ mod linux; use kvm_bindings::{kvm_enable_cap, KVM_CAP_MAX_VCPUS, KVM_CAP_SPLIT_IRQCHIP}; -use linux::{Capabilities, Cmd, CpuidConfig, InitVm, TdxError}; +use linux::{Capabilities, Cmd, CmdId, CpuidConfig, InitVm, TdxError}; use bitflags::bitflags; use kvm_ioctls::{Kvm, VmFd}; -use std::arch::x86_64; // Defined in linux/arch/x86/include/uapi/asm/kvm.h const KVM_X86_TDX_VM: u64 = 2; @@ -34,13 +33,17 @@ impl TdxVm { cap.args[0] = 24; vm_fd.enable_cap(&cap).unwrap(); + cap.cap = kvm_bindings::KVM_CAP_X2APIC_API; + cap.args[0] = (1 << 0) | (1 << 1); + vm_fd.enable_cap(&cap).unwrap(); + Ok(Self { fd: vm_fd }) } /// Retrieve information about the Intel TDX module pub fn get_capabilities(&self) -> Result<TdxCapabilities, TdxError> 
{ let caps = Capabilities::default(); - let mut cmd: Cmd = Cmd::from(&caps); + let mut cmd: Cmd<Capabilities> = Cmd::from(CmdId::GetCapabilities, &caps); unsafe { self.fd.encrypt_op(&mut cmd)?; @@ -61,7 +64,7 @@ impl TdxVm { } /// Do additional VM initialization that is specific to Intel TDX - pub fn init_vm(&self, kvm_fd: &Kvm, caps: &TdxCapabilities) -> Result<(), TdxError> { + pub fn init_vm(&self, kvm_fd: &Kvm) -> Result<(), TdxError> { let cpuid = kvm_fd .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) .unwrap(); @@ -70,46 +73,63 @@ impl TdxVm { // resize to 256 entries to make sure that InitVm is 8KB cpuid_entries.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); - // hex for Ob1100000001011111111 based on the XSAVE state-components architecture - let xcr0_mask = 0x602ff; - // hex for 0b11111110100000000 based on the XSAVE state-components architecture - let xss_mask = 0x1FD00; - - let xfam_fixed0 = caps.xfam.fixed0.bits(); - let xfam_fixed1 = caps.xfam.fixed1.bits(); - // patch cpuid for entry in cpuid_entries.as_mut_slice() { - // mandatory patches for TDX based on XFAM values reported by TdxCapabilities - match entry.index { - // XSAVE features and state-components - 0xD => { - if entry.index == 0 { - // XSAVE XCR0 LO - entry.eax &= (xfam_fixed0 as u32) & (xcr0_mask as u32); - entry.eax |= (xfam_fixed1 as u32) & (xcr0_mask as u32); - // XSAVE XCR0 HI - entry.edx &= ((xfam_fixed0 & xcr0_mask) >> 32) as u32; - entry.edx |= ((xfam_fixed1 & xcr0_mask) >> 32) as u32; - } else if entry.index == 1 { - // XSAVE XCR0 LO - entry.ecx &= (xfam_fixed0 as u32) & (xss_mask as u32); - entry.ecx |= (xfam_fixed1 as u32) & (xss_mask as u32); - // XSAVE XCR0 HI - entry.edx &= ((xfam_fixed0 & xss_mask) >> 32) as u32; - entry.edx |= ((xfam_fixed1 & xss_mask) >> 32) as u32; - } + if entry.function == 0xD && entry.index == 0 { + const XFEATURE_MASK_XTILE: u32 = (1 << 17) | (1 << 18); + if (entry.eax & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE { + entry.eax &= 
!XFEATURE_MASK_XTILE; } - 0x8000_0008 => { - // host physical address bits supported - let phys_bits = unsafe { x86_64::__cpuid(0x8000_0008).eax } & 0xff; - entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits & 0xff); + } + + if entry.function == 0xD && entry.index == 1 { + entry.ecx &= !(1 << 15); + const XFEATURE_MASK_CET: u32 = (1 << 11) | (1 << 12); + if entry.ecx & XFEATURE_MASK_CET > 0 { + entry.ecx |= XFEATURE_MASK_CET; } - _ => (), } } - let mut cmd = Cmd::from(&InitVm::new(&cpuid_entries)); + let init_vm = InitVm::new(&cpuid_entries); + let mut cmd: Cmd<InitVm> = Cmd::from(CmdId::InitVm, &init_vm); + unsafe { + self.fd.encrypt_op(&mut cmd)?; + } + + Ok(()) + } + + /// Encrypt a contiguous memory region + pub fn init_mem_region( + &self, + gpa: u64, + nr_pages: u64, + attributes: u32, + source_addr: u64, + ) -> Result<(), TdxError> { + const TDVF_SECTION_ATTRIBUTES_MR_EXTEND: u32 = 1u32 << 0; + let mem_region = linux::TdxInitMemRegion { + source_addr, + gpa, + nr_pages, + }; + + let mut cmd: Cmd<linux::TdxInitMemRegion> = Cmd::from(CmdId::InitMemRegion, &mem_region); + + // determines if we also extend the measurement + cmd.flags = ((attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND) > 0) as u32; + + unsafe { + self.fd.encrypt_op(&mut cmd)?; + } + + Ok(()) + } + + /// Complete measurement of the initial TD contents and mark it ready to run + pub fn finalize(&self) -> Result<(), TdxError> { + let mut cmd: Cmd<u64> = Cmd::from(CmdId::FinalizeVm, &0); unsafe { self.fd.encrypt_op(&mut cmd)?; } @@ -280,13 +300,7 @@ pub struct TdxVcpu<'a> { impl<'a> TdxVcpu<'a> { pub fn init(&self, hob_address: u64) -> Result<(), TdxError> { - let mut cmd = Cmd { - id: linux::CmdId::InitVcpu as u32, - flags: 0, - data: hob_address as *const u64 as _, - error: 0, - _unused: 0, - }; + let mut cmd: Cmd<u64> = Cmd::from(CmdId::InitVcpu, &hob_address); let ret = unsafe { ioctl::ioctl_with_mut_ptr(self.fd, KVM_MEMORY_ENCRYPT_OP(), &mut cmd) }; if ret < 0 { // can't return `ret` because it will just return -1 and not give the error 
diff --git a/tests/data/OVMF.fd b/tests/data/OVMF.fd new file mode 100644 index 0000000..82d9c00 Binary files /dev/null and b/tests/data/OVMF.fd differ diff --git a/tests/data/OVMF.inteltdx.fd b/tests/data/OVMF.inteltdx.fd deleted file mode 100644 index a2518cf..0000000 Binary files a/tests/data/OVMF.inteltdx.fd and /dev/null differ diff --git a/tests/launch.rs b/tests/launch.rs index 926878e..a2abab7 100644 --- a/tests/launch.rs +++ b/tests/launch.rs @@ -1,24 +1,227 @@ // SPDX-License-Identifier: Apache-2.0 use kvm_ioctls::Kvm; +use vmm_sys_util::*; use tdx::launch::{TdxVcpu, TdxVm}; use tdx::tdvf; +// `mov eax,1000h` will set the value in the register eax (and rax since they both share the bottom 32 bits) to 1000h +// `jmp *%rax` will jump the program to the address that rax contains, which in this case will be 1000h +const FIRMWARE: &[u8; 7] = &[ + 0xb8, 0x00, 0x10, 0x00, 0x00, // mov eax, 1000h + 0xff, 0xe0, // jmp *%rax +]; + #[test] fn launch() { - let mut kvm_fd = Kvm::new().unwrap(); + const KVM_CAP_GUEST_MEMFD: u32 = 234; + const KVM_CAP_MEMORY_MAPPING: u32 = 236; // create vm + let mut kvm_fd = Kvm::new().unwrap(); let tdx_vm = TdxVm::new(&kvm_fd, 100).unwrap(); - let caps = tdx_vm.get_capabilities().unwrap(); - let _ = tdx_vm.init_vm(&kvm_fd, &caps).unwrap(); + let _caps = tdx_vm.get_capabilities().unwrap(); + let _ = tdx_vm.init_vm(&kvm_fd).unwrap(); + + // get tdvf sections + let mut firmware = std::fs::File::open("tests/data/OVMF.fd").unwrap(); + let sections = tdvf::parse_sections(&mut firmware).unwrap(); + let hob_section = tdvf::get_hob_section(&sections).unwrap(); // create vcpu let mut vcpufd = tdx_vm.fd.create_vcpu(10).unwrap(); let tdx_vcpu = TdxVcpu::try_from((&mut vcpufd, &mut kvm_fd)).unwrap(); - let mut firmware = std::fs::File::open("./tests/data/OVMF.inteltdx.fd").unwrap(); - let sections = tdvf::parse_sections(&mut firmware).unwrap(); - let hob_section = tdvf::get_hob_section(&sections).unwrap(); 
tdx_vcpu.init(hob_section.memory_address).unwrap(); + + // map memory to guest + if !check_extension(KVM_CAP_GUEST_MEMFD) { + panic!("KVM_CAP_GUEST_MEMFD isn't supported, which is required by TDX"); + } + + // In TDX you cannot modify the registers directly since they are + // confidential. Therefore, if you want the VM to run custom code, + // you need to map it to the reset vector on the guest: 0xfffffff0. + + // Start with the first 4k of memory (4G - 4k) as all 0s. + let firmware_code = &mut [0u8; 4096].to_vec(); + + // Map the firmware we want the VM to run on boot to the reset + // vector, which is at 4G - 16B (0xfffffff0). + for (idx, b) in FIRMWARE.iter().enumerate() { + firmware_code[4096 - 16 + idx] = *b; + } + + let firmware_userspace = ram_mmap(firmware_code.len() as u64); + // (4 << 30) - 0x1000 + let guest_addr = 0xfffff000u64; + + // copy the firmware code into the memory allocated for `firmware_userspace` + let address_space: &mut [u8] = unsafe { + std::slice::from_raw_parts_mut(firmware_userspace as *mut u8, firmware_code.len()) + }; + address_space[..firmware_code.len()].copy_from_slice(&firmware_code[..]); + let firmware_userspace = address_space as *const [u8] as *const u8 as u64; + + let gmem = kvm_bindings::kvm_create_guest_memfd { + size: firmware_code.len() as u64, + flags: 0, + reserved: [0; 6], + }; + + let gmem = tdx_vm.fd.create_guest_memfd(gmem).unwrap(); + let region = kvm_bindings::kvm_userspace_memory_region2 { + slot: 0 as u32, + // KVM_MEM_GUEST_MEMFD + flags: 1 << 2, + guest_phys_addr: guest_addr, + memory_size: firmware_code.len() as u64, + userspace_addr: firmware_userspace, + guest_memfd_offset: 0, + guest_memfd: gmem as u32, + pad1: 0, + pad2: [0; 14], + }; + unsafe { + tdx_vm.fd.set_user_memory_region2(region).unwrap(); + } + + let attr = kvm_bindings::kvm_memory_attributes { + address: guest_addr, + size: firmware_code.len() as u64, + // KVM_MEMORY_ATTRIBUTE_PRIVATE + attributes: 1 << 3, + flags: 0, + }; + 
tdx_vm.fd.set_memory_attributes(attr).unwrap(); + + if check_extension(KVM_CAP_MEMORY_MAPPING) { + // TODO(jakecorrenti): the current CentOS SIG doesn't support the KVM_MEMORY_MAPPING or + // KVM_TDX_EXTEND_MEMORY ioctls, which is what we would typically use here. + } else { + tdx_vm + .init_mem_region(guest_addr, 1, 1, firmware_userspace) + .unwrap(); + } + + // finalize measurement + tdx_vm.finalize().unwrap(); + + // run the vCPU + + // TDX will not allow the host to access private memory. In this case, we + // are trying to jump to address 0x1000 which we haven't mapped anything + // to. Therefore, we shouldn't be able to access this area of memory, which + // should cause a MemoryFault. + let ret = tdx_vcpu.fd.run(); + assert!(matches!( + ret, + Ok(kvm_ioctls::VcpuExit::MemoryFault { + flags: 8, + gpa: 0x1000, + size: 0x1000 + }) + )) +} + +/// Round number down to multiple +pub fn align_down(n: usize, m: usize) -> usize { + n / m * m +} + +/// Round number up to multiple +pub fn align_up(n: usize, m: usize) -> usize { + align_down(n + m - 1, m) +} + +/// Reserve a new memory region of the requested size to be used for mapping from the given fd (if +/// any) +pub fn mmap_reserve(size: usize, fd: i32) -> *mut libc::c_void { + let mut flags = libc::MAP_PRIVATE; + flags |= libc::MAP_ANONYMOUS; + unsafe { libc::mmap(0 as _, size, libc::PROT_NONE, flags, fd, 0) } +} + +/// Activate memory in a reserved region from the given fd (if any), to make it accessible. 
+pub fn mmap_activate( + ptr: *mut libc::c_void, + size: usize, + fd: i32, + map_flags: u32, + map_offset: i64, +) -> *mut libc::c_void { + let noreserve = map_flags & (1 << 3); + let readonly = map_flags & (1 << 0); + let shared = map_flags & (1 << 1); + let sync = map_flags & (1 << 2); + let prot = libc::PROT_READ | (if readonly == 1 { 0 } else { libc::PROT_WRITE }); + let mut map_synced_flags = 0; + let mut flags = libc::MAP_FIXED; + + flags |= if fd == -1 { libc::MAP_ANONYMOUS } else { 0 }; + flags |= if shared >= 1 { + libc::MAP_SHARED + } else { + libc::MAP_PRIVATE + }; + flags |= if noreserve >= 1 { + libc::MAP_NORESERVE + } else { + 0 + }; + + if shared >= 1 && sync >= 1 { + map_synced_flags = libc::MAP_SYNC | libc::MAP_SHARED_VALIDATE; + } + + unsafe { libc::mmap(ptr, size, prot, flags | map_synced_flags, fd, map_offset) } +} + +/// A mmap() abstraction to map guest RAM, simplifying the flag handling, taking care of +/// alignment requirements and installing guard pages. +pub fn ram_mmap(size: u64) -> u64 { + const ALIGN: u64 = 4096; + const GUARD_PAGE_SIZE: u64 = 4096; + let mut total = size + ALIGN; + let guard_addr = mmap_reserve(total as usize, -1); + if guard_addr == libc::MAP_FAILED { + panic!("MMAP activate failed"); + } + assert!(ALIGN.is_power_of_two()); + assert!(ALIGN >= GUARD_PAGE_SIZE); + + let offset = align_up(guard_addr as usize, ALIGN as usize) - guard_addr as usize; + + let addr = mmap_activate(guard_addr.wrapping_add(offset), size as usize, -1, 0, 0); + + if addr == libc::MAP_FAILED { + unsafe { libc::munmap(guard_addr, total as usize) }; + panic!("MMAP activate failed"); + } + + if offset > 0 { + unsafe { libc::munmap(guard_addr, offset as usize) }; + } + + total -= offset as u64; + if total > size + GUARD_PAGE_SIZE { + unsafe { + libc::munmap( + addr.wrapping_add(size as usize) + .wrapping_add(GUARD_PAGE_SIZE as usize), + (total - size - GUARD_PAGE_SIZE) as usize, + ) + }; + } + + addr as u64 +} + +// NOTE(jakecorrenti): This IOCTL 
needs to get re-implemented manually. We need to check if KVM_CAP_MEMORY_MAPPING +// and KVM_CAP_GUEST_MEMFD are supported on the host, but those values are not present in rust-vmm/kvm-{ioctls, bindings} +ioctl_io_nr!(KVM_CHECK_EXTENSION, kvm_bindings::KVMIO, 0x03); + +fn check_extension(i: u32) -> bool { + let kvm = Kvm::new().unwrap(); + (unsafe { ioctl::ioctl_with_val(&kvm, KVM_CHECK_EXTENSION(), i.into()) }) > 0 }