-
Notifications
You must be signed in to change notification settings - Fork 260
/
Copy pathshvvmxhv.c
344 lines (301 loc) · 9.69 KB
/
shvvmxhv.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
/*++
Copyright (c) Alex Ionescu. All rights reserved.
Module Name:
shvvmxhv.c
Abstract:
This module implements the Simple Hyper Visor itself.
Author:
Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version
Environment:
Hypervisor mode only, IRQL MAX_IRQL
--*/
#include "shv.h"
DECLSPEC_NORETURN
VOID
ShvVmxResume (
    VOID
    )
{
    //
    // Re-enter the guest with VMRESUME. The instruction is wrapped in its own
    // routine for two reasons: the VM-Exit path needs a plain function address
    // it can point the restored instruction pointer at, and the wrapper can be
    // tagged DECLSPEC_NORETURN. The intrinsic itself does not carry that
    // marker, because the instruction can fail and fall through.
    //
    __vmx_vmresume();
}
//
// Reads one field from the current VMCS and returns its value directly.
//
// VmcsFieldId - Encoding of the VMCS field to read.
//
// Returns the field contents, or 0 if the VMREAD did not succeed (for example
// when there is no current VMCS).
//
uintptr_t
FORCEINLINE
ShvVmxRead (
    _In_ UINT32 VmcsFieldId
    )
{
    //
    // Start from zero: the intrinsic only fills the buffer on success, so
    // without an initializer a failed read would hand an indeterminate stack
    // value back to the caller.
    //
    size_t FieldData = 0;

    //
    // Because VMREAD reports a status code rather than the data itself, it is
    // painful to use inline in most circumstances. This wrapper simplifies
    // its use by returning the data.
    //
    __vmx_vmread(VmcsFieldId, &FieldData);
    return FieldData;
}
//
// Attempts to launch the current VMCS. Control only comes back to this
// routine when the launch did not take effect, in which case VMX operation is
// turned off and the VM-instruction error is returned.
//
INT32
ShvVmxLaunch (
    VOID
    )
{
    INT32 launchError;

    //
    // Try to enter the guest described by the current VMCS.
    //
    __vmx_vmlaunch();

    //
    // Still executing here means the launch failed, either because the VMCS
    // was set up incorrectly or because the launch itself went wrong. Fetch
    // the error field first, then leave VMX operation.
    //
    launchError = (INT32)ShvVmxRead(VM_INSTRUCTION_ERROR);
    __vmx_off();

    //
    // Hand the failure reason to the caller.
    //
    return launchError;
}
VOID
ShvVmxHandleInvd (
    VOID
    )
{
    //
    // Handler for a guest INVD. The exit is serviced with WBINVD instead:
    // there is no __invd intrinsic to call, the Windows kernel and HAL contain
    // no example of INVD actually being used, and Hyper-V handles INVD the
    // same way.
    //
    __wbinvd();
}
//
// Handles a guest CPUID exit.
//
// VpState - Per-exit state; VpRegs holds the guest GPRs that supply the
//           leaf/subleaf and receive the results. ExitVm is set to TRUE when
//           the magic unload sequence is recognized.
//
VOID
ShvVmxHandleCpuid (
    _In_ PSHV_VP_STATE VpState
    )
{
    INT32 cpu_info[4];
    UINT32 leaf;
    UINT32 subleaf;

    //
    // Check for the magic CPUID sequence, and check that it is coming from
    // Ring 0. Technically we could also check the RIP and see if this falls
    // in the expected function, but we may want to allow a separate "unload"
    // driver or code at some point.
    //
    if ((VpState->VpRegs->Rax == 0x41414141) &&
        (VpState->VpRegs->Rcx == 0x42424242) &&
        ((ShvVmxRead(GUEST_CS_SELECTOR) & RPL_MASK) == DPL_SYSTEM))
    {
        VpState->ExitVm = TRUE;
        return;
    }

    //
    // CPUID only consumes EAX and ECX, so work with the low 32 bits of the
    // guest registers. Comparing the full 64-bit RAX would miss the special
    // leaves below whenever the guest left stale data in the upper half.
    //
    leaf = (UINT32)VpState->VpRegs->Rax;
    subleaf = (UINT32)VpState->VpRegs->Rcx;

    //
    // Otherwise, issue the CPUID to the logical processor based on the indexes
    // on the VP's GPRs.
    //
    __cpuidex(cpu_info, (INT32)leaf, (INT32)subleaf);

    //
    // Check if this was CPUID 1h, which is the features request.
    //
    if (leaf == 1)
    {
        //
        // Set the Hypervisor Present-bit in RCX, which Intel and AMD have both
        // reserved for this indication.
        //
        cpu_info[2] |= HYPERV_HYPERVISOR_PRESENT_BIT;
    }
    else if (leaf == HYPERV_CPUID_INTERFACE)
    {
        //
        // Return our interface identifier
        //
        cpu_info[0] = ' vhS';
    }

    //
    // Copy the values from the logical processor registers into the VP GPRs.
    // The intrinsic reports signed 32-bit values; convert through UINT32 so
    // the 64-bit guest registers are zero-extended, as the real instruction
    // does, instead of sign-extended when bit 31 is set.
    //
    VpState->VpRegs->Rax = (UINT32)cpu_info[0];
    VpState->VpRegs->Rbx = (UINT32)cpu_info[1];
    VpState->VpRegs->Rcx = (UINT32)cpu_info[2];
    VpState->VpRegs->Rdx = (UINT32)cpu_info[3];
}
//
// Handles a guest XSETBV exit by replaying the instruction on the native
// logical processor.
//
// VpState - Per-exit state; VpRegs supplies the guest ECX (XCR index) and
//           EDX:EAX (value to write).
//
VOID
ShvVmxHandleXsetbv (
    _In_ PSHV_VP_STATE VpState
    )
{
    UINT32 xcrIndex;
    UINT64 xcrValue;

    //
    // XSETBV takes its operands from ECX and EDX:EAX only. Truncate RAX to
    // 32 bits before combining, otherwise stale upper bits of the guest's RAX
    // would be merged into the high half of the value being written.
    //
    xcrIndex = (UINT32)VpState->VpRegs->Rcx;
    xcrValue = ((UINT64)(UINT32)VpState->VpRegs->Rdx << 32) |
               (UINT32)VpState->VpRegs->Rax;

    //
    // Simply issue the XSETBV instruction on the native logical processor.
    // NOTE(review): the guest-supplied index/value are not validated here, so
    // an invalid request would presumably fault in the host -- confirm.
    //
    _xsetbv(xcrIndex, xcrValue);
}
//
// Handles a guest attempt to execute a VMX instruction by reporting failure
// back to the guest through its flags register.
//
// VpState - Per-exit state whose GuestEFlags is updated and written back to
//           the VMCS.
//
VOID
ShvVmxHandleVmx (
    _In_ PSHV_VP_STATE VpState
    )
{
    //
    // VMX instructions signal failure through the carry flag (bit 0), so
    // raise it in the cached guest flags. This is the VM_FAIL_INVALID case.
    //
    VpState->GuestEFlags = VpState->GuestEFlags | 0x1;

    //
    // The guest's RFLAGS is reloaded from the VMCS on entry, so the cached
    // copy has to be pushed there for the change to take effect.
    //
    __vmx_vmwrite(GUEST_RFLAGS, VpState->GuestEFlags);
}
//
// Generic VM-Exit dispatcher: routes the exit to the matching handler based
// on VpState->ExitReason, then steps the guest past the exiting instruction.
//
// VpState - Per-exit state; GuestRip is advanced and written to the VMCS.
//
VOID
ShvVmxHandleExit (
    _In_ PSHV_VP_STATE VpState
    )
{
    //
    // No optional exits have been requested, so per the Intel specifications
    // the only reasons expected here are CPUID, INVD, XSETBV and the VMX
    // instructions themselves. GETSEC cannot occur because we do not run in
    // SMX context. Anything unrecognized is simply skipped over.
    //
    switch (VpState->ExitReason)
    {
    case EXIT_REASON_VMCALL:
    case EXIT_REASON_VMCLEAR:
    case EXIT_REASON_VMLAUNCH:
    case EXIT_REASON_VMPTRLD:
    case EXIT_REASON_VMPTRST:
    case EXIT_REASON_VMREAD:
    case EXIT_REASON_VMRESUME:
    case EXIT_REASON_VMWRITE:
    case EXIT_REASON_VMXOFF:
    case EXIT_REASON_VMXON:
        ShvVmxHandleVmx(VpState);
        break;

    case EXIT_REASON_XSETBV:
        ShvVmxHandleXsetbv(VpState);
        break;

    case EXIT_REASON_INVD:
        ShvVmxHandleInvd();
        break;

    case EXIT_REASON_CPUID:
        ShvVmxHandleCpuid(VpState);
        break;

    default:
        break;
    }

    //
    // Step the guest instruction pointer over the instruction that triggered
    // the exit. None of the handlers redirect execution, so this applies to
    // every exit reason alike.
    //
    VpState->GuestRip = VpState->GuestRip + ShvVmxRead(VM_EXIT_INSTRUCTION_LEN);
    __vmx_vmwrite(GUEST_RIP, VpState->GuestRip);
}
//
// C-level VM-Exit entry point. Receives the register context captured on
// entry, gathers the guest RIP/RSP/RFLAGS and exit reason from the VMCS,
// dispatches to ShvVmxHandleExit, and then either resumes the guest (by
// restoring the context with RIP pointed at ShvVmxResume) or, when the magic
// unload sequence was seen, turns VMX off and restores the context at the
// guest's own RIP/RSP/EFLAGS.
//
// Context - Captured register context; it is modified in place and finally
//           passed to ShvOsRestoreContext. This routine is marked
//           DECLSPEC_NORETURN.
//
DECLSPEC_NORETURN
VOID
ShvVmxEntryHandler (
_In_ PCONTEXT Context
)
{
SHV_VP_STATE guestContext;
PSHV_VP_DATA vpData;
//
// Because we had to use RCX when calling ShvOsCaptureContext, its value
// was actually pushed on the stack right before the call. Go dig into the
// stack to find it (the slot immediately below the context structure), and
// overwrite the bogus value that's there now.
//
Context->Rcx = *(UINT64*)((uintptr_t)Context - sizeof(Context->Rcx));
//
// Get the per-VP data for this processor. The address is derived from the
// end of the context structure minus KERNEL_STACK_SIZE.
// NOTE(review): this presumably relies on the context sitting at the very top
// of a stack that is embedded at the start of the per-VP data -- confirm
// against the SHV_VP_DATA layout.
//
vpData = (VOID*)((uintptr_t)(Context + 1) - KERNEL_STACK_SIZE);
//
// Build a little stack context to make it easier to keep track of certain
// guest state, such as the RIP/RSP/RFLAGS, and the exit reason. The rest
// of the general purpose registers come from the context structure that was
// captured in the assembly entrypoint. Only the low 16 bits of the exit
// reason field are kept.
//
guestContext.GuestEFlags = ShvVmxRead(GUEST_RFLAGS);
guestContext.GuestRip = ShvVmxRead(GUEST_RIP);
guestContext.GuestRsp = ShvVmxRead(GUEST_RSP);
guestContext.ExitReason = ShvVmxRead(VM_EXIT_REASON) & 0xFFFF;
guestContext.VpRegs = Context;
guestContext.ExitVm = FALSE;
//
// Call the generic handler
//
ShvVmxHandleExit(&guestContext);
//
// Did we hit the magic exit sequence, or should we resume back to the VM
// context?
//
if (guestContext.ExitVm != FALSE)
{
//
// Return the VP Data structure in RAX:RBX which is going to be part of
// the CPUID response that the caller (ShvVpUninitialize) expects back:
// the upper 32 bits of the pointer go in RAX and the lower 32 bits in RBX.
// Return confirmation in RCX that we are loaded.
//
Context->Rax = (uintptr_t)vpData >> 32;
Context->Rbx = (uintptr_t)vpData & 0xFFFFFFFF;
Context->Rcx = 0x43434343;
//
// Perform any OS-specific CPU uninitialization work
//
ShvOsUnprepareProcessor(vpData);
//
// Our callback routine may have interrupted an arbitrary user process,
// and therefore not a thread running with a systemwide page directory.
// Therefore if we return back to the original caller after turning off
// VMX, it will keep our current "host" CR3 value which we set on entry
// to the PML4 of the SYSTEM process. We want to return back with the
// correct value of the "guest" CR3, so that the currently executing
// process continues to run with its expected address space mappings.
//
__writecr3(ShvVmxRead(GUEST_CR3));
//
// Finally, restore the stack, instruction pointer and EFLAGS to the
// original values present when the instruction causing our VM-Exit
// executed (such as in ShvVpUninitialize). This will effectively act as
// a longjmp back to that location. Note that GuestRip has already been
// advanced past the exiting instruction by ShvVmxHandleExit.
//
Context->Rsp = guestContext.GuestRsp;
Context->Rip = (UINT64)guestContext.GuestRip;
Context->EFlags = (UINT32)guestContext.GuestEFlags;
//
// Turn off VMX root mode on this logical processor. We're done here.
//
__vmx_off();
}
else
{
//
// Because we won't be returning back into assembly code, nothing will
// ever know about the "pop rcx" that must technically be done (or more
// accurately "add rsp, sizeof(Rcx)", as rcx will already be correct thanks
// to the fixup earlier). In order to keep the stack sane, do that
// adjustment here.
//
Context->Rsp += sizeof(Context->Rcx);
//
// Return into a VMXRESUME intrinsic, which we broke out as its own
// function, in order to allow this to work. No assembly code will be
// needed as the context restore will fix all the GPRs, and what we just
// did to RSP will take care of the rest.
//
Context->Rip = (UINT64)ShvVmxResume;
}
//
// Restore the context to either ShvVmxResume, in which case the CPU's VMX
// facility will do the "true" return back to the VM (but without restoring
// GPRs, which is why we must do it here), or to the original guest's RIP,
// which we use in case an exit was requested. In this case VMX must now be
// off, and this will look like a longjmp to the original stack and RIP.
//
ShvOsRestoreContext(Context);
}