diff --git a/bin/yk-config b/bin/yk-config index c6c2d65e4..3d0d4f7b1 100755 --- a/bin/yk-config +++ b/bin/yk-config @@ -44,7 +44,9 @@ set_aot_pipeline() { PRELINK_PASSES_AO0="" POSTLINK_PASSES_AO0="instcombine" PRELINK_PASSES_AO1="" -POSTLINK_PASSES_AO1="instcombine,mem2reg,instcombine" +# The whole -O1 pipeline, as reported by `opt -O1 -print-pipeline-passes +# /dev/null`. +POSTLINK_PASSES_AO1="annotation2metadata,forceattrs,inferattrs,coro-early,function(lower-expect,simplifycfg,sroa,early-cse<>),openmp-opt,ipsccp,called-value-propagation,globalopt,function(mem2reg,instcombine,simplifycfg),require,function(invalidate),require,cgscc(devirt<4>(inline,inline,function-attrs,function(sroa,early-cse,simplifycfg,instcombine,libcalls-shrinkwrap,simplifycfg,reassociate,loop-mssa(loop-instsimplify,loop-simplifycfg,licm,loop-rotate,licm,simple-loop-unswitch),simplifycfg,instcombine,loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),sroa,memcpyopt,sccp,bdce,instcombine,coro-elide,adce,simplifycfg,instcombine),function-attrs,function(require),coro-split)),deadargelim,coro-cleanup,globalopt,globaldce,elim-avail-extern,rpo-function-attrs,recompute-globalsaa,function(float2int,lower-constant-intrinsics,loop(loop-rotate,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize,infer-alignment,loop-load-elim,instcombine,simplifycfg,vector-combine,instcombine,loop-unroll,transform-warning,sroa,infer-alignment,instcombine,loop-mssa(licm),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg),globaldce,constmerge,cg-profile,rel-lookup-table-converter,function(annotation-remarks),verify" # Initialise the AOT pipeline to level 0 unless YKB_AOT_OPTLEVEL is set. set_aot_pipeline 0 @@ -159,6 +161,8 @@ handle_arg() { OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-shadow-stack" # Encode additional locations in stackmaps. OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-stackmap-add-locs" + # Don't optimise functions by changing their calling convention. + OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-dont-opt-func-abi" # Use software-tracer pass if [ "${YKB_TRACER}" = "swt" ]; then OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-basicblock-tracer" diff --git a/tests/c/aot_debuginfo.c b/tests/c/aot_debuginfo.c index a1a5787f1..202da49c2 100644 --- a/tests/c/aot_debuginfo.c +++ b/tests/c/aot_debuginfo.c @@ -24,7 +24,7 @@ // ... // # aot_debuginfo.c:{{_}}: yk_mt_control_point(mt, &loc); // ... -// call __ykrt_control_point(%{{_}}, %{{_}}, 1i64) [safepoint:... +// call llvm.experimental.patchpoint.void(0i64, 13i32, __ykrt_control_point... // ... // # aot_debuginfo.c:{{_}}: i--; // ... diff --git a/tests/c/lua_array_len.c b/tests/c/lua_array_len.c deleted file mode 100644 index d8292ebfe..000000000 --- a/tests/c/lua_array_len.c +++ /dev/null @@ -1,89 +0,0 @@ -// ## yk-config-env: YKB_AOT_OPTLEVEL=1 -// Run-time: -// env-var: YKD_LOG_IR=-:aot,jit-pre-opt -// env-var: YKD_SERIALISE_COMPILATION=1 -// env-var: YK_LOG=4 - -// This was reduced from yklua. -// -// In short a `ptradd %ptr, 0` was reducing to nothing in the tracebuilder and -// the AOT instruction was being associated with the last JIT instruction -// (which was nothing to do with the `ptr_add`!). -// -// This caused a runtime crash. -// -// I tried to reduce this to a test with a simple constant 0-byte offset field -// access, but I can't get ykllvm to emit a constant zero offset GEP when -// mem2reg is enabled. - -#include -#include -#include -#include -#include -#include - -typedef unsigned char lu_byte; -typedef long long lua_Integer; -typedef unsigned long long lua_Unsigned; - -typedef union Value { - lua_Integer i; -} Value; - -typedef struct TValue { - Value value_; lu_byte tt_; -} TValue; - -typedef union StackValue { - TValue val; - struct { - Value value_; lu_byte tt_; - unsigned short delta; - } tbclist; -} StackValue; - -typedef StackValue *StkId; -typedef struct Table {} Table; - -union GCUnion { - struct Table h; -}; - -__attribute__((noinline)) -lua_Unsigned luaH_getn(Table *t) { return 0; } - -__attribute__((noinline)) -void luaV_objlen(StkId ra, const TValue *rb) { - Table *h = &((union GCUnion *)rb)->h; - TValue *io = (TValue *) ra; - io->value_.i = luaH_getn(h); -} - -int main(int argc, char **argv) { - YkMT *mt = yk_mt_new(NULL); - yk_mt_hot_threshold_set(mt, 0); - YkLocation loc = yk_location_new(); - - int i = 4; - StackValue sv; - sv.val.value_.i = 1; - TValue tv; - tv.value_.i = 1; - StkId ra = &sv; - TValue *rb = &tv; - NOOPT_VAL(loc); - NOOPT_VAL(i); - NOOPT_VAL(ra); - NOOPT_VAL(rb); - while (i > 0) { - yk_mt_control_point(mt, &loc); - luaV_objlen(ra, rb); - fprintf(stderr, "%d\n", i); - i--; - } - fprintf(stderr, "exit\n"); - yk_location_drop(loc); - yk_mt_shutdown(mt); - return (EXIT_SUCCESS); -} diff --git a/tests/c/phi3.c b/tests/c/phi3.c index 8a5d21c5c..3c7c8010a 100644 --- a/tests/c/phi3.c +++ b/tests/c/phi3.c @@ -1,4 +1,3 @@ -// ## yk-config-env: YKB_AOT_OPTLEVEL=1 // Run-time: // env-var: YKD_LOG_IR=-:aot,jit-pre-opt // env-var: YK_LOG=4 diff --git a/ykcapi/src/lib.rs b/ykcapi/src/lib.rs index 03511dc43..9698beba5 100644 --- a/ykcapi/src/lib.rs +++ b/ykcapi/src/lib.rs @@ -66,11 +66,25 @@ pub extern "C" fn __ykrt_control_point( // callee-saved registers for us (so we don't have to do it here). unsafe { std::arch::asm!( - // Push callee-saved registers to the stack as these may contain trace inputs (live + // Push all registers to the stack as these may contain trace inputs (live // variables) referenced by the control point's stackmap. + // + // We don't need to push and restore `rdx` since `smid` can never be a live value and + // thus won't be tracked by the stackmap. + // + // FIXME: In the future we want the control point to return naturally into the compiled + // trace (at the moment we just rip out the control point's stack), which means we then + // no longer need to recover callee-saved registers as the control point will do this + // for us. + "push rax", + "push rcx", "push rbx", "push rdi", "push rsi", + "push r8", + "push r9", + "push r10", + "push r11", "push r12", "push r13", "push r14", @@ -83,9 +97,15 @@ pub extern "C" fn __ykrt_control_point( "pop r14", "pop r13", "pop r12", + "pop r11", + "pop r10", + "pop r9", + "pop r8", "pop rsi", "pop rdi", "pop rbx", + "pop rcx", + "pop rax", "ret", options(noreturn) ); diff --git a/ykllvm b/ykllvm index cbe366d62..df8f25adb 160000 --- a/ykllvm +++ b/ykllvm @@ -1 +1 @@ -Subproject commit cbe366d62656d7259c36f22053726517b73d30ee +Subproject commit df8f25adb2ee7f5da6cd48f3d1ec528b770d08a5 diff --git a/ykrt/src/compile/jitc_yk/aot_ir.rs b/ykrt/src/compile/jitc_yk/aot_ir.rs index 689c8e036..00a08ec2a 100644 --- a/ykrt/src/compile/jitc_yk/aot_ir.rs +++ b/ykrt/src/compile/jitc_yk/aot_ir.rs @@ -45,8 +45,6 @@ const MAGIC: u32 = 0xedd5f00d; /// The version of the bytecode format. const FORMAT_VERSION: u32 = 0; -/// The symbol name of the control point function (after ykllvm has transformed it). -const CONTROL_POINT_NAME: &str = "__ykrt_control_point"; const LLVM_DEBUG_CALL_NAME: &str = "llvm.dbg.value"; /// A struct identifying a line in a source file. @@ -1059,8 +1057,13 @@ impl Inst { /// Returns whether `self` is a call to the control point. pub(crate) fn is_control_point(&self, m: &Module) -> bool { + // FIXME: We don't really expect any other patchpoints here, but it would be nice to check + // the third argument is actually the control point. This would require the ability to + // reconstitute a string from constant bytes though. match self { - Self::Call { callee, .. } => m.func(*callee).name == CONTROL_POINT_NAME, + Self::Call { callee, .. } => { + m.func(*callee).name == "llvm.experimental.patchpoint.void" + } _ => false, } } diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs index eaeb83530..fa35e7955 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs @@ -695,6 +695,17 @@ impl<'a> Assemble<'a> { fn cg_loadtraceinput(&mut self, iidx: jit_ir::InstIdx, inst: &jit_ir::LoadTraceInputInst) { let m = match &self.m.tilocs()[usize::try_from(inst.locidx()).unwrap()] { + yksmp::Location::Register(0, ..) => { + VarLocation::Register(reg_alloc::Register::GP(Rq::RAX)) + } + yksmp::Location::Register(1, ..) => { + // The third argument to the control point is the stackmap ID and is passed by RDX. + // We don't anticipate this ever being a live variable. + unreachable!() + } + yksmp::Location::Register(2, ..) => { + VarLocation::Register(reg_alloc::Register::GP(Rq::RCX)) + } yksmp::Location::Register(3, ..) => { VarLocation::Register(reg_alloc::Register::GP(Rq::RBX)) } @@ -704,6 +715,15 @@ impl<'a> Assemble<'a> { yksmp::Location::Register(5, ..) => { VarLocation::Register(reg_alloc::Register::GP(Rq::RDI)) } + yksmp::Location::Register(8, ..) => { + VarLocation::Register(reg_alloc::Register::GP(Rq::R8)) + } + yksmp::Location::Register(9, ..) => { + VarLocation::Register(reg_alloc::Register::GP(Rq::R9)) + } + yksmp::Location::Register(10, ..) => { + VarLocation::Register(reg_alloc::Register::GP(Rq::R10)) + } yksmp::Location::Register(12, ..) => { VarLocation::Register(reg_alloc::Register::GP(Rq::R12)) } diff --git a/ykrt/src/compile/jitc_yk/trace_builder.rs b/ykrt/src/compile/jitc_yk/trace_builder.rs index 1820d01a7..1e0be4c77 100644 --- a/ykrt/src/compile/jitc_yk/trace_builder.rs +++ b/ykrt/src/compile/jitc_yk/trace_builder.rs @@ -660,6 +660,11 @@ impl TraceBuilder { if AOT_MOD.func(*callee).name() == "__yk_trace_basicblock" { return Ok(()); } + + if inst.is_control_point(self.aot_mod) { + return Ok(()); + } + // Convert AOT args to JIT args. let mut jit_args = Vec::new(); for arg in args { @@ -722,9 +727,6 @@ impl TraceBuilder { } _ => panic!(), } - if inst.is_control_point(self.aot_mod) { - return Ok(()); - } let jit_func_decl_idx = self.handle_func(*callee)?; let inst = jit_ir::DirectCallInst::new(&mut self.jit_mod, jit_func_decl_idx, jit_args)?.into(); diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index 5d8071e1e..a57b3a8f6 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -700,16 +700,22 @@ unsafe extern "C" fn exec_trace( // Reset RSP to the end of the control point frame (this includes the registers we pushed // just before the control point) "mov rsp, rsi", - "sub rsp, 8", // Return address of control point call - "sub rsp, 56", // Registers pushed in naked cp call - //// Restore callee-saved registers which were pushed to the stack in __ykrt_control_point. + "sub rsp, 8", // Return address of control point call + "sub rsp, 104", // Registers pushed in naked cp call (includes alignment) + // Restore registers which were pushed to the stack in [ykcapi::__ykrt_control_point]. "pop r15", "pop r14", "pop r13", "pop r12", + "pop r11", + "pop r10", + "pop r9", + "pop r8", "pop rsi", - "pop rdi", // Don't overwrite `rdi` for now until we remove the first two args. + "pop rdi", "pop rbx", + "pop rcx", + "pop rax", // Call the trace function. "jmp rdx", "ret", diff --git a/yksmp/src/lib.rs b/yksmp/src/lib.rs index 3ac8b61e2..024dd51ea 100644 --- a/yksmp/src/lib.rs +++ b/yksmp/src/lib.rs @@ -285,6 +285,7 @@ impl StackMapParser<'_> { fn read_liveouts(&mut self, num: u16) { for _ in 0..num { let _dwreg = self.read_u16(); + let _reserved = self.read_u8(); let _size = self.read_u8(); } }