Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

O1 fixes. #1400

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion bin/yk-config
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ set_aot_pipeline() {
PRELINK_PASSES_AO0=""
POSTLINK_PASSES_AO0="instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>"
PRELINK_PASSES_AO1=""
POSTLINK_PASSES_AO1="instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,mem2reg,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>"
# The whole -O1 pipeline, as reported by `opt -O1 -print-pipeline-passes
# /dev/null`.
POSTLINK_PASSES_AO1="annotation2metadata,forceattrs,inferattrs,coro-early,function<eager-inv>(lower-expect,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,sroa<modify-cfg>,early-cse<>),openmp-opt,ipsccp,called-value-propagation,globalopt,function<eager-inv>(mem2reg,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>),require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline<only-mandatory>,inline,function-attrs<skip-non-recursive-function-attrs>,function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,libcalls-shrinkwrap,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,reassociate,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;no-prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<no-nontrivial;trivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,loop(loop-idiom,indvars,loop-deletion,loop-unroll-full),sroa<modify-cfg>,memcpyopt,sccp,bdce,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,coro-elide,adce,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>),function-attrs,function(require<should-not-run-function-passes>),coro-split)),deadargelim,coro-cleanup,globalopt,globaldce,elim-avail-extern,rpo-function-attrs,recompute-globalsaa,function<eager-inv>(float2int,lower-constant-intrinsics,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,vector-combine,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,loop-unroll<O1>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-use-loop-info;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>),globaldce,constmerge,cg-profile,rel-lookup-table-converter,function(annotation-remarks),verify"
ltratt marked this conversation as resolved.
Show resolved Hide resolved
# Initialise the AOT pipeline to level 0 unless YKB_AOT_OPTLEVEL is set.
set_aot_pipeline 0

Expand Down Expand Up @@ -159,6 +161,8 @@ handle_arg() {
OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-shadow-stack"
# Encode additional locations in stackmaps.
OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-stackmap-add-locs"
# Don't optimise functions by changing their calling convention.
OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-dont-opt-func-abi"
# Use software-tracer pass
if [ "${YKB_TRACER}" = "swt" ]; then
OUTPUT="${OUTPUT} -Wl,--mllvm=--yk-basicblock-tracer"
Expand Down
2 changes: 1 addition & 1 deletion tests/c/aot_debuginfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
// ...
// # aot_debuginfo.c:{{_}}: yk_mt_control_point(mt, &loc);
// ...
// call __ykrt_control_point(%{{_}}, %{{_}}, 1i64) [safepoint:...
// call llvm.experimental.patchpoint.void(0i64, 13i32, __ykrt_control_point...
// ...
// # aot_debuginfo.c:{{_}}: i--;
// ...
Expand Down
89 changes: 0 additions & 89 deletions tests/c/lua_array_len.c

This file was deleted.

1 change: 0 additions & 1 deletion tests/c/phi3.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// ## yk-config-env: YKB_AOT_OPTLEVEL=1
// Run-time:
// env-var: YKD_LOG_IR=-:aot,jit-pre-opt
// env-var: YK_LOG=4
Expand Down
22 changes: 21 additions & 1 deletion ykcapi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,25 @@ pub extern "C" fn __ykrt_control_point(
// callee-saved registers for us (so we don't have to do it here).
unsafe {
std::arch::asm!(
// Push callee-saved registers to the stack as these may contain trace inputs (live
// Push all registers to the stack as these may contain trace inputs (live
// variables) referenced by the control point's stackmap.
//
// We don't need to push and restore `rdx` since `smid` can never be a live value and
// thus won't be tracked by the stackmap.
//
// FIXME: In the future we want the control point to return naturally into the compiled
// trace (at the moment we just rip out the control point's stack), which means we then
// no longer need to recover callee-saved registers as the control point will do this
// for us.
"push rax",
"push rcx",
"push rbx",
"push rdi",
"push rsi",
"push r8",
"push r9",
"push r10",
"push r11",
"push r12",
"push r13",
"push r14",
Expand All @@ -83,9 +97,15 @@ pub extern "C" fn __ykrt_control_point(
"pop r14",
"pop r13",
"pop r12",
"pop r11",
"pop r10",
"pop r9",
"pop r8",
"pop rsi",
"pop rdi",
"pop rbx",
"pop rcx",
"pop rax",
"ret",
options(noreturn)
);
Expand Down
9 changes: 6 additions & 3 deletions ykrt/src/compile/jitc_yk/aot_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ const MAGIC: u32 = 0xedd5f00d;
/// The version of the bytecode format.
const FORMAT_VERSION: u32 = 0;

/// The symbol name of the control point function (after ykllvm has transformed it).
const CONTROL_POINT_NAME: &str = "__ykrt_control_point";
const LLVM_DEBUG_CALL_NAME: &str = "llvm.dbg.value";

/// A struct identifying a line in a source file.
Expand Down Expand Up @@ -1059,8 +1057,13 @@ impl Inst {

/// Returns whether `self` is a call to the control point.
pub(crate) fn is_control_point(&self, m: &Module) -> bool {
// FIXME: We don't really expect any other patchpoints here, but it would be nice to check
// the third argument is actually the control point. This would require the ability to
// reconstitute a string from constant bytes though.
match self {
Self::Call { callee, .. } => m.func(*callee).name == CONTROL_POINT_NAME,
Self::Call { callee, .. } => {
m.func(*callee).name == "llvm.experimental.patchpoint.void"
}
_ => false,
}
}
Expand Down
20 changes: 20 additions & 0 deletions ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,17 @@ impl<'a> Assemble<'a> {

fn cg_loadtraceinput(&mut self, iidx: jit_ir::InstIdx, inst: &jit_ir::LoadTraceInputInst) {
let m = match &self.m.tilocs()[usize::try_from(inst.locidx()).unwrap()] {
yksmp::Location::Register(0, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::RAX))
}
yksmp::Location::Register(1, ..) => {
// The third argument to the control point is the stackmap ID and is passed by RDX.
// We don't anticipate this ever being a live variable.
unreachable!()
}
yksmp::Location::Register(2, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::RCX))
}
yksmp::Location::Register(3, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::RBX))
}
Expand All @@ -704,6 +715,15 @@ impl<'a> Assemble<'a> {
yksmp::Location::Register(5, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::RDI))
}
yksmp::Location::Register(8, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::R8))
}
yksmp::Location::Register(9, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::R9))
}
yksmp::Location::Register(10, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::R10))
}
yksmp::Location::Register(12, ..) => {
VarLocation::Register(reg_alloc::Register::GP(Rq::R12))
}
Expand Down
8 changes: 5 additions & 3 deletions ykrt/src/compile/jitc_yk/trace_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,11 @@ impl TraceBuilder {
if AOT_MOD.func(*callee).name() == "__yk_trace_basicblock" {
return Ok(());
}

if inst.is_control_point(self.aot_mod) {
return Ok(());
}

// Convert AOT args to JIT args.
let mut jit_args = Vec::new();
for arg in args {
Expand Down Expand Up @@ -722,9 +727,6 @@ impl TraceBuilder {
}
_ => panic!(),
}
if inst.is_control_point(self.aot_mod) {
return Ok(());
}
let jit_func_decl_idx = self.handle_func(*callee)?;
let inst =
jit_ir::DirectCallInst::new(&mut self.jit_mod, jit_func_decl_idx, jit_args)?.into();
Expand Down
14 changes: 10 additions & 4 deletions ykrt/src/mt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,16 +700,22 @@ unsafe extern "C" fn exec_trace(
// Reset RSP to the end of the control point frame (this includes the registers we pushed
// just before the control point)
"mov rsp, rsi",
"sub rsp, 8", // Return address of control point call
"sub rsp, 56", // Registers pushed in naked cp call
//// Restore callee-saved registers which were pushed to the stack in __ykrt_control_point.
"sub rsp, 8", // Return address of control point call
"sub rsp, 104", // Registers pushed in naked cp call (includes alignment)
// Restore registers which were pushed to the stack in [ykcapi::__ykrt_control_point].
"pop r15",
"pop r14",
"pop r13",
"pop r12",
"pop r11",
"pop r10",
"pop r9",
"pop r8",
"pop rsi",
"pop rdi", // Don't overwrite `rdi` for now until we remove the first two args.
"pop rdi",
"pop rbx",
"pop rcx",
"pop rax",
// Call the trace function.
"jmp rdx",
"ret",
Expand Down
1 change: 1 addition & 0 deletions yksmp/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ impl StackMapParser<'_> {
fn read_liveouts(&mut self, num: u16) {
for _ in 0..num {
let _dwreg = self.read_u16();
let _reserved = self.read_u8();
let _size = self.read_u8();
}
}
Expand Down