Commit 34d3ded: work in progress

RobertObkircher committed Dec 4, 2024 · 1 parent 9b28df8

Showing 4 changed files with 281 additions and 3 deletions.
272 changes: 272 additions & 0 deletions src/sea_of_nodes/global_code_motion.rs

@@ -0,0 +1,272 @@
use crate::datastructures::id_set::IdSet;
use crate::sea_of_nodes::nodes::index::{Load, Start, Stop};
use crate::sea_of_nodes::nodes::{Cfg, Node, Nodes};
use std::collections::HashSet;

/// Arrange that the existing CFG nodes form a valid CFG. The `use(0)` of
/// a CFG node is always a block tail (either an If or the head of the
/// following block). There are no unreachable infinite loops.
pub fn build_cfg(stop: Stop, sea: &mut Nodes) {
fix_loops(stop, sea);
sched_early(sea);
sea.scheduled = true;
sched_late(sea.start, sea);
}

/// Backwards walk on the CFG only, looking for unreachable code - which has
/// to be an infinite loop. Insert a bogus never-taken exit to Stop, so the
/// loop becomes reachable. Also sets the loop nesting depth.
fn fix_loops(stop: Stop, sea: &mut Nodes) {
// Backwards walk from Stop, looking for unreachable code
let mut visit = IdSet::zeros(sea.len());
let mut unreach = HashSet::with_capacity(sea.len());

unreach.insert(sea.start.to_cfg(&sea.ops).unwrap());
for ret in stop.inputs(sea) {
        ret.unwrap()
            .to_return(sea)
            .unwrap()
            .walk_unreach(&mut visit, &mut unreach);
}
if unreach.is_empty() {
return;
}

// Forwards walk from unreachable, looking for loops with no exit test.
visit.clear();
for &cfg in &unreach {
walk_infinite(cfg, &mut visit, stop, sea);
}
// Set loop depth on remaining graph
unreach.clear();
visit.clear();
for ret in stop.inputs(sea) {
        ret.unwrap()
            .to_return(sea)
            .unwrap()
            .walk_unreach(&mut visit, &mut unreach);
}
assert!(unreach.is_empty());
}
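
// Conceptual shape of the never-taken exit (illustrative only; the exact
// nodes are whatever `force_exit` builds): the loop gains an If whose
// condition never fires at runtime, and the dead projection reaches Stop,
// so the backwards walk from Stop can now see the loop body:
//
//     Loop --> If(never) --+--> CProj(0) --> back into the loop body
//                          +--> CProj(1) --> ... --> Stop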

/// Forwards walk over previously unreachable, looking for loops with no
/// exit test.
fn walk_infinite(n: Cfg, visit: &mut IdSet<Node>, stop: Stop, sea: &Nodes) {
let n = n.node();
if visit.get(n) {
return; // Been there, done that
}
visit.add(n);
if let Some(n) = n.to_loop(sea) {
n.force_exit(stop);
}
    for &use_ in &sea.outputs[n] {
if use_ != Node::DUMMY {
if let Some(use_) = use_.to_cfg(&sea.ops) {
walk_infinite(use_, visit, stop, sea);
}
}
}
}

fn sched_early(sea: &mut Nodes) {
let mut rpo = vec![];
let mut visit = IdSet::zeros(sea.len());
_rpo_cfg(*sea.start, &mut visit, &mut rpo, sea);

// Reverse Post-Order on CFG
for cfg in rpo.into_iter().rev() {
cfg.loop_depth();
        // `_sched_early` mutates the graph, so walk inputs by index rather
        // than holding a borrow of the input list across the recursive call.
        for i in 0..cfg.node().inputs(sea).len() {
            let n = cfg.node().inputs(sea)[i];
            _sched_early(n, &mut visit, sea);
        }

        // Strictly for dead infinite loops, we can have entire code blocks
        // not reachable from below - so we reach down, from above, one
        // step. Since _sched_early modifies the output arrays, we cannot
        // iterate a borrow of the region's outputs while calling it; index
        // by position instead. The extra edges are always *added* after any
        // Phis, so just walk the Phi prefix.
if let Some(region) = cfg.node().to_region(sea) {
let len = sea.outputs[region].len();
for i in 0..len {
if sea.outputs[region][i] != Node::DUMMY {
if let Some(phi) = sea.outputs[region][i].to_phi(sea) {
_sched_early(Some(*phi), &mut visit, sea);
}
}
}
}
}
}

/// Post-Order of CFG
fn _rpo_cfg(n: Node, visit: &mut IdSet<Node>, rpo: &mut Vec<Cfg>, sea: &Nodes) {
let Some(cfg) = n.to_cfg(&sea.ops) else {
return;
};
if visit.get(n) {
return; // Been there, done that
}
visit.add(n);
for &use_ in &sea.outputs[n] {
_rpo_cfg(use_, visit, rpo, sea);
}
rpo.push(cfg);
}

fn _sched_early(n: Option<Node>, visit: &mut IdSet<Node>, sea: &mut Nodes) {
let Some(n) = n else { return };
if visit.get(n) {
return; // Been there, done that
}
visit.add(n);

    // Schedule not-pinned not-CFG inputs before self. Since we skip
    // pinned nodes, this never walks the backedge of Phis (and thus never
    // spins around a data-only loop, eventually relying on some
    // pre-visited-but-not-post-visited data op with no scheduled control).
    // Walk by index, since the recursive call takes `&mut Nodes`.
    for i in 0..n.inputs(sea).len() {
        if let Some(def) = n.inputs(sea)[i] {
            if !def.is_pinned(sea) {
                _sched_early(Some(def), visit, sea);
            }
        }
    }

    // If not pinned (constants, projections, and Phis are pinned) and not CFG
if !n.is_pinned(sea) {
// Schedule at deepest input
let mut early = sea.start.to_cfg(&sea.ops).unwrap(); // Maximally early, lowest idepth
for i in 1..n.inputs(sea).len() {
let cfg0 = n.inputs(sea)[i].unwrap().inputs(sea)[0]
.unwrap()
.to_cfg(&sea.ops)
.unwrap();
if sea[cfg0].idepth > sea[early].idepth {
early = cfg0; // Latest/deepest input
}
}
n.set_def(0, Some(early.node()), sea); // First place this can go
}
}
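
// Worked example for the "deepest input" rule above: if an Add's inputs are
// defined under controls with idepth 2 and idepth 5 (illustrative numbers),
// both inputs are only available at idepth 5 or deeper, so the Add's early
// schedule is the idepth-5 block.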

fn sched_late(start: Start, sea: &mut Nodes) {
    // Rust sketch of the Java original: gather a late schedule per node,
    // then pin each scheduled node's control input to it. `Vec<Option<_>>`
    // indexed by node id stands in for the Java arrays; the final port may
    // prefer `IdVec`.
    let len = sea.len();
    let mut ns: Vec<Option<Node>> = vec![None; len];
    let mut late: Vec<Option<Cfg>> = vec![None; len];
    _sched_late(*start, &mut ns, &mut late, sea);
    for i in 0..late.len() {
        if let Some(n) = ns[i] {
            n.set_def(0, late[i].map(|cfg| cfg.node()), sea);
        }
    }
}

/// Forwards post-order pass. Schedule all outputs first, then draw an
/// idom-tree line from the LCA of uses to the early schedule. Schedule is
/// legal anywhere on this line; pick the most control-dependent (largest
/// idepth) in the shallowest loop nest.
fn _sched_late(n: Node, ns: &mut [Option<Node>], late: &mut [Option<Cfg>], sea: &Nodes) {
// if( late[n._nid]!=null ) return; // Been there, done that
// // These I know the late schedule of, and need to set early for loops
// if( n instanceof CFGNode cfg ) late[n._nid] = cfg.blockHead() ? cfg : cfg.cfg(0);
// if( n instanceof PhiNode phi ) late[n._nid] = phi.region();
//
// // Walk Stores before Loads, so we can get the anti-deps right
// for( Node use : n._outputs )
// if( isForwardsEdge(use,n) &&
// use._type instanceof TypeMem )
// _schedLate(use,ns,late);
// // Walk everybody now
// for( Node use : n._outputs )
// if( isForwardsEdge(use,n) )
// _schedLate(use,ns,late);
// // Already implicitly scheduled
// if( n.isPinned() ) return;
// // Need to schedule n
//
// // Walk uses, gathering the LCA (Least Common Ancestor) of uses
// CFGNode early = (CFGNode)n.in(0);
// assert early != null;
// CFGNode lca = null;
// for( Node use : n._outputs )
// lca = use_block(n,use, late).idom(lca);
//
// // Loads may need anti-dependencies, raising their LCA
// if( n instanceof LoadNode load )
// lca = find_anti_dep(lca,load,early,late);
//
// // Walk up from the LCA to the early, looking for best place. This is the
// // lowest execution frequency, approximated by least loop depth and
// // deepest control flow.
// CFGNode best = lca;
// lca = lca.idom(); // Already found best for starting LCA
// for( ; lca != early.idom(); lca = lca.idom() )
// if( better(lca,best) )
// best = lca;
// assert !(best instanceof IfNode);
// ns [n._nid] = n;
// late[n._nid] = best;
// }
}
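
// The `idom(...)` folds in the Java sketch above compute an LCA on the
// dominator tree. A minimal sketch of that walk, assuming a hypothetical
// `idom(cfg, sea) -> Cfg` accessor (named here for illustration only) and
// the `idepth` field from cfg.rs:
//
//     fn idom_lca(mut a: Cfg, mut b: Cfg, sea: &Nodes) -> Cfg {
//         while sea[a].idepth > sea[b].idepth { a = idom(a, sea); }
//         while sea[b].idepth > sea[a].idepth { b = idom(b, sea); }
//         while a != b { a = idom(a, sea); b = idom(b, sea); }
//         a
//     }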

/// Block of use. Normally from late[] schedule, except for Phis, which go
/// to the matching Region input.
fn use_block(n: Node, use_: Node, late: &[Option<Cfg>], sea: &Nodes) -> Cfg {
// private static CFGNode use_block(Node n, Node use, CFGNode[] late) {
// if( !(use instanceof PhiNode phi) )
// return late[use._nid];
// CFGNode found=null;
// for( int i=1; i<phi.nIns(); i++ )
// if( phi.in(i)==n )
// if( found==null ) found = phi.region().cfg(i);
// else Utils.TODO(); // Can be more than once
// assert found!=null;
// return found;
// }
    todo!() // port of the commented Java above is still in progress
}

/// Least loop depth first, then largest idepth
fn better(lca: Cfg, best: Cfg, sea: &Nodes) -> bool {
sea[lca].loop_depth < sea[best].loop_depth
|| (sea[lca].idepth > sea[best].idepth || best.node().to_if(sea).is_some())
}

/// Skip iteration if a backedge
fn is_forwards_edge(use_: Node, def: Node, sea: &Nodes) -> bool {
    // return use != null && def != null &&
    //     !(use.nIns()>2 && use.in(2)==def && (use instanceof LoopNode || (use instanceof PhiNode phi && phi.region() instanceof LoopNode)));
    todo!() // port of the commented Java above is still in progress
}

fn find_anti_dep(lca: Cfg, load: Load, early: Cfg, late: &[Option<Cfg>], sea: &Nodes) -> Cfg {
// // We could skip final-field loads here.
// // Walk LCA->early, flagging Load's block location choices
// for( CFGNode cfg=lca; early!=null && cfg!=early.idom(); cfg = cfg.idom() )
// cfg._anti = load._nid;
// // Walk load->mem uses, looking for Stores causing an anti-dep
// for( Node mem : load.mem()._outputs ) {
// switch( mem ) {
// case StoreNode st:
// lca = anti_dep(load,late[st._nid],st.cfg0(),lca,st);
// break;
// case PhiNode phi:
// // Repeat anti-dep for matching Phi inputs.
// // No anti-dep edges but may raise the LCA.
// for( int i=1; i<phi.nIns(); i++ )
// if( phi.in(i)==load.mem() )
// lca = anti_dep(load,phi.region().cfg(i),load.mem().cfg0(),lca,null);
// break;
// case LoadNode ld: break; // Loads do not cause anti-deps on other loads
// case ReturnNode ret: break; // Load must already be ahead of Return
// default: throw Utils.TODO();
// }
// }
// return lca;
    todo!() // port of the commented Java above is still in progress
}

fn anti_dep(load: Load, stblk: Cfg, defblk: Cfg, lca: Cfg, st: Option<Node>, sea: &Nodes) -> Cfg {
// // Walk store blocks "reach" from its scheduled location to its earliest
// for( ; stblk != defblk.idom(); stblk = stblk.idom() ) {
// // Store and Load overlap, need anti-dependence
// if( stblk._anti==load._nid ) {
// lca = stblk.idom(lca); // Raise Loads LCA
// if( lca == stblk && st != null && Utils.find(st._inputs,load) == -1 ) // And if something moved,
// st.addDef(load); // Add anti-dep as well
// return lca; // Cap this stores' anti-dep to here
// }
// }
// return lca;
    todo!() // port of the commented Java above is still in progress
}
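
// Why Loads need anti-dependences: given `ld = load(mem, p)` and
// `st = store(mem, p, v)` on the *same* incoming memory, nothing else
// orders them, yet `ld` must not float below `st`. Raising the Load's LCA
// (and adding the `st.addDef(load)` edge) pins the Load above the Store.
// (Illustrative pseudo-IR, not this repo's syntax.)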
1 change: 1 addition & 0 deletions src/sea_of_nodes/mod.rs
@@ -6,3 +6,4 @@ pub mod parser;
#[cfg(test)]
mod tests;
pub mod types;
mod global_code_motion;
5 changes: 5 additions & 0 deletions src/sea_of_nodes/nodes.rs
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::num::NonZeroU32;

pub use cfg::Cfg;
pub use id::Node;
pub use index::Op;
pub use node::{BoolOp, LoadOp, ProjOp, StartOp, StoreOp};
@@ -102,6 +103,9 @@ pub struct Nodes<'t> {
/// probed to see if it can be inserted. His edges are "locked", because
/// hacking his edges will change his hash.
hash: IdVec<Node, Option<NonZeroU32>>,

/// True if debug printer can use schedule info
pub scheduled: bool,
}

pub type NodeCreation<'t> = (Op<'t>, Vec<Option<Node>>);
@@ -128,6 +132,7 @@ impl<'t> Nodes<'t> {
walk_visited: IdSet::zeros(0),
gvn: HashMap::new(),
hash: IdVec::new(vec![None]),
scheduled: false,
}
}
pub fn len(&self) -> usize {
6 changes: 3 additions & 3 deletions src/sea_of_nodes/nodes/cfg.rs
@@ -33,10 +33,10 @@ pub struct CfgData {
/// change the CFG incrementally.
///
/// See <a href="https://en.wikipedia.org/wiki/Dominator_(graph_theory)">Wikipedia: Dominator</a>
-    idepth: u32,
+    pub idepth: u32,
     /// loop_depth is computed after optimization as part of scheduling.
-    loop_depth: u32,
-    anti: Option<Load>,
+    pub loop_depth: u32,
+    pub anti: Option<Load>,
}
impl CfgData {
pub fn new() -> Self {
