nak: Add a jump threading pass
This saves 16 instructions on the compute shader in Sascha Willems' computecloth example. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26473>
This commit is contained in:
parent
786bf749bc
commit
b2420fae4b
5 changed files with 145 additions and 2 deletions
|
|
@ -275,6 +275,7 @@ pub extern "C" fn nak_compile_shader(
|
|||
s.lower_ineg();
|
||||
s.lower_par_copies();
|
||||
s.lower_copy_swap();
|
||||
s.opt_jump_thread();
|
||||
s.calc_instr_deps();
|
||||
|
||||
if DEBUG.print() {
|
||||
|
|
|
|||
|
|
@ -328,6 +328,11 @@ impl<N> CFG<N> {
|
|||
pub fn pred_indices(&self, idx: usize) -> &[usize] {
|
||||
&self.nodes[idx].pred[..]
|
||||
}
|
||||
|
||||
/// Removes every node from the graph and yields the node payloads in
/// index order.
///
/// The cached `has_loop` flag is cleared up front because an empty graph
/// has no loops.
pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = N> + 'a {
    self.has_loop = false;
    let entries = self.nodes.drain(..);
    entries.map(|entry| entry.node)
}
|
||||
}
|
||||
|
||||
impl<N> Index<usize> for CFG<N> {
|
||||
|
|
|
|||
|
|
@ -4024,7 +4024,7 @@ impl DisplayOp for OpBSync {
|
|||
impl_display_for_op!(OpBSync);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBra {
|
||||
pub target: Label,
|
||||
}
|
||||
|
|
@ -4037,7 +4037,7 @@ impl DisplayOp for OpBra {
|
|||
impl_display_for_op!(OpBra);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpExit {}
|
||||
|
||||
impl DisplayOp for OpExit {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ mod nir;
|
|||
mod opt_bar_prop;
|
||||
mod opt_copy_prop;
|
||||
mod opt_dce;
|
||||
mod opt_jump_thread;
|
||||
mod opt_lop;
|
||||
mod opt_out;
|
||||
mod repair_ssa;
|
||||
|
|
|
|||
136
src/nouveau/compiler/nak/opt_jump_thread.rs
Normal file
136
src/nouveau/compiler/nak/opt_jump_thread.rs
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
// Copyright © 2023 Mel Henning
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
use crate::cfg::CFGBuilder;
|
||||
use crate::ir::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn clone_branch(op: &Op) -> Op {
|
||||
match op {
|
||||
Op::Bra(b) => Op::Bra(b.clone()),
|
||||
Op::Exit(e) => Op::Exit(e.clone()),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn jump_thread(func: &mut Function) -> bool {
|
||||
// Let's call a basic block "trivial" if its only instruction is an
|
||||
// unconditional branch. If a block is trivial, we can update all of its
|
||||
// predecessors to jump to its sucessor.
|
||||
//
|
||||
// A single reverse pass over the basic blocks is enough to update all of
|
||||
// the edges we're interested in. Roughly, if we assume that all loops in
|
||||
// the shader can terminate, then loop heads are never trivial and we
|
||||
// never replace a backward edge. Therefore, in each step we only need to
|
||||
// make sure that later control flow has been replaced in order to update
|
||||
// the current block as much as possible.
|
||||
//
|
||||
// We additionally try to update a branch-to-empty-block to point to the
|
||||
// block's successor, which along with block dce/reordering can sometimes
|
||||
// enable a later optimization that converts branches to fallthrough.
|
||||
let mut progress = false;
|
||||
|
||||
// A branch to label can be replaced with Op
|
||||
let mut replacements: HashMap<Label, Op> = HashMap::new();
|
||||
|
||||
// Invariant 1: At the end of each loop iteration,
|
||||
// every trivial block with an index in [i, blocks.len())
|
||||
// is represented in replacements.keys()
|
||||
// Invariant 2: replacements.values() never contains
|
||||
// a branch to a trivial block
|
||||
for i in (0..func.blocks.len()).rev() {
|
||||
// Replace the branch if possible
|
||||
if let Some(instr) = func.blocks[i].instrs.last_mut() {
|
||||
if let Op::Bra(OpBra { target }) = instr.op {
|
||||
if let Some(replacement) = replacements.get(&target) {
|
||||
instr.op = clone_branch(replacement);
|
||||
progress = true;
|
||||
}
|
||||
// If the branch target was previously a trivial block then the
|
||||
// branch was previously a forward edge (see above) and by
|
||||
// invariants 1 and 2 we just updated the branch to target
|
||||
// a nontrivial block
|
||||
}
|
||||
}
|
||||
|
||||
// Is this block trivial?
|
||||
let block_label = func.blocks[i].label;
|
||||
match &func.blocks[i].instrs[..] {
|
||||
[instr] => {
|
||||
if instr.is_branch() && instr.pred.is_true() {
|
||||
// Upholds invariant 2 because we updated the branch above
|
||||
replacements.insert(block_label, clone_branch(&instr.op));
|
||||
}
|
||||
}
|
||||
[] => {
|
||||
// Empty block - falls through
|
||||
// Our successor might be trivial, so we need to
|
||||
// apply the rewrite map to uphold invariant 2
|
||||
let target_label = func.blocks[i + 1].label;
|
||||
let replacement = replacements
|
||||
.get(&target_label)
|
||||
.map(clone_branch)
|
||||
.unwrap_or_else(|| {
|
||||
Op::Bra(OpBra {
|
||||
target: target_label,
|
||||
})
|
||||
});
|
||||
replacements.insert(block_label, replacement);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if progress {
|
||||
// We don't update the CFG above, so rewrite it if we made progress
|
||||
rewrite_cfg(func);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
fn rewrite_cfg(func: &mut Function) {
|
||||
// CFGBuilder takes care of removing dead blocks for us
|
||||
// We use the basic block's label to identify it
|
||||
let mut builder = CFGBuilder::new();
|
||||
|
||||
for i in 0..func.blocks.len() {
|
||||
let block = &func.blocks[i];
|
||||
// Note: fall-though must be first edge
|
||||
if block.falls_through() {
|
||||
let next_block = &func.blocks[i + 1];
|
||||
builder.add_edge(block.label, next_block.label);
|
||||
}
|
||||
if let Some(control_flow) = block.branch() {
|
||||
match &control_flow.op {
|
||||
Op::Bra(bra) => {
|
||||
builder.add_edge(block.label, bra.target);
|
||||
}
|
||||
Op::Exit(_) => (),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
for block in func.blocks.drain() {
|
||||
builder.add_node(block.label, block);
|
||||
}
|
||||
let _ = std::mem::replace(&mut func.blocks, builder.as_cfg());
|
||||
}
|
||||
|
||||
impl Function {
|
||||
pub fn opt_jump_thread(&mut self) {
|
||||
jump_thread(self);
|
||||
}
|
||||
}
|
||||
|
||||
impl Shader {
|
||||
/// A simple jump threading pass
|
||||
///
|
||||
/// Note that this can introduce critical edges, so it cannot be run before RA
|
||||
pub fn opt_jump_thread(&mut self) {
|
||||
for f in &mut self.functions {
|
||||
f.opt_jump_thread();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue