From 3b6e6e985ee5d21d1344f19c65c7c23623efe52d Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Mon, 11 Dec 2023 13:28:30 -0600
Subject: [PATCH] nak: Copy propagate and constant fold OpPrmt

This comes up fairly frequently in int64 and fp64 use-cases.  In
particular, we see a lot of `prmt rZ 0x4444 rZ` which is just zero.

Part-of:
---
Review notes: the constant-folding byte shift now uses (s % 4) * 8 so that
bytes selected from srcs[1] (s >= 4) no longer shift a u32 by 32 or more
bits.  The blob hashes on the index lines predate this change.

 src/nouveau/compiler/nak/ir.rs            | 24 ++++++++++++
 src/nouveau/compiler/nak/opt_copy_prop.rs | 46 +++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 34a698bb2e2..24136a31906 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -1122,6 +1122,30 @@ impl Src {
         }
     }
 
+    /// Returns the value of this source as a `u32` if it is a known
+    /// compile-time constant.
+    ///
+    /// `SrcRef::Zero` yields `Some(0)` and `SrcRef::Imm32(u)` yields
+    /// `Some(u)`.  Sources with a modifier, as well as constant-buffer, SSA
+    /// and register sources, yield `None`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an unmodified source is of any other kind, as those are not
+    /// valid integer sources.
+    pub fn as_u32(&self) -> Option<u32> {
+        if self.src_mod.is_none() {
+            match self.src_ref {
+                SrcRef::Zero => Some(0),
+                SrcRef::Imm32(u) => Some(u),
+                SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
+                _ => panic!("Invalid integer source"),
+            }
+        } else {
+            None
+        }
+    }
+
     pub fn as_imm_not_i20(&self) -> Option<u32> {
         match self.src_ref {
             SrcRef::Imm32(i) => {
diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs
index 624e3da0dcd..2e1313ff420 100644
--- a/src/nouveau/compiler/nak/opt_copy_prop.rs
+++ b/src/nouveau/compiler/nak/opt_copy_prop.rs
@@ -384,6 +384,52 @@ impl CopyPropPass {
                 assert!(dst.comps() == 1);
                 self.add_copy(dst[0], SrcType::I32, neg.src.ineg());
             }
+            Op::Prmt(prmt) => {
+                let dst = prmt.dst.as_ssa().unwrap();
+                assert!(dst.comps() == 1);
+                // Only index mode with an immediate selector is handled.
+                if prmt.mode != PrmtMode::Index {
+                    return;
+                }
+                let SrcRef::Imm32(sel) = prmt.sel.src_ref else {
+                    return;
+                };
+
+                if sel == 0x3210 {
+                    // Identity permutation of srcs[0]
+                    self.add_copy(dst[0], SrcType::GPR, prmt.srcs[0]);
+                } else if sel == 0x7654 {
+                    // Identity permutation of srcs[1]
+                    self.add_copy(dst[0], SrcType::GPR, prmt.srcs[1]);
+                } else {
+                    // Try to constant fold.  Each selector nibble picks one
+                    // destination byte: the low three bits index a byte of
+                    // srcs[1]:srcs[0] and the top bit replicates that byte's
+                    // sign bit instead.
+                    let mut is_imm = true;
+                    let mut imm = 0_u32;
+                    for d in 0..4 {
+                        let s = ((sel >> d * 4) & 0x7) as usize;
+                        let sign = (sel >> d * 4) & 0x8 != 0;
+                        if let Some(u) = prmt.srcs[s / 4].as_u32() {
+                            // s / 4 already chose the source so only s % 4
+                            // selects the byte.  Shifting a u32 by s * 8
+                            // overflows (and panics in debug) for s >= 4.
+                            let mut sb = (u >> ((s % 4) * 8)) as u8;
+                            if sign {
+                                sb = ((sb as i8) >> 7) as u8;
+                            }
+                            imm |= (sb as u32) << (d * 8);
+                        } else {
+                            is_imm = false;
+                            break;
+                        }
+                    }
+                    if is_imm {
+                        self.add_copy(dst[0], SrcType::GPR, imm.into());
+                    }
+                }
+            }
             Op::Copy(copy) => {
                 let dst = copy.dst.as_ssa().unwrap();
                 assert!(dst.comps() == 1);