From 995eea8d04f3f3680c643eee54b4278a6be21552 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 13 Dec 2023 09:06:17 -0600 Subject: [PATCH] nak: Split fmul/ffma handling from fmulz/ffmaz They're enough of a special case that things are going to get confusing when we start adding bit sizes to fmul/ffma. Let's make them a special case so they can assert all their things. Part-of: --- src/nouveau/compiler/nak/from_nir.rs | 44 ++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index c37f76f06d8..0da0f5a4b37 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -701,7 +701,7 @@ impl<'a> ShaderFromNir<'a> { dst } nir_op_fexp2 => b.mufu(MuFuOp::Exp2, srcs[0]), - nir_op_ffma | nir_op_ffmaz => { + nir_op_ffma => { let ftype = FloatType::from_bits(alu.def.bit_size().into()); assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); @@ -712,8 +712,25 @@ impl<'a> ShaderFromNir<'a> { rnd_mode: self.float_ctl[ftype].rnd_mode, // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ // anyway so only set one of the two bits. - ftz: alu.op != nir_op_ffmaz && self.float_ctl[ftype].ftz, - dnz: alu.op == nir_op_ffmaz, + ftz: self.float_ctl[ftype].ftz, + dnz: false, + }); + dst + } + nir_op_ffmaz => { + assert!(alu.def.bit_size() == 32); + // DNZ implies FTZ so we need FTZ set or this is invalid + assert!(self.float_ctl.fp32.ftz); + let dst = b.alloc_ssa(RegFile::GPR, 1); + b.push_op(OpFFma { + dst: dst.into(), + srcs: [srcs[0], srcs[1], srcs[2]], + saturate: self.try_saturate_alu_dst(&alu.def), + rnd_mode: self.float_ctl.fp32.rnd_mode, + // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ + // anyway so only set one of the two bits. + ftz: false, + dnz: true, }); dst } @@ -732,7 +749,7 @@ impl<'a> ShaderFromNir<'a> { }); dst } - nir_op_fmul | nir_op_fmulz => { + nir_op_fmul => { let ftype = FloatType::from_bits(alu.def.bit_size().into()); assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); @@ -741,10 +758,25 @@ impl<'a> ShaderFromNir<'a> { srcs: [srcs[0], srcs[1]], saturate: self.try_saturate_alu_dst(&alu.def), rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, + dnz: false, + }); + dst + } + nir_op_fmulz => { + assert!(alu.def.bit_size() == 32); + // DNZ implies FTZ so we need FTZ set or this is invalid + assert!(self.float_ctl.fp32.ftz); + let dst = b.alloc_ssa(RegFile::GPR, 1); + b.push_op(OpFMul { + dst: dst.into(), + srcs: [srcs[0], srcs[1]], + saturate: self.try_saturate_alu_dst(&alu.def), + rnd_mode: self.float_ctl.fp32.rnd_mode, // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ // anyway so only set one of the two bits. - ftz: alu.op != nir_op_fmulz && self.float_ctl[ftype].ftz, - dnz: alu.op == nir_op_fmulz, + ftz: false, + dnz: true, }); dst }