From 995eea8d04f3f3680c643eee54b4278a6be21552 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Wed, 13 Dec 2023 09:06:17 -0600
Subject: [PATCH] nak: Split fmul/ffma handling from fmulz/ffmaz

fmulz/ffmaz are enough of a special case that things are going to get
confusing when we start adding bit sizes to fmul/ffma.  Let's handle them
in their own match arms so they can assert their own requirements (32-bit
only, with FTZ enabled for fp32).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26743>
---
 src/nouveau/compiler/nak/from_nir.rs | 44 ++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index c37f76f06d8..0da0f5a4b37 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -701,7 +701,7 @@ impl<'a> ShaderFromNir<'a> {
                 dst
             }
             nir_op_fexp2 => b.mufu(MuFuOp::Exp2, srcs[0]),
-            nir_op_ffma | nir_op_ffmaz => {
+            nir_op_ffma => {
                 let ftype = FloatType::from_bits(alu.def.bit_size().into());
                 assert!(alu.def.bit_size() == 32);
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
@@ -712,8 +712,25 @@ impl<'a> ShaderFromNir<'a> {
                     rnd_mode: self.float_ctl[ftype].rnd_mode,
                     // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
                     // anyway so only set one of the two bits.
-                    ftz: alu.op != nir_op_ffmaz && self.float_ctl[ftype].ftz,
-                    dnz: alu.op == nir_op_ffmaz,
+                    ftz: self.float_ctl[ftype].ftz,
+                    dnz: false,
+                });
+                dst
+            }
+            nir_op_ffmaz => {
+                assert!(alu.def.bit_size() == 32);
+                // DNZ implies FTZ so we need FTZ set or this is invalid
+                assert!(self.float_ctl.fp32.ftz);
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpFFma {
+                    dst: dst.into(),
+                    srcs: [srcs[0], srcs[1], srcs[2]],
+                    saturate: self.try_saturate_alu_dst(&alu.def),
+                    rnd_mode: self.float_ctl.fp32.rnd_mode,
+                    // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
+                    // anyway so only set one of the two bits.
+                    ftz: false,
+                    dnz: true,
                 });
                 dst
             }
@@ -732,7 +749,7 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
-            nir_op_fmul | nir_op_fmulz => {
+            nir_op_fmul => {
                 let ftype = FloatType::from_bits(alu.def.bit_size().into());
                 assert!(alu.def.bit_size() == 32);
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
@@ -741,10 +758,25 @@ impl<'a> ShaderFromNir<'a> {
                     srcs: [srcs[0], srcs[1]],
                     saturate: self.try_saturate_alu_dst(&alu.def),
                     rnd_mode: self.float_ctl[ftype].rnd_mode,
+                    ftz: self.float_ctl[ftype].ftz,
+                    dnz: false,
+                });
+                dst
+            }
+            nir_op_fmulz => {
+                assert!(alu.def.bit_size() == 32);
+                // DNZ implies FTZ so we need FTZ set or this is invalid
+                assert!(self.float_ctl.fp32.ftz);
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpFMul {
+                    dst: dst.into(),
+                    srcs: [srcs[0], srcs[1]],
+                    saturate: self.try_saturate_alu_dst(&alu.def),
+                    rnd_mode: self.float_ctl.fp32.rnd_mode,
                     // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
                     // anyway so only set one of the two bits.
-                    ftz: alu.op != nir_op_fmulz && self.float_ctl[ftype].ftz,
-                    dnz: alu.op == nir_op_fmulz,
+                    ftz: false,
+                    dnz: true,
                 });
                 dst
             }