From 12acb2ef62b82b8cc86e8319c9fbd51333e8a3b0 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Mon, 4 Dec 2023 12:25:43 -0600
Subject: [PATCH] nak: Natively implement 64-bit shifts

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26246>
---
 src/nouveau/compiler/nak.rs          |   3 +-
 src/nouveau/compiler/nak_from_nir.rs | 180 +++++++++++++++++++++------
 2 files changed, 143 insertions(+), 40 deletions(-)

diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 8580b7457e4..9c1efae875f 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -128,7 +128,8 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_usub_sat = true; // TODO
     op.lower_iadd_sat = true; // TODO
     op.use_interpolated_input_intrinsics = true;
-    op.lower_int64_options = !(nir_lower_iadd64 | nir_lower_ineg64);
+    op.lower_int64_options =
+        !(nir_lower_iadd64 | nir_lower_ineg64 | nir_lower_shift64);
     op.lower_ldexp = true;
     op.lower_fmod = true;
     op.lower_ffract = true;
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 6bbc7d03ce0..37cf9f23213 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -919,34 +919,102 @@ impl<'a> ShaderFromNir<'a> {
                 b.lop2(LogicOp::new_lut(&|x, y, _| x | y), srcs[0], srcs[1])
             }
             nir_op_ishl => {
-                assert!(alu.def.bit_size() == 32);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpShf {
-                    dst: dst.into(),
-                    low: srcs[0],
-                    high: 0.into(),
-                    shift: srcs[1],
-                    right: false,
-                    wrap: true,
-                    data_type: IntType::I32,
-                    dst_high: false,
-                });
-                dst
+                let x = *srcs[0].as_ssa().unwrap();
+                let shift = srcs[1];
+                if alu.def.bit_size() == 64 {
+                    // For 64-bit shifts, we have to use clamp mode so we need
+                    // to mask the shift in order to satisfy NIR semantics.
+                    let shift = b.lop2(
+                        LogicOp::new_lut(&|x, y, _| x & y),
+                        shift,
+                        0x3f.into(),
+                    );
+                    let dst = b.alloc_ssa(RegFile::GPR, 2);
+                    b.push_op(OpShf {
+                        dst: dst[0].into(),
+                        low: 0.into(),
+                        high: x[0].into(),
+                        shift: shift.into(),
+                        right: false,
+                        wrap: false,
+                        data_type: IntType::U32,
+                        dst_high: true,
+                    });
+                    b.push_op(OpShf {
+                        dst: dst[1].into(),
+                        low: x[0].into(),
+                        high: x[1].into(),
+                        shift: shift.into(),
+                        right: false,
+                        wrap: false,
+                        data_type: IntType::U64,
+                        dst_high: true,
+                    });
+                    dst
+                } else {
+                    assert!(alu.def.bit_size() == 32);
+                    let dst = b.alloc_ssa(RegFile::GPR, 1);
+                    b.push_op(OpShf {
+                        dst: dst.into(),
+                        low: x.into(),
+                        high: 0.into(),
+                        shift: shift,
+                        right: false,
+                        wrap: true,
+                        data_type: IntType::U32,
+                        dst_high: false,
+                    });
+                    dst
+                }
             }
             nir_op_ishr => {
-                assert!(alu.def.bit_size() == 32);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpShf {
-                    dst: dst.into(),
-                    low: 0.into(),
-                    high: srcs[0],
-                    shift: srcs[1],
-                    right: true,
-                    wrap: true,
-                    data_type: IntType::I32,
-                    dst_high: true,
-                });
-                dst
+                let x = *srcs[0].as_ssa().unwrap();
+                let shift = srcs[1];
+                if alu.def.bit_size() == 64 {
+                    // For 64-bit shifts, we have to use clamp mode so we need
+                    // to mask the shift in order to satisfy NIR semantics.
+                    let shift = b.lop2(
+                        LogicOp::new_lut(&|x, y, _| x & y),
+                        shift,
+                        0x3f.into(),
+                    );
+                    let dst = b.alloc_ssa(RegFile::GPR, 2);
+                    b.push_op(OpShf {
+                        dst: dst[0].into(),
+                        low: x[0].into(),
+                        high: x[1].into(),
+                        shift: shift.into(),
+                        right: true,
+                        wrap: false,
+                        data_type: IntType::I64,
+                        dst_high: false,
+                    });
+                    b.push_op(OpShf {
+                        dst: dst[1].into(),
+                        low: x[0].into(),
+                        high: x[1].into(),
+                        shift: shift.into(),
+                        right: true,
+                        wrap: false,
+                        data_type: IntType::I32,
+                        dst_high: true,
+                    });
+                    dst
+                } else {
+                    assert!(alu.def.bit_size() == 32);
+                    let dst = b.alloc_ssa(RegFile::GPR, 1);
+                    b.push_op(OpShf {
+                        dst: dst.into(),
+                        low: 0.into(),
+                        high: x.into(),
+                        shift: shift,
+                        right: true,
+                        wrap: true,
+                        data_type: IntType::I32,
+                        dst_high: true,
+                    });
+                    dst
+                }
             }
             nir_op_isign => {
                 let gt_pred = b.alloc_ssa(RegFile::Pred, 1);
@@ -1104,19 +1172,53 @@ impl<'a> ShaderFromNir<'a> {
                 dst
             }
             nir_op_ushr => {
-                assert!(alu.def.bit_size() == 32);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpShf {
-                    dst: dst.into(),
-                    low: srcs[0],
-                    high: 0.into(),
-                    shift: srcs[1],
-                    right: true,
-                    wrap: true,
-                    data_type: IntType::U32,
-                    dst_high: false,
-                });
-                dst
+                let x = *srcs[0].as_ssa().unwrap();
+                let shift = srcs[1];
+                if alu.def.bit_size() == 64 {
+                    // For 64-bit shifts, we have to use clamp mode so we need
+                    // to mask the shift in order to satisfy NIR semantics.
+                    let shift = b.lop2(
+                        LogicOp::new_lut(&|x, y, _| x & y),
+                        shift,
+                        0x3f.into(),
+                    );
+                    let dst = b.alloc_ssa(RegFile::GPR, 2);
+                    b.push_op(OpShf {
+                        dst: dst[0].into(),
+                        low: x[0].into(),
+                        high: x[1].into(),
+                        shift: shift.into(),
+                        right: true,
+                        wrap: false,
+                        data_type: IntType::U64,
+                        dst_high: false,
+                    });
+                    b.push_op(OpShf {
+                        dst: dst[1].into(),
+                        low: x[0].into(),
+                        high: x[1].into(),
+                        shift: shift.into(),
+                        right: true,
+                        wrap: false,
+                        data_type: IntType::U32,
+                        dst_high: true,
+                    });
+                    dst
+                } else {
+                    assert!(alu.def.bit_size() == 32);
+                    let dst = b.alloc_ssa(RegFile::GPR, 1);
+                    b.push_op(OpShf {
+                        dst: dst.into(),
+                        low: x.into(),
+                        high: 0.into(),
+                        shift: shift,
+                        right: true,
+                        wrap: true,
+                        data_type: IntType::U32,
+                        dst_high: false,
+                    });
+                    dst
+                }
             }
             nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
                 // TODO: Real coarse derivatives