From d50d8ea2bae84ac3da0e435614d73cc062b6630e Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 12 Feb 2024 16:54:20 +0000 Subject: [PATCH] aco/ra: fix GFX9- writelane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 061b8bfd2980 moved handling of fixed operands earlier, but it should have moved the fixing of writelane operands earlier too. This fixes Crucible's func.uniform-subgroup.exclusive.imin64 on GFX8. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Fixes: 061b8bfd2980 ("aco/ra: rework fixed operands") Part-of: (cherry picked from commit d0595e48055b19c3dbdc340860dd900c0ea0e0a4) --- .pick_status.json | 2 +- src/amd/compiler/aco_register_allocation.cpp | 25 ++++++++++---------- src/amd/compiler/tests/test_regalloc.cpp | 18 ++++++++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 37a43841fc1..8e750cf3815 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1974,7 +1974,7 @@ "description": "aco/ra: fix GFX9- writelane", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "061b8bfd2980a5ed903febef42be288ab1eb4e40", "notes": null diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 9945362eaa4..989e566fa26 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1936,19 +1936,6 @@ bool operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg, RegClass rc) { - bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 || - instr->opcode == aco_opcode::v_writelane_b32_e64; - if (gfx_level <= GFX9 && is_writelane && idx <= 1) { - /* v_writelane_b32 can take two sgprs but only if one is m0. 
*/ - bool is_other_sgpr = - instr->operands[!idx].isTemp() && - (!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0); - if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) { - instr->operands[idx].setFixed(m0); - return reg == m0; - } - } - if (reg.byte()) { unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc); if (reg.byte() % stride) @@ -2844,6 +2831,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg(); } + bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 || + instr->opcode == aco_opcode::v_writelane_b32_e64; + if (program->gfx_level <= GFX9 && is_writelane && instr->operands[0].isTemp() && + instr->operands[1].isTemp()) { + /* v_writelane_b32 can take two sgprs but only if one is m0. */ + if (ctx.assignments[instr->operands[0].tempId()].reg != m0 && + ctx.assignments[instr->operands[1].tempId()].reg != m0) { + instr->operands[0].setFixed(m0); + fixed = true; + } + } + if (fixed) handle_fixed_operands(ctx, register_file, parallelcopy, instr); diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index 456c42359d4..2a8ac922fc6 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -410,3 +410,21 @@ BEGIN_TEST(regalloc.vinterp_fp16) finish_ra_test(ra_test_policy()); END_TEST + +BEGIN_TEST(regalloc.writelane) + //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm + if (!setup_cs("v1 s1 s1 s1", GFX8)) + return; + + //! s1: %tmp:m0 = p_parallelcopy %in3:s[2] + Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]); + + //! s1: %in1_2:m0, s1: %tmp_2:s[0] = p_parallelcopy %in1:s[0], %tmp:m0 + //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0] + Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]); + + //! 
p_unit_test %tmp_2:s[0], %tmp2:v[0] + bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2); + + finish_ra_test(ra_test_policy()); +END_TEST