diff --git a/.pick_status.json b/.pick_status.json index 37a43841fc1..8e750cf3815 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1974,7 +1974,7 @@ "description": "aco/ra: fix GFX9- writelane", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "061b8bfd2980a5ed903febef42be288ab1eb4e40", "notes": null diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 9945362eaa4..989e566fa26 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1936,19 +1936,6 @@ bool operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr& instr, unsigned idx, PhysReg reg, RegClass rc) { - bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 || - instr->opcode == aco_opcode::v_writelane_b32_e64; - if (gfx_level <= GFX9 && is_writelane && idx <= 1) { - /* v_writelane_b32 can take two sgprs but only if one is m0. */ - bool is_other_sgpr = - instr->operands[!idx].isTemp() && - (!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0); - if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) { - instr->operands[idx].setFixed(m0); - return reg == m0; - } - } - if (reg.byte()) { unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc); if (reg.byte() % stride) @@ -2844,6 +2831,18 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg(); } + bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 || + instr->opcode == aco_opcode::v_writelane_b32_e64; + if (program->gfx_level <= GFX9 && is_writelane && instr->operands[0].isTemp() && + instr->operands[1].isTemp()) { + /* v_writelane_b32 can take two sgprs but only if one is m0. 
*/ + if (ctx.assignments[instr->operands[0].tempId()].reg != m0 && + ctx.assignments[instr->operands[1].tempId()].reg != m0) { + instr->operands[0].setFixed(m0); + fixed = true; + } + } + if (fixed) handle_fixed_operands(ctx, register_file, parallelcopy, instr); diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index 456c42359d4..2a8ac922fc6 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -410,3 +410,21 @@ BEGIN_TEST(regalloc.vinterp_fp16) finish_ra_test(ra_test_policy()); END_TEST + +BEGIN_TEST(regalloc.writelane) + //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm + if (!setup_cs("v1 s1 s1 s1", GFX8)) + return; + + //! s1: %tmp:m0 = p_parallelcopy %in3:s[2] + Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]); + + //! s1: %in1_2:m0, s1: %tmp_2:s[0] = p_parallelcopy %in1:s[0], %tmp:m0 + //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0] + Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]); + + //! p_unit_test %tmp_2:s[0], %tmp2:v[0] + bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2); + + finish_ra_test(ra_test_policy()); +END_TEST