aco/ra: fix GFX9- writelane

061b8bfd29 moved handling of fixed operands earlier, but it should have
moved the fixing of writelane operands earlier too.

This fixes Crucible's func.uniform-subgroup.exclusive.imin64 on GFX8.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 061b8bfd29 ("aco/ra: rework fixed operands")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27583>
(cherry picked from commit d0595e48055b19c3dbdc340860dd900c0ea0e0a4)
This commit is contained in:
Rhys Perry 2024-02-12 16:54:20 +00:00 committed by Eric Engestrom
parent c7f3e736a0
commit d50d8ea2ba
3 changed files with 31 additions and 14 deletions

View file

@ -1974,7 +1974,7 @@
"description": "aco/ra: fix GFX9- writelane",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "061b8bfd2980a5ed903febef42be288ab1eb4e40",
"notes": null

View file

@ -1936,19 +1936,6 @@ bool
operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
RegClass rc)
{
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32_e64;
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
/* v_writelane_b32 can take two sgprs but only if one is m0. */
bool is_other_sgpr =
instr->operands[!idx].isTemp() &&
(!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0);
if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) {
instr->operands[idx].setFixed(m0);
return reg == m0;
}
}
if (reg.byte()) {
unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc);
if (reg.byte() % stride)
@ -2844,6 +2831,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
}
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32_e64;
if (program->gfx_level <= GFX9 && is_writelane && instr->operands[0].isTemp() &&
instr->operands[1].isTemp()) {
/* v_writelane_b32 can take two sgprs but only if one is m0. */
if (ctx.assignments[instr->operands[0].tempId()].reg != m0 &&
ctx.assignments[instr->operands[1].tempId()].reg != m0) {
instr->operands[0].setFixed(m0);
fixed = true;
}
}
if (fixed)
handle_fixed_operands(ctx, register_file, parallelcopy, instr);

View file

@ -410,3 +410,21 @@ BEGIN_TEST(regalloc.vinterp_fp16)
finish_ra_test(ra_test_policy());
END_TEST
BEGIN_TEST(regalloc.writelane)
   /* Register-allocation test for v_writelane_b32 on GFX8: the instruction can
    * take two SGPR operands only if one of them is m0, so the allocator has to
    * move one operand into m0 even when m0 is already occupied by another
    * live temporary.
    */
   //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm
   if (!setup_cs("v1 s1 s1 s1", GFX8))
      return;

   /* Place a temporary in m0 (copied from input 3) so that m0 is busy when the
    * writelane below is allocated.
    */
   //! s1: %tmp:m0 = p_parallelcopy %in3:s[2]
   Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]);

   /* Expected: the first writelane operand is moved into m0 and the previous
    * m0 occupant is moved out to s[0] by the same parallelcopy.
    */
   //! s1: %in1_2:m0, s1: %tmp_2:s[0] = p_parallelcopy %in1:s[0], %tmp:m0
   //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0]
   Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]);

   /* Use both results afterwards so the relocated m0 temporary stays live
    * across the writelane.
    */
   //! p_unit_test %tmp_2:s[0], %tmp2:v[0]
   bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2);

   finish_ra_test(ra_test_policy());
END_TEST