aco/ra: fix GFX9- writelane
061b8bfd29 moved handling of fixed operands earlier, but it should have moved the fixing of writelane operands earlier too. This fixes Crucible's func.uniform-subgroup.exclusive.imin64 on GFX8. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Fixes: 061b8bfd29 ("aco/ra: rework fixed operands") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27583> (cherry picked from commit d0595e48055b19c3dbdc340860dd900c0ea0e0a4)
This commit is contained in:
parent
c7f3e736a0
commit
d50d8ea2ba
3 changed files with 31 additions and 14 deletions
|
|
@ -1974,7 +1974,7 @@
|
|||
"description": "aco/ra: fix GFX9- writelane",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "061b8bfd2980a5ed903febef42be288ab1eb4e40",
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -1936,19 +1936,6 @@ bool
|
|||
operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
|
||||
RegClass rc)
|
||||
{
|
||||
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||
instr->opcode == aco_opcode::v_writelane_b32_e64;
|
||||
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
|
||||
/* v_writelane_b32 can take two sgprs but only if one is m0. */
|
||||
bool is_other_sgpr =
|
||||
instr->operands[!idx].isTemp() &&
|
||||
(!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0);
|
||||
if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) {
|
||||
instr->operands[idx].setFixed(m0);
|
||||
return reg == m0;
|
||||
}
|
||||
}
|
||||
|
||||
if (reg.byte()) {
|
||||
unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc);
|
||||
if (reg.byte() % stride)
|
||||
|
|
@ -2844,6 +2831,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
|
||||
}
|
||||
|
||||
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||
instr->opcode == aco_opcode::v_writelane_b32_e64;
|
||||
if (program->gfx_level <= GFX9 && is_writelane && instr->operands[0].isTemp() &&
|
||||
instr->operands[1].isTemp()) {
|
||||
/* v_writelane_b32 can take two sgprs but only if one is m0. */
|
||||
if (ctx.assignments[instr->operands[0].tempId()].reg != m0 &&
|
||||
ctx.assignments[instr->operands[1].tempId()].reg != m0) {
|
||||
instr->operands[0].setFixed(m0);
|
||||
fixed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fixed)
|
||||
handle_fixed_operands(ctx, register_file, parallelcopy, instr);
|
||||
|
||||
|
|
|
|||
|
|
@ -410,3 +410,21 @@ BEGIN_TEST(regalloc.vinterp_fp16)
|
|||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(regalloc.writelane)
|
||||
//>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm
|
||||
if (!setup_cs("v1 s1 s1 s1", GFX8))
|
||||
return;
|
||||
|
||||
//! s1: %tmp:m0 = p_parallelcopy %in3:s[2]
|
||||
Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]);
|
||||
|
||||
//! s1: %in1_2:m0, s1: %tmp_2:s[0] = p_parallelcopy %in1:s[0], %tmp:m0
|
||||
//! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0]
|
||||
Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]);
|
||||
|
||||
//! p_unit_test %tmp_2:s[0], %tmp2:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2);
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue