aco/ra: use fmac with DPP/opsel on GFX11

Foz-DB GFX1100:
Totals from 2 (0.00% of 132657) affected shaders:
CodeSize: 8764 -> 8732 (-0.37%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23059>
This commit is contained in:
Georg Lehmann 2023-05-16 16:32:28 +02:00 committed by Marge Bot
parent 5b2b782940
commit b4ff7e2567

View file

@ -2616,14 +2616,29 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil
(instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
!instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() ||
instr->operands[2].getTemp().type() != RegType::vgpr ||
((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) &&
(!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) ||
instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 ||
instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0)
(!instr->operands[0].isOfType(RegType::vgpr) &&
!instr->operands[1].isOfType(RegType::vgpr)) ||
instr->operands[2].physReg().byte() != 0 || instr->valu().opsel[2])
return;
if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)
std::swap(instr->operands[0], instr->operands[1]);
if (instr->isVOP3P() && (instr->valu().opsel_lo != 0 || instr->valu().opsel_hi != 0x7))
return;
if ((instr->operands[0].physReg().byte() != 0 || instr->operands[1].physReg().byte() != 0 ||
instr->valu().opsel) &&
program->gfx_level < GFX11)
return;
unsigned im_mask = instr->isDPP16() ? 0x3 : 0;
if (instr->valu().omod || instr->valu().clamp || (instr->valu().abs & ~im_mask) ||
(instr->valu().neg & ~im_mask))
return;
if (!instr->operands[1].isOfType(RegType::vgpr))
instr->valu().swapOperands(0, 1);
if (!instr->operands[0].isOfType(RegType::vgpr) && instr->valu().opsel[0])
return;
unsigned def_id = instr->definitions[0].tempId();
if (ctx.assignments[def_id].affinity) {
@ -2633,7 +2648,8 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil
return;
}
instr->format = Format::VOP2;
instr->format = (Format)(((unsigned)withoutVOP3(instr->format) & ~(unsigned)Format::VOP3P) |
(unsigned)Format::VOP2);
instr->valu().opsel_hi = 0;
switch (instr->opcode) {
case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;