diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index d57bfabed51..fd15f225e64 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2616,14 +2616,29 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) || !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() || instr->operands[2].getTemp().type() != RegType::vgpr || - ((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) && - (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) || - instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 || - instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0) + (!instr->operands[0].isOfType(RegType::vgpr) && + !instr->operands[1].isOfType(RegType::vgpr)) || + instr->operands[2].physReg().byte() != 0 || instr->valu().opsel[2]) return; - if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr) - std::swap(instr->operands[0], instr->operands[1]); + if (instr->isVOP3P() && (instr->valu().opsel_lo != 0 || instr->valu().opsel_hi != 0x7)) + return; + + if ((instr->operands[0].physReg().byte() != 0 || instr->operands[1].physReg().byte() != 0 || + instr->valu().opsel) && + program->gfx_level < GFX11) + return; + + unsigned im_mask = instr->isDPP16() ? 0x3 : 0; + if (instr->valu().omod || instr->valu().clamp || (instr->valu().abs & ~im_mask) || + (instr->valu().neg & ~im_mask)) + return; + + if (!instr->operands[1].isOfType(RegType::vgpr)) + instr->valu().swapOperands(0, 1); + + if (!instr->operands[0].isOfType(RegType::vgpr) && instr->valu().opsel[0]) + return; unsigned def_id = instr->definitions[0].tempId(); if (ctx.assignments[def_id].affinity) { @@ -2633,7 +2648,8 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil return; } - instr->format = Format::VOP2; + instr->format = (Format)(((unsigned)withoutVOP3(instr->format) & ~(unsigned)Format::VOP3P) | + (unsigned)Format::VOP2); instr->valu().opsel_hi = 0; switch (instr->opcode) { case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;