From 644c5e95a0f8e6b46be9cc8aac013be7588f8e46 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 16 May 2023 17:10:57 +0200
Subject: [PATCH] aco: use get_operand_size for dpp opt

This matters now that v_fma_mixlo_f16/v_fma_mixhi_f16 can use dpp.

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_ir.cpp               | 18 ++++++++++++++++++
 src/amd/compiler/aco_ir.h                 |  2 ++
 src/amd/compiler/aco_optimizer.cpp        | 19 +------------------
 src/amd/compiler/aco_optimizer_postRA.cpp |  4 ++--
 4 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index c981a6279f7..33b107a5b05 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -781,6 +781,24 @@ get_reduction_identity(ReduceOp op, unsigned idx)
    return 0;
 }
 
+unsigned
+get_operand_size(aco_ptr& instr, unsigned index)
+{
+   if (instr->isPseudo())
+      return instr->operands[index].bytes() * 8u;
+   else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
+            instr->opcode == aco_opcode::v_mad_i64_i32)
+      return index == 2 ? 64 : 32;
+   else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
+            instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
+            instr->opcode == aco_opcode::v_fma_mixhi_f16)
+      return instr->valu().opsel_hi[index] ? 16 : 32;
+   else if (instr->isVALU() || instr->isSALU())
+      return instr_info.operand_size[(int)instr->opcode];
+   else
+      return 0;
+}
+
 bool
 needs_exec_mask(const Instruction* instr)
 {
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 79d47417330..0309ae69937 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1853,6 +1853,8 @@ uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
 
 unsigned get_mimg_nsa_dwords(const Instruction* instr);
 
+unsigned get_operand_size(aco_ptr& instr, unsigned index);
+
 bool should_form_clause(const Instruction* a, const Instruction* b);
 
 enum block_kind {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 0d8b3416983..7356a547439 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -900,23 +900,6 @@ smem_combine(opt_ctx& ctx, aco_ptr& instr)
       skip_smem_offset_align(ctx, &instr->smem());
 }
 
-unsigned
-get_operand_size(aco_ptr& instr, unsigned index)
-{
-   if (instr->isPseudo())
-      return instr->operands[index].bytes() * 8u;
-   else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
-            instr->opcode == aco_opcode::v_mad_i64_i32)
-      return index == 2 ? 64 : 32;
-   else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
-            instr->opcode == aco_opcode::v_fma_mixlo_f16)
-      return instr->valu().opsel_hi & (1u << index) ? 16 : 32;
-   else if (instr->isVALU() || instr->isSALU())
-      return instr_info.operand_size[(int)instr->opcode];
-   else
-      return 0;
-}
-
 Operand
 get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
 {
@@ -4841,7 +4824,7 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr)
 
             bool dpp8 = info.is_dpp8();
             bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
-                              instr_info.operand_size[(int)instr->opcode] == 32;
+                              get_operand_size(instr, 0) == 32;
             bool mov_uses_mods = info.instr->valu().neg[0] || info.instr->valu().abs[0];
             if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
                continue;
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index 7aa554d0766..4086727b5dd 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -513,8 +513,8 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr)
          continue;
 
       bool dpp8 = mov->isDPP8();
-      bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
-                        instr_info.operand_size[(int)instr->opcode] == 32;
+      bool input_mods =
+         instr_info.can_use_input_modifiers[(int)instr->opcode] && get_operand_size(instr, i) == 32;
       bool mov_uses_mods = mov->valu().neg[0] || mov->valu().abs[0];
       if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
          continue;