aco/optimizer: copy propagate to output modifier instructions

Foz-DB Navi21:
Totals from 847 (1.11% of 76572) affected shaders:
Instrs: 2331245 -> 2330335 (-0.04%); split: -0.04%, +0.00%
CodeSize: 12451040 -> 12451736 (+0.01%); split: -0.00%, +0.01%
Latency: 26230953 -> 26229153 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 6297802 -> 6296788 (-0.02%); split: -0.02%, +0.00%
VClause: 64527 -> 64528 (+0.00%); split: -0.00%, +0.01%
SClause: 73150 -> 73121 (-0.04%); split: -0.06%, +0.02%
Copies: 180083 -> 179172 (-0.51%); split: -0.53%, +0.02%
PreSGPRs: 62311 -> 62316 (+0.01%)
PreVGPRs: 51720 -> 51710 (-0.02%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25347>
This commit is contained in:
Georg Lehmann 2023-09-22 19:08:18 +02:00 committed by Marge Bot
parent 89f3a5ea37
commit 9508cadadb
2 changed files with 16 additions and 8 deletions

View file

@ -298,6 +298,8 @@ struct ssa_info {
void set_omod2(Instruction* mul)
{
if (label & temp_labels)
return;
add_label(label_omod2);
instr = mul;
}
@ -306,6 +308,8 @@ struct ssa_info {
void set_omod4(Instruction* mul)
{
if (label & temp_labels)
return;
add_label(label_omod4);
instr = mul;
}
@ -314,6 +318,8 @@ struct ssa_info {
void set_omod5(Instruction* mul)
{
if (label & temp_labels)
return;
add_label(label_omod5);
instr = mul;
}
@ -322,6 +328,8 @@ struct ssa_info {
void set_clamp(Instruction* med3)
{
if (label & temp_labels)
return;
add_label(label_clamp);
instr = med3;
}
@ -330,6 +338,8 @@ struct ssa_info {
void set_f2f16(Instruction* conv)
{
if (label & temp_labels)
return;
add_label(label_f2f16);
instr = conv;
}
@ -466,6 +476,8 @@ struct ssa_info {
void set_insert(Instruction* insert)
{
if (label & temp_labels)
return;
add_label(label_insert);
instr = insert;
}

View file

@ -296,10 +296,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
Temp neg_byte0 = fneg(byte0);
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_byte0));
//~gfx8! v1: %neg = v_mul_f32 -1.0, %b
//~gfx8! v1: %res1 = v_mul_f32 %a, %neg dst_sel:dword src0_sel:dword src1_sel:ubyte0
//~gfx(9|10)! v1: %neg_byte0 = v_mul_f32 -1.0, %b dst_sel:ubyte0 src0_sel:dword src1_sel:dword
//~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %neg_byte0
//! v1: %neg = v_mul_f32 -1.0, %b
//! v1: %res1 = v_mul_f32 %a, %neg dst_sel:dword src0_sel:dword src1_sel:ubyte0
//! p_unit_test 1, %res1
Temp neg = fneg(inputs[1]);
Temp byte0_neg =
@ -324,10 +322,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
Temp neg_abs_byte0 = fneg(abs_byte0);
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_abs_byte0));
//~gfx8! v1: %neg_abs = v_mul_f32 -1.0, %abs
//~gfx8! v1: %res5 = v_mul_f32 %a, %neg_abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
//~gfx(9|10)! v1: %neg_abs_byte0 = v_mul_f32 -1.0, %abs dst_sel:ubyte0 src0_sel:dword src1_sel:dword
//~gfx(9|10)! v1: %res5 = v_mul_f32 %a, %neg_abs_byte0
//! v1: %neg_abs = v_mul_f32 -1.0, |%b|
//! v1: %res5 = v_mul_f32 %a, %neg_abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
//! p_unit_test 5, %res5
Temp neg_abs = fneg(abs);
Temp byte0_neg_abs =