diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index 458459f82ba..63d02ca3180 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -89,6 +89,17 @@ agx_optimizer_fmov(agx_instr **defs, agx_instr *ins) if (ins->op == AGX_OPCODE_FCMPSEL && s >= 2) continue; + /* We can fold f2f32 into 32-bit instructions, but we can't fold f2f16 + * into 16-bit instructions, since the latter would implicitly promote to + * a 32-bit instruction which is not exact. + */ + assert(def->src[0].size == AGX_SIZE_32 || + def->src[0].size == AGX_SIZE_16); + assert(src.size == AGX_SIZE_32 || src.size == AGX_SIZE_16); + + if (src.size == AGX_SIZE_16 && def->src[0].size == AGX_SIZE_32) + continue; + ins->src[s] = agx_compose_float_src(src, def->src[0]); } } @@ -154,6 +165,16 @@ agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use) if (use->src[0].neg || use->src[0].abs) return false; + /* We can fold f2f16 into 32-bit instructions, but we can't fold f2f32 into + * 16-bit instructions, since the latter would implicitly promote to a 32-bit + * instruction which is not exact.
+ */ + assert(use->dest[0].size == AGX_SIZE_32 || use->dest[0].size == AGX_SIZE_16); + assert(I->dest[0].size == AGX_SIZE_32 || I->dest[0].size == AGX_SIZE_16); + + if (I->dest[0].size == AGX_SIZE_16 && use->dest[0].size == AGX_SIZE_32) + return false; + /* saturate(saturate(x)) = saturate(x) */ I->saturate |= use->saturate; I->dest[0] = use->dest[0]; diff --git a/src/asahi/compiler/test/test-optimizer.cpp b/src/asahi/compiler/test/test-optimizer.cpp index 2b0c6c8b324..9e05c5ee39c 100644 --- a/src/asahi/compiler/test/test-optimizer.cpp +++ b/src/asahi/compiler/test/test-optimizer.cpp @@ -78,6 +78,25 @@ TEST_F(Optimizer, FloatCopyprop) agx_fadd_to(b, out, agx_neg(wx), wy)); } +TEST_F(Optimizer, FloatConversion) +{ + CASE32( + { + agx_index cvt = agx_temp(b->shader, AGX_SIZE_32); + agx_fmov_to(b, cvt, hx); + agx_fadd_to(b, out, cvt, wy); + }, + { agx_fadd_to(b, out, hx, wy); }); + + CASE16( + { + agx_index sum = agx_temp(b->shader, AGX_SIZE_32); + agx_fadd_to(b, sum, wx, wy); + agx_fmov_to(b, out, sum); + }, + { agx_fadd_to(b, out, wx, wy); }); +} + TEST_F(Optimizer, FusedFABSNEG) { CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy), @@ -164,3 +183,14 @@ TEST_F(Optimizer, SkipPreloads) agx_xor_to(b, out, preload, wy); }); } + +TEST_F(Optimizer, NoConversionsOn16BitALU) +{ + NEGCASE16({ + agx_index cvt = agx_temp(b->shader, AGX_SIZE_16); + agx_fmov_to(b, cvt, wx); + agx_fadd_to(b, out, cvt, hy); + }); + + NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy))); +}