r300: small address register load optimization

We do ffloor by default for address register loads, so there is no need
to do it explicitly. This needs to happen after int lowering, otherwise
we get ftrunc by default as a bonus. This is mostly for wined3d.

Shader-db RV370:
total instructions in shared programs: 82147 -> 82071 (-0.09%)
instructions in affected programs: 2772 -> 2696 (-2.74%)
helped: 32
HURT: 0
total cycles in shared programs: 128479 -> 128403 (-0.06%)
cycles in affected programs: 2813 -> 2737 (-2.70%)
helped: 32
HURT: 0

Shader-db RV530:
total instructions in shared programs: 126141 -> 126057 (-0.07%)
instructions in affected programs: 3170 -> 3086 (-2.65%)
helped: 36
HURT: 0
total cycles in shared programs: 191688 -> 191604 (-0.04%)
cycles in affected programs: 3222 -> 3138 (-2.61%)
helped: 36
HURT: 0

Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>
This commit is contained in:
Pavel Ondračka 2023-09-20 09:21:35 +02:00
parent af3cca98a2
commit 75e7790ee5
3 changed files with 34 additions and 0 deletions

View file

@ -2440,6 +2440,8 @@ const void *nir_to_rc_options(struct nir_shader *s,
}
NIR_PASS_V(s, nir_lower_int_to_float);
NIR_PASS_V(s, nir_copy_prop);
NIR_PASS_V(s, r300_nir_post_integer_lowering);
NIR_PASS_V(s, nir_lower_bool_to_float,
!options->lower_cmp && !options->lower_fabs);
/* bool_to_float generates MOVs for b2f32 that we want to clean up. */

View file

@ -47,6 +47,24 @@ is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
}
}
static inline bool
is_only_used_by_load_ubo_vec4(const nir_alu_instr *instr)
{
nir_foreach_use(src, &instr->def) {
if (nir_src_is_if(src))
return false;
nir_instr *user_instr = nir_src_parent_instr(src);
if (user_instr->type != nir_instr_type_intrinsic)
return false;
const nir_intrinsic_instr *const user_intrinsic = nir_instr_as_intrinsic(user_instr);
if (user_intrinsic->intrinsic != nir_intrinsic_load_ubo_vec4)
return false;
}
return true;
}
char *r300_finalize_nir(struct pipe_screen *pscreen, void *nir);
extern bool r300_transform_vs_trig_input(struct nir_shader *shader);
@ -61,4 +79,6 @@ extern bool r300_nir_prepare_presubtract(struct nir_shader *shader);
extern bool r300_nir_clean_double_fneg(struct nir_shader *shader);
extern bool r300_nir_post_integer_lowering(struct nir_shader *shader);
#endif /* R300_NIR_H */

View file

@ -94,6 +94,15 @@ r300_nir_clean_double_fneg = [
(('fneg', ('fneg', a)), a)
]
r300_nir_post_integer_lowering = [
    # ntt emits ARL by default for indirect constant loads and ARL already
    # does the flooring, so an ffloor whose result is used only by
    # load_ubo_vec4 can be removed completely. The pattern matched here is
    # the lowered form ffloor(a) = a - ffract(a).
    (
        ('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))),
        a,
    ),
    # D3D9 pattern from Wine, emitted when the shader wants ffloor instead of
    # fround on the register load: the lowered floor is wrapped in a
    # fround_even, and both can be dropped.
    (
        ('fround_even(is_only_used_by_load_ubo_vec4)',
         ('fadd', a, ('fneg', ('ffract', a)))),
        a,
    ),
]
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
@ -140,5 +149,8 @@ def main():
f.write(nir_algebraic.AlgebraicPass("r300_nir_clean_double_fneg",
r300_nir_clean_double_fneg).render())
f.write(nir_algebraic.AlgebraicPass("r300_nir_post_integer_lowering",
r300_nir_post_integer_lowering).render())
if __name__ == '__main__':
main()