r300: small address register load optimization

We do ffloor by default for address register load so no need to do it explicitly. This needs to happen after int lowering, otherwise we get ftrunc by default as a bonus. This is mostly for wined3d. Shader-db RV370: total instructions in shared programs: 82147 -> 82071 (-0.09%) instructions in affected programs: 2772 -> 2696 (-2.74%) helped: 32 HURT: 0 total cycles in shared programs: 128479 -> 128403 (-0.06%) cycles in affected programs: 2813 -> 2737 (-2.70%) helped: 32 HURT: 0 Shader-db RV530: total instructions in shared programs: 126141 -> 126057 (-0.07%) instructions in affected programs: 3170 -> 3086 (-2.65%) helped: 36 HURT: 0 total cycles in shared programs: 191688 -> 191604 (-0.04%) cycles in affected programs: 3222 -> 3138 (-2.61%) helped: 36 HURT: 0 Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>
2023-09-20 09:21:35 +02:00 · 2023-09-20 09:21:35 +02:00 · 75e7790ee5
commit 75e7790ee5
parent af3cca98a2
3 changed files with 34 additions and 0 deletions
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@ -2440,6 +2440,8 @@ const void *nir_to_rc_options(struct nir_shader *s,
   }

   NIR_PASS_V(s, nir_lower_int_to_float);
+   NIR_PASS_V(s, nir_copy_prop);
+   NIR_PASS_V(s, r300_nir_post_integer_lowering);
   NIR_PASS_V(s, nir_lower_bool_to_float,
              !options->lower_cmp && !options->lower_fabs);
   /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
--- a/src/gallium/drivers/r300/compiler/r300_nir.h
+++ b/src/gallium/drivers/r300/compiler/r300_nir.h
@ -47,6 +47,24 @@ is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
   }
 }

+/**
+ * Search-condition helper for the r300 algebraic patterns: returns true only
+ * when every use of the ALU result is a load_ubo_vec4 intrinsic.
+ *
+ * Returns false as soon as a use is an if-condition, a non-intrinsic
+ * instruction, or an intrinsic other than load_ubo_vec4. A result with no
+ * uses at all yields true.
+ */
+static inline bool
+is_only_used_by_load_ubo_vec4(const nir_alu_instr *instr)
+{
+   /* Walk if-condition uses as well: nir_foreach_use() skips them, which
+    * would leave the nir_src_is_if() rejection below unreachable.
+    */
+   nir_foreach_use_including_if(src, &instr->def) {
+      if (nir_src_is_if(src))
+         return false;
+
+      nir_instr *user_instr = nir_src_parent_instr(src);
+      if (user_instr->type != nir_instr_type_intrinsic)
+         return false;
+
+      const nir_intrinsic_instr *const user_intrinsic = nir_instr_as_intrinsic(user_instr);
+
+      if (user_intrinsic->intrinsic != nir_intrinsic_load_ubo_vec4)
+         return false;
+   }
+   return true;
+}
+
 char *r300_finalize_nir(struct pipe_screen *pscreen, void *nir);

 extern bool r300_transform_vs_trig_input(struct nir_shader *shader);
@ -61,4 +79,6 @@ extern bool r300_nir_prepare_presubtract(struct nir_shader *shader);

 extern bool r300_nir_clean_double_fneg(struct nir_shader *shader);

+extern bool r300_nir_post_integer_lowering(struct nir_shader *shader);
+
 #endif /* R300_NIR_H */
--- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
+++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
@ -94,6 +94,15 @@ r300_nir_clean_double_fneg = [
        (('fneg', ('fneg', a)), a)
 ]

+r300_nir_post_integer_lowering = [
+        # If the ffloor result is used only for an indirect constant load, we can get rid of it
+        # completely, as ntt emits ARL by default, which already does the flooring.
+        # This actually checks for the lowered ffloor(a) = a - ffract(a) pattern.
+        (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), a),
+        # This is a D3D9 pattern from Wine when the shader wants ffloor instead of fround on register load.
+        (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a)
+]
+
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
@ -140,5 +149,8 @@ def main():
        f.write(nir_algebraic.AlgebraicPass("r300_nir_clean_double_fneg",
                                            r300_nir_clean_double_fneg).render())

+        f.write(nir_algebraic.AlgebraicPass("r300_nir_post_integer_lowering",
+                                            r300_nir_post_integer_lowering).render())
+
 if __name__ == '__main__':
    main()