From e19584db2b8085dd6cf2a2d977b783dcd0a05378 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 2 Nov 2022 16:45:41 +0000
Subject: [PATCH] nir/algebraic: optimize open-coded uadd_sat/usub_sat

fossil-db (navi21):
Totals from 19 (0.01% of 135636) affected shaders:
Instrs: 40730 -> 40688 (-0.10%)
CodeSize: 217708 -> 217568 (-0.06%)
Latency: 261466 -> 261373 (-0.04%)
InvThroughput: 74944 -> 74896 (-0.06%)

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/compiler/nir/nir_opt_algebraic.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f35232a66ca..421bb18b3f3 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2079,6 +2079,32 @@ optimizations.extend([
    (('imul24', a, 0), (0)),
 ])
 
+for bit_size in [8, 16, 32, 64]:
+    cond = '!options->lower_uadd_sat'
+    if bit_size == 64:
+        cond += ' && !(options->lower_int64_options & nir_lower_iadd64)'
+    add = 'iadd@' + str(bit_size)
+
+    optimizations += [
+        (('bcsel', ('ult', ('iadd', a, b), a), -1, (add, a, b)), ('uadd_sat', a, b), cond),
+        (('bcsel', ('uge', ('iadd', a, b), a), (add, a, b), -1), ('uadd_sat', a, b), cond),
+        (('bcsel', ('ieq', ('uadd_carry', a, b), 0), (add, a, b), -1), ('uadd_sat', a, b), cond),
+        (('bcsel', ('ine', ('uadd_carry', a, b), 0), -1, (add, a, b)), ('uadd_sat', a, b), cond),
+    ]
+
+for bit_size in [8, 16, 32, 64]:
+    cond = '!options->lower_usub_sat'
+    if bit_size == 64:
+        cond += ' && !(options->lower_int64_options & nir_lower_usub_sat64)'
+    add = 'iadd@' + str(bit_size)
+
+    optimizations += [
+        (('bcsel', ('ult', a, b), 0, (add, a, ('ineg', b))), ('usub_sat', a, b), cond),
+        (('bcsel', ('uge', a, b), (add, a, ('ineg', b)), 0), ('usub_sat', a, b), cond),
+        (('bcsel', ('ieq', ('usub_borrow', a, b), 0), (add, a, ('ineg', b)), 0), ('usub_sat', a, b), cond),
+        (('bcsel', ('ine', ('usub_borrow', a, b), 0), 0, (add, a, ('ineg', b))), ('usub_sat', a, b), cond),
+    ]
+
 # bit_size dependent lowerings
 for bit_size in [8, 16, 32, 64]:
    # convenience constants