diff --git a/.pick_status.json b/.pick_status.json index 501f39111a0..9c0bcf62415 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -314,7 +314,7 @@ "description": "nir: rework and fix rotate lowering", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "fe0965afa6becfc9c9aa341babd34bc5920e421b", "notes": null diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 7303685e5a6..0798b73b3dc 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -94,7 +94,6 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s .lower_ffma64 = split_fma, .lower_fpow = true, .lower_mul_2x32_64 = true, - .lower_rotate = true, .lower_iadd_sat = device->rad_info.gfx_level <= GFX8, .lower_hadd = true, .lower_mul_32x16 = true, diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 47211b59d3b..02283a120a1 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -292,7 +292,6 @@ static const nir_shader_compiler_options agx_nir_options = { .lower_hadd = true, .vectorize_io = true, .use_interpolated_input_intrinsics = true, - .lower_rotate = true, .has_isub = true, .support_16bit_alu = true, .max_unroll_iterations = 32, diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 756d1d52e55..f339389fc3a 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -227,7 +227,6 @@ const nir_shader_compiler_options v3dv_nir_options = { .lower_ldexp = true, .lower_mul_high = true, .lower_wpos_pntc = false, - .lower_rotate = true, .lower_to_scalar = true, .lower_device_index_to_zero = true, .lower_fquantize2f16 = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 61c2b4bc884..c179d3ea23d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3838,8 +3838,10 @@ typedef struct nir_shader_compiler_options { /* Lowers when 32x32->64 bit multiplication is not supported */ bool lower_mul_2x32_64; - /* Lowers when rotate instruction is not supported */ - bool lower_rotate; + /* Indicates that urol and uror are supported */ + bool has_rotate8; + bool has_rotate16; + bool has_rotate32; /** Backend supports ternary addition */ bool has_iadd3; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 55ae777a6f1..6972649d7b7 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1388,22 +1388,22 @@ optimizations.extend([ (('ishr', a, 0), a), (('ushr', 0, a), 0), (('ushr', a, 0), a), - (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), '!options->lower_rotate'), - (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), 'options->lower_rotate'), - (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), 'options->lower_rotate'), - (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), 'options->lower_rotate'), - (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b))), 'options->lower_rotate'), - (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), 'options->lower_rotate'), - (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), 'options->lower_rotate'), - (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), 'options->lower_rotate'), - (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b))), 'options->lower_rotate'), + (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), 'options->has_rotate16'), + (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), 'options->has_rotate16'), + (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), 'options->has_rotate32'), + (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), 'options->has_rotate32'), + (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), 'options->has_rotate16'), + (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), 'options->has_rotate16'), + (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), 'options->has_rotate32'), + (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), 'options->has_rotate32'), + (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), '!options->has_rotate8'), + (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), '!options->has_rotate16'), + (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), '!options->has_rotate32'), + (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b)))), + (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), '!options->has_rotate8'), + (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), '!options->has_rotate16'), + (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), '!options->has_rotate32'), + (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b)))), # bfi(X, a, b) = (b & ~X) | (a & X) # If X = ~0: (b & 0) | (a & 0xffffffff) = a diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 80ca0656df7..317748c5f92 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -109,7 +109,6 @@ static const nir_shader_compiler_options ir3_base_options = { .lower_unpack_unorm_2x16 = true, .lower_pack_split = true, .use_interpolated_input_intrinsics = true, - .lower_rotate = true, .lower_to_scalar = true, .has_imul24 = true, .has_fsub = true, diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 1c603e4cbf7..14112ee71aa 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -3689,13 +3689,15 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, !options->lower_fdph || !options->lower_flrp64 || !options->lower_fmod || - !options->lower_rotate || !options->lower_uadd_carry || !options->lower_usub_borrow || !options->lower_uadd_sat || !options->lower_usub_sat || !options->lower_uniforms_to_ubo || !options->lower_vector_cmp || + options->has_rotate8 || + options->has_rotate16 || + options->has_rotate32 || options->lower_fsqrt != lower_fsqrt || options->force_indirect_unrolling != no_indirects_mask || force_indirect_unrolling_sampler) { @@ -3709,7 +3711,6 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, new_options->lower_fdph = true; new_options->lower_flrp64 = true; new_options->lower_fmod = true; - new_options->lower_rotate = true; new_options->lower_uadd_carry = true; new_options->lower_usub_borrow = true; new_options->lower_uadd_sat = true; @@ -3717,6 +3718,9 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, new_options->lower_uniforms_to_ubo = true; new_options->lower_vector_cmp = true; new_options->lower_fsqrt = lower_fsqrt; + new_options->has_rotate8 = false; + new_options->has_rotate16 = false; + new_options->has_rotate32 = false; new_options->force_indirect_unrolling = no_indirects_mask; new_options->force_indirect_unrolling_sampler = force_indirect_unrolling_sampler; @@ -4062,7 +4066,6 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = { .lower_fdph = true, .lower_flrp64 = true, .lower_fmod = true, - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_uadd_carry = true, .lower_usub_borrow = true, diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 68cca059504..bf9912e05ea 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -43,7 +43,6 @@ static const nir_shader_compiler_options options = { .lower_all_io_to_temps = true, .vertex_id_zero_based = true, /* its not implemented anyway */ .lower_bitops = true, - .lower_rotate = true, .lower_vector_cmp = true, .lower_fdph = true, .has_fsub = true, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 82b7c987518..a6d6d67a39c 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -117,7 +117,6 @@ static const nir_shader_compiler_options i915_compiler_options = { .lower_fdph = true, .lower_flrp32 = true, .lower_fmod = true, - .lower_rotate = true, .lower_sincos = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, @@ -161,7 +160,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_2x32_64 = true, diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index e6705248992..2d6dd4c9944 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -57,7 +57,6 @@ static const nir_shader_compiler_options vs_nir_options = { /* could be implemented by clamp */ .lower_fsat = true, .lower_bitops = true, - .lower_rotate = true, .lower_sincos = true, .lower_fceil = true, .lower_insert_byte = true, @@ -78,7 +77,6 @@ static const nir_shader_compiler_options fs_nir_options = { .lower_flrp32 = true, .lower_flrp64 = true, .lower_fsign = true, - .lower_rotate = true, .lower_fdot = true, .lower_fdph = true, .lower_insert_byte = true, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index b518da49e72..ad3d66424e1 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -616,7 +616,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_extract_word = true, .lower_insert_byte = true, .lower_insert_word = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_2x32_64 = true, diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index dfcebf56f9b..6c60faea722 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -477,7 +477,6 @@ static const nir_shader_compiler_options nv30_base_compiler_options = { .lower_flrp64 = true, .lower_fmod = true, .lower_fpow = true, /* In hardware as of nv40 FS */ - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, .force_indirect_unrolling = nir_var_all, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2a0bb599f55..3616db63c22 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -503,7 +503,6 @@ static int r300_get_video_param(struct pipe_screen *screen, .lower_ftrunc = true, \ .lower_insert_byte = true, \ .lower_insert_word = true, \ - .lower_rotate = true, \ .lower_uniforms_to_ubo = true, \ .lower_vector_cmp = true, \ .no_integers = true, \ diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 4ab183dde08..ea0c1e24fb6 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1395,7 +1395,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, .lower_insert_byte = true, .lower_insert_word = true, .lower_ldexp = true, - .lower_rotate = true, /* due to a bug in the shader compiler, some loops hang * if they are not unrolled, see: * https://bugs.freedesktop.org/show_bug.cgi?id=86720 diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 111f3adc26b..b72f693dbff 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1435,7 +1435,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .lower_hadd = true, .lower_hadd64 = true, .lower_fisnormal = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_to_scalar_filter = sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL, diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 351de3f0b25..cc76e6b5c6a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -85,7 +85,6 @@ static const nir_shader_compiler_options sp_compiler_options = { .lower_fdph = true, .lower_flrp64 = true, .lower_fmod = true, - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, .lower_int64_options = nir_lower_imul_2x32_64, diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 2d4072a8d4c..cce6467a3c6 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -737,7 +737,6 @@ vgpu10_get_shader_param(struct pipe_screen *screen, .lower_fdph = true, \ .lower_flrp64 = true, \ .lower_ldexp = true, \ - .lower_rotate = true, \ .lower_uniforms_to_ubo = true, \ .lower_vector_cmp = true, \ .lower_cs_local_index_to_id = true, \ diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index d8ab3de114e..dfd5b4f3d12 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -720,7 +720,6 @@ static const nir_shader_compiler_options v3d_nir_options = { .lower_ldexp = true, .lower_mul_high = true, .lower_wpos_pntc = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_int64_options = nir_lower_imul_2x32_64, .lower_fquantize2f16 = true, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 23b7790b98e..e3b27cfdeb7 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2169,7 +2169,6 @@ static const nir_shader_compiler_options nir_options = { .lower_ldexp = true, .lower_fneg = true, .lower_ineg = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_umax = true, .lower_umin = true, diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index a3b9c2ca9e5..3674f412062 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -1224,7 +1224,6 @@ zink_screen_init_compiler(struct zink_screen *screen) .lower_ldexp = true, .lower_mul_high = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_uadd_sat = true, diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 669b83dbe2b..8a3999572ef 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -192,7 +192,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->has_bfm = devinfo->ver >= 7; nir_options->has_bfi = devinfo->ver >= 7; - nir_options->lower_rotate = devinfo->ver < 11; + nir_options->has_rotate16 = devinfo->ver >= 11; + nir_options->has_rotate32 = devinfo->ver >= 11; nir_options->lower_bitfield_reverse = devinfo->ver < 7; nir_options->lower_find_lsb = devinfo->ver < 7; nir_options->lower_ifind_msb = devinfo->ver < 7; diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index e67a40b14bd..babdb525191 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -118,7 +118,6 @@ nir_options = { .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_high = true, - .lower_rotate = true, .lower_pack_half_2x16 = true, .lower_pack_unorm_4x8 = true, .lower_pack_snorm_4x8 = true, diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index a89342d7379..25ce4887e43 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3465,7 +3465,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) op.unify_interfaces = false; op.use_interpolated_input_intrinsics = true; op.lower_mul_2x32_64 = true; // TODO - op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); + op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET); op.has_imul24 = false; op.has_fmulz = (chipset > NVISA_G80_CHIPSET); op.intel_vec4 = false; diff --git a/src/panfrost/compiler/bifrost_compile.h b/src/panfrost/compiler/bifrost_compile.h index 5d7b3710f2b..5473ae075cf 100644 --- a/src/panfrost/compiler/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost_compile.h @@ -55,7 +55,6 @@ void bifrost_compile_shader_nir(nir_shader *nir, .lower_bitfield_insert = true, \ .lower_bitfield_extract = true, \ .lower_insert_byte = true, \ - .lower_rotate = true, \ \ /* Vertex ID is zero based in the traditional geometry flows, but not in \ * the memory-allocated IDVS flow introduced and used exclusively in \ diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 2b9c9cf334b..95d7be1abc4 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -73,7 +73,6 @@ static const nir_shader_compiler_options midgard_nir_options = { .lower_insert_byte = true, .lower_insert_word = true, .lower_ldexp = true, - .lower_rotate = true, .lower_pack_half_2x16 = true, .lower_pack_unorm_2x16 = true,