From 212810ac8addb0e883bf53741efa123bce8b5c36 Mon Sep 17 00:00:00 2001 From: Rohan Garg Date: Wed, 17 May 2023 16:44:17 +0200 Subject: [PATCH] intel: infer scalar'ness locally for brw_postprocess_nir Signed-off-by: Rohan Garg Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_fs.cpp | 6 ++--- src/intel/compiler/brw_mesh.cpp | 4 +-- src/intel/compiler/brw_nir.c | 31 +++++++++++----------- src/intel/compiler/brw_nir.h | 4 +-- src/intel/compiler/brw_nir_rt.c | 2 +- src/intel/compiler/brw_shader.cpp | 2 +- src/intel/compiler/brw_vec4.cpp | 2 +- src/intel/compiler/brw_vec4_gs_visitor.cpp | 2 +- src/intel/compiler/brw_vec4_tcs.cpp | 2 +- 9 files changed, 27 insertions(+), 28 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 59637d36fb2..e8b795a976d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7484,7 +7484,7 @@ brw_compile_fs(const struct brw_compiler *compiler, } NIR_PASS(_, nir, brw_nir_move_interpolation_to_top); - brw_postprocess_nir(nir, compiler, true, debug_enabled, + brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_buffer_access); brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data, @@ -7849,7 +7849,7 @@ brw_compile_cs(const struct brw_compiler *compiler, NIR_PASS(_, shader, nir_opt_constant_folding); NIR_PASS(_, shader, nir_opt_dce); - brw_postprocess_nir(shader, compiler, true, debug_enabled, + brw_postprocess_nir(shader, compiler, debug_enabled, key->base.robust_buffer_access); v[simd] = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base, @@ -7968,7 +7968,7 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data, const unsigned max_dispatch_width = 16; brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width, true); - brw_postprocess_nir(shader, compiler, true, debug_enabled, + brw_postprocess_nir(shader, compiler, debug_enabled, key->base.robust_buffer_access); brw_simd_selection_state 
simd_state{ diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 05bd938412d..c6fb7a95beb 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -327,7 +327,7 @@ brw_compile_task(const struct brw_compiler *compiler, NIR_PASS(_, shader, brw_nir_lower_load_uniforms); NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); - brw_postprocess_nir(shader, compiler, true /* is_scalar */, debug_enabled, + brw_postprocess_nir(shader, compiler, debug_enabled, key->base.robust_buffer_access); brw_nir_adjust_payload(shader, compiler); @@ -1036,7 +1036,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width); - brw_postprocess_nir(shader, compiler, true /* is_scalar */, debug_enabled, + brw_postprocess_nir(shader, compiler, debug_enabled, key->base.robust_buffer_access); brw_nir_adjust_payload(shader, compiler); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 45c5df4562f..c87d4b6ebbc 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -628,14 +628,14 @@ brw_nir_lower_fs_outputs(nir_shader *nir) }) void -brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, - bool is_scalar) +brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler) { bool progress; unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | (nir->options->lower_flrp32 ? 32 : 0) | (nir->options->lower_flrp64 ? 
64 : 0); + const bool is_scalar = compiler->scalar_stage[nir->info.stage]; do { progress = false; @@ -979,7 +979,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, OPT(nir_split_var_copies); OPT(nir_split_struct_vars, nir_var_function_temp); - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); OPT(nir_lower_doubles, opts->softfp64, nir->options->lower_doubles_options); if (OPT(nir_lower_int64_float_conversions)) { @@ -1053,7 +1053,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir, nir_lower_direct_array_deref_of_vec_load); /* Get rid of split copies */ - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); } static bool @@ -1215,12 +1215,12 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, if (p_is_scalar && c_is_scalar) { NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out); NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); - brw_nir_optimize(producer, compiler, p_is_scalar); - brw_nir_optimize(consumer, compiler, c_is_scalar); + brw_nir_optimize(producer, compiler); + brw_nir_optimize(consumer, compiler); } if (nir_link_opt_varyings(producer, consumer)) - brw_nir_optimize(consumer, compiler, c_is_scalar); + brw_nir_optimize(consumer, compiler); NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); @@ -1249,8 +1249,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, brw_nir_no_indirect_mask(compiler, consumer->info.stage), UINT32_MAX); - brw_nir_optimize(producer, compiler, p_is_scalar); - brw_nir_optimize(consumer, compiler, c_is_scalar); + brw_nir_optimize(producer, compiler); + brw_nir_optimize(consumer, compiler); if (producer->info.stage == MESA_SHADER_MESH && consumer->info.stage == MESA_SHADER_FRAGMENT) { @@ -1533,10 +1533,11 @@ nir_shader_has_local_variables(const nir_shader *nir) */ void 
brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, - bool is_scalar, bool debug_enabled, + bool debug_enabled, bool robust_buffer_access) { const struct intel_device_info *devinfo = compiler->devinfo; + const bool is_scalar = compiler->scalar_stage[nir->info.stage]; UNUSED bool progress; /* Written by OPT */ @@ -1561,21 +1562,21 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, if (gl_shader_stage_can_set_fragment_shading_rate(nir->info.stage)) NIR_PASS(_, nir, brw_nir_lower_shading_rate_output); - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); if (is_scalar && nir_shader_has_local_variables(nir)) { OPT(nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_natural_size_align_bytes); OPT(nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset); - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); } brw_vectorize_lower_mem_access(nir, compiler, is_scalar, robust_buffer_access); if (OPT(nir_lower_int64)) - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); if (devinfo->ver >= 6) { /* Try and fuse multiply-adds, if successful, run shrink_vectors to @@ -1676,7 +1677,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_lower_subgroups, &subgroups_options); if (OPT(nir_lower_int64)) - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); } /* Clean up LCSSA phis */ @@ -1842,7 +1843,7 @@ brw_nir_apply_key(nir_shader *nir, OPT(brw_nir_limit_trig_input_range_workaround); if (progress) - brw_nir_optimize(nir, compiler, is_scalar); + brw_nir_optimize(nir, compiler); } enum brw_conditional_mod diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 7cdd4dfe90f..5ba1c99f383 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -148,7 +148,6 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader, 
void brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, - bool is_scalar, bool debug_enabled, bool robust_buffer_access); @@ -200,8 +199,7 @@ bool brw_nir_blockify_uniform_loads(nir_shader *shader, const struct intel_device_info *devinfo); void brw_nir_optimize(nir_shader *nir, - const struct brw_compiler *compiler, - bool is_scalar); + const struct brw_compiler *compiler); nir_shader *brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw_nir_rt.c b/src/intel/compiler/brw_nir_rt.c index a363965b9a9..62315233682 100644 --- a/src/intel/compiler/brw_nir_rt.c +++ b/src/intel/compiler/brw_nir_rt.c @@ -534,7 +534,7 @@ brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler, NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics); - brw_nir_optimize(nir, compiler, true); + brw_nir_optimize(nir, compiler); return nir; } diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index bbca3483371..1dae72ed705 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1310,7 +1310,7 @@ brw_compile_tes(const struct brw_compiler *compiler, brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); brw_nir_lower_tes_inputs(nir, input_vue_map); brw_nir_lower_vue_outputs(nir); - brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, + brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_buffer_access); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index eda343b48ed..d5fc1909301 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2561,7 +2561,7 @@ brw_compile_vs(const struct brw_compiler *compiler, brw_nir_lower_vs_inputs(nir, params->edgeflag_is_last, key->gl_attrib_wa_flags); brw_nir_lower_vue_outputs(nir); - brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, + 
brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_buffer_access); prog_data->base.clip_distance_mask = diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index d3fc8bb401e..04d470f6f25 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -616,7 +616,7 @@ brw_compile_gs(const struct brw_compiler *compiler, brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); brw_nir_lower_vue_inputs(nir, &c.input_vue_map); brw_nir_lower_vue_outputs(nir); - brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, + brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_buffer_access); prog_data->base.clip_distance_mask = diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index 90591afa454..08b45345139 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -388,7 +388,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, if (compiler->use_tcs_multi_patch) brw_nir_clamp_per_vertex_loads(nir, key->input_vertices); - brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, + brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_buffer_access); bool has_primitive_id =