From 0e244d56e3922bcfc92d792c45b2206882988118 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 6 Aug 2023 15:46:12 +0300 Subject: [PATCH] intel/fs: track more steps with INTEL_DEBUG=optimizer One particular nice thing to have is the first generated backend IR before validation. Especially if you made a mistake in the NIR translation, you can at least look at it before validation tells you off. Then the last 2 steps of the optimize() function can be interesting to look at. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs.cpp | 63 ++++++++++++------- src/intel/compiler/brw_fs.h | 7 ++- .../compiler/brw_lower_logical_sends.cpp | 8 ++- 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 3683a180b67..8c981f01159 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2540,10 +2540,11 @@ fs_visitor::get_pull_locs(const fs_reg &src, * Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs. */ -void +bool fs_visitor::lower_constant_loads() { unsigned index, pull_index; + bool progress = false; foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { /* Set up the annotation tracking for new generated instructions. */ @@ -2581,6 +2582,8 @@ fs_visitor::lower_constant_loads() inst->src[i].nr = dst.nr; inst->src[i].offset = (base & (block_sz - 1)) + inst->src[i].offset % 4; + + progress = true; } if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && @@ -2595,9 +2598,13 @@ fs_visitor::lower_constant_loads() inst->src[1], pull_index * 4, 4); inst->remove(block); + + progress = true; } } invalidate_analysis(DEPENDENCY_INSTRUCTIONS); + + return progress; } static uint64_t @@ -6185,9 +6192,28 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c) regpressure_analysis.invalidate(c); } +void +fs_visitor::debug_optimizer(const char *pass_name, + int iteration, int pass_num) const +{ + if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) + return; + + char *filename; + int ret = asprintf(&filename, "%s%d-%s-%02d-%02d-%s", + stage_abbrev, dispatch_width, nir->info.name, + iteration, pass_num, pass_name); + if (ret == -1) + return; + dump_instructions(filename); + free(filename); +} + void fs_visitor::optimize() { + debug_optimizer("start", 0, 0); + /* Start by validating the shader we currently have. */ validate(); @@ -6204,22 +6230,16 @@ fs_visitor::optimize() */ bld = fs_builder(this, 64); - assign_constant_locations(); - lower_constant_loads(); - - validate(); + bool progress = false; + int iteration = 0; + int pass_num = 0; #define OPT(pass, args...) ({ \ pass_num++; \ bool this_progress = pass(args); \ \ - if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \ - char filename[64]; \ - snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \ - stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \ - \ - dump_instructions(filename); \ - } \ + if (this_progress) \ + debug_optimizer(#pass, iteration, pass_num); \ \ validate(); \ \ @@ -6227,17 +6247,10 @@ fs_visitor::optimize() this_progress; \ }) - if (INTEL_DEBUG(DEBUG_OPTIMIZER)) { - char filename[64]; - snprintf(filename, 64, "%s%d-%s-00-00-start", - stage_abbrev, dispatch_width, nir->info.name); + assign_constant_locations(); + OPT(lower_constant_loads); - dump_instructions(filename); - } - - bool progress = false; - int iteration = 0; - int pass_num = 0; + validate(); OPT(split_virtual_grfs); @@ -6360,9 +6373,9 @@ fs_visitor::optimize() OPT(fixup_sends_duplicate_payload); - lower_uniform_pull_constant_loads(); + OPT(lower_uniform_pull_constant_loads); - lower_find_live_channel(); + OPT(lower_find_live_channel); validate(); } @@ -6733,6 +6746,8 @@ fs_visitor::allocate_registers(bool allow_spilling) if (needs_register_pressure) shader_stats.max_register_pressure = compute_max_register_pressure(); + debug_optimizer("pre_register_allocate", 99, 99); + bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS); /* Before we schedule anything, stash off the instruction order as an array diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 25c7255198d..0098c73af4f 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -254,7 +254,7 @@ public: void assign_constant_locations(); bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index, unsigned *out_pull_index); - void lower_constant_loads(); + bool lower_constant_loads(); virtual void invalidate_analysis(brw::analysis_dependency_class c); void validate(); bool opt_algebraic(); @@ -286,7 +286,7 @@ public: void vfail(const char *msg, va_list args); void fail(const char *msg, ...); void limit_dispatch_width(unsigned n, const char *msg); - void lower_uniform_pull_constant_loads(); + bool lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_pack(); bool lower_regioning(); @@ -571,6 +571,9 @@ private: void lower_mulh_inst(fs_inst *inst, bblock_t *block); unsigned workgroup_size() const; + + void debug_optimizer(const char *pass_name, + int iteration, int pass_num) const; }; /** diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index e247ce89369..2405fb4f6f4 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -3033,9 +3033,11 @@ fs_visitor::lower_logical_sends() * mask, since a later instruction will use one of the result channels as a * source operand for all 8 or 16 of its channels. */ -void +bool fs_visitor::lower_uniform_pull_constant_loads() { + bool progress = false; + foreach_block_and_inst (block, fs_inst, inst, cfg) { if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) continue; @@ -3125,5 +3127,9 @@ fs_visitor::lower_uniform_pull_constant_loads() inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1; inst->mlen = 1; } + + progress = true; } + + return progress; }