diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index b1178f23168..062167e0436 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -920,19 +920,18 @@ vec4_visitor::move_push_constants_to_pull_constants()
 {
    int pull_constant_loc[this->uniforms];
 
-   /* Only allow 32 registers (256 uniform components) as push constants,
-    * which is the limit on gfx6.
-    *
-    * If changing this value, note the limitation about total_regs in
-    * brw_curbe.c.
-    */
-   int max_uniform_components = 32 * 8;
+   const int max_uniform_components = push_length * 8; /* 8 components/reg */
+
    if (this->uniforms * 4 <= max_uniform_components)
       return;
 
    assert(compiler->supports_pull_constants);
    assert(compiler->compact_params);
 
+   /* If we got here, we also can't have any push ranges */
+   for (unsigned i = 0; i < 4; i++)
+      assert(prog_data->base.ubo_ranges[i].length == 0);
+
    /* Make some sort of choice as to which uniforms get sent to pull
     * constants. We could potentially do something clever here like
     * look for the most infrequently used uniform vec4s, but leave
@@ -1811,34 +1810,64 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
    return payload_reg + vs_prog_data->nr_attribute_slots;
 }
 
+void
+vec4_visitor::setup_push_ranges()
+{
+   /* Only allow 32 registers (256 uniform components) as push constants,
+    * which is the limit on gfx6.
+    *
+    * If changing this value, note the limitation about total_regs in
+    * brw_curbe.c.
+    */
+   const unsigned max_push_length = 32;
+
+   push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
+   push_length = MIN2(push_length, max_push_length);
+
+   /* Shrink UBO push ranges so it all fits in max_push_length */
+   for (unsigned i = 0; i < 4; i++) {
+      struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
+
+      if (push_length + range->length > max_push_length)
+         range->length = max_push_length - push_length; /* use what is left */
+
+      push_length += range->length;
+   }
+   assert(push_length <= max_push_length);
+}
+
 int
 vec4_visitor::setup_uniforms(int reg)
 {
-   prog_data->base.dispatch_grf_start_reg = reg;
+   /* It's possible that uniform compaction will shrink further than expected
+    * so we re-compute the layout and set up our UBO push starts.
+    */
+   const unsigned old_push_length = push_length;
+   push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
+   for (unsigned i = 0; i < 4; i++) {
+      ubo_push_start[i] = push_length; /* first register of range i */
+      push_length += stage_prog_data->ubo_ranges[i].length;
+   }
+   assert(push_length <= old_push_length);
+   if (push_length < old_push_length)
+      assert(compiler->compact_params);
 
    /* The pre-gfx6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
     */
-   if (devinfo->ver < 6 && this->uniforms == 0) {
+   if (devinfo->ver < 6 && push_length == 0) { /* TODO confirm uniforms == 0 */
       brw_stage_prog_data_add_params(stage_prog_data, 4);
       for (unsigned int i = 0; i < 4; i++) {
          unsigned int slot = this->uniforms * 4 + i;
          stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
       }
-
-      this->uniforms++;
-      reg++;
-   } else {
-      reg += ALIGN(uniforms, 2) / 2;
+      push_length = 1; /* one register for the zero params added above */
    }
 
-   for (int i = 0; i < 4; i++)
-      reg += stage_prog_data->ubo_ranges[i].length;
+   prog_data->base.dispatch_grf_start_reg = reg;
+   prog_data->base.curb_read_length = push_length;
 
-   prog_data->base.curb_read_length =
-      reg - prog_data->base.dispatch_grf_start_reg;
-
-   return reg;
+   return reg + push_length; /* next register after the pushed data */
 }
 
 void
@@ -2667,6 +2696,8 @@ vec4_visitor::run()
    if (shader_time_index >= 0)
       emit_shader_time_begin();
 
+   setup_push_ranges();
+
    emit_prolog();
 
    emit_nir_code();
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index b928239f1ca..f27e3d3c4ad 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -108,6 +108,8 @@ public:
    const char *current_annotation;
 
    int first_non_payload_grf;
+   unsigned ubo_push_start[4]; /* offset of each UBO range, in registers */
+   unsigned push_length; /* total push length, in registers */
    unsigned int max_grf;
    brw_analysis live_analysis;
    brw_analysis performance_analysis;
@@ -139,6 +141,7 @@ public:
    void move_push_constants_to_pull_constants();
    void split_uniform_registers();
    void pack_uniform_registers();
+   void setup_push_ranges();
    virtual void invalidate_analysis(brw::analysis_dependency_class c);
    void split_virtual_grfs();
    bool opt_vector_float();
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index d790dbf7f4a..16590047c4d 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1772,6 +1772,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
      prog_data(prog_data),
      fail_msg(NULL),
      first_non_payload_grf(0),
+     ubo_push_start(),
+     push_length(0),
      live_analysis(this), performance_analysis(this),
      need_all_constants_in_pull_buffer(false),
      no_spills(no_spills),