diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 7f14d08af31..9bb36cf5c9b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -2475,6 +2475,11 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, descriptors_state->max_sets_bound = MAX2(descriptors_state->max_sets_bound, firstSet + descriptorSetCount); + unsigned dynamic_offset_offset = 0; + for (unsigned i = 0; i < firstSet; i++) { + dynamic_offset_offset += layout->set[i].layout->dynamic_offset_size; + } + for (unsigned i = 0; i < descriptorSetCount; ++i) { unsigned idx = i + firstSet; TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]); @@ -2494,7 +2499,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, uint32_t *src = set->dynamic_descriptors; uint32_t *dst = descriptors_state->dynamic_descriptors + - layout->set[idx].dynamic_offset_start / 4; + dynamic_offset_offset / 4; for (unsigned j = 0; j < set->layout->binding_count; j++) { struct tu_descriptor_set_binding_layout *binding = &set->layout->binding[j]; @@ -2550,15 +2555,17 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, } } } + + dynamic_offset_offset += layout->set[idx].layout->dynamic_offset_size; } assert(dyn_idx == dynamicOffsetCount); - if (layout->dynamic_offset_size) { + if (dynamic_offset_offset) { /* allocate and fill out dynamic descriptor set */ struct tu_cs_memory dynamic_desc_set; int reserved_set_idx = cmd->device->physical_device->reserved_set_idx; VkResult result = tu_cs_alloc(&cmd->sub_cs, - layout->dynamic_offset_size / (4 * A6XX_TEX_CONST_DWORDS), + dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS), A6XX_TEX_CONST_DWORDS, &dynamic_desc_set); if (result != VK_SUCCESS) { vk_command_buffer_set_error(&cmd->vk, result); @@ -2566,7 +2573,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, } memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors, - layout->dynamic_offset_size); + dynamic_offset_offset); assert(reserved_set_idx >= 0); /* reserved set must be bound */ descriptors_state->set_iova[reserved_set_idx] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B; descriptors_state->dynamic_bound = true; diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc index 3dab17978a1..5d106ed16d2 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.cc +++ b/src/freedreno/vulkan/tu_descriptor_set.cc @@ -488,39 +488,15 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, void tu_pipeline_layout_init(struct tu_pipeline_layout *layout) { - unsigned dynamic_offset_size = 0; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - layout->set[set].dynamic_offset_start = dynamic_offset_size; - - if (layout->set[set].layout) - dynamic_offset_size += layout->set[set].layout->dynamic_offset_size; - } - - layout->dynamic_offset_size = dynamic_offset_size; - - /* We only care about INDEPENDENT_SETS for dynamic-offset descriptors, - * where all the descriptors from all the sets are combined into one set - * and we have to provide the dynamic_offset_start dynamically with fast - * linking. - */ - if (dynamic_offset_size == 0) { - layout->independent_sets = false; - } - struct mesa_sha1 ctx; _mesa_sha1_init(&ctx); for (unsigned s = 0; s < layout->num_sets; s++) { if (layout->set[s].layout) sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout); - _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start, - sizeof(layout->set[s].dynamic_offset_start)); } _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets)); _mesa_sha1_update(&ctx, &layout->push_constant_size, sizeof(layout->push_constant_size)); - _mesa_sha1_update(&ctx, &layout->independent_sets, - sizeof(layout->independent_sets)); _mesa_sha1_final(&ctx, layout->sha1); } @@ -562,8 +538,6 @@ tu_CreatePipelineLayout(VkDevice _device, } layout->push_constant_size = align(layout->push_constant_size, 16); - layout->independent_sets = - pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT; tu_pipeline_layout_init(layout); diff --git a/src/freedreno/vulkan/tu_descriptor_set.h b/src/freedreno/vulkan/tu_descriptor_set.h index c272b084e06..92d47a953f0 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.h +++ b/src/freedreno/vulkan/tu_descriptor_set.h @@ -93,14 +93,10 @@ struct tu_pipeline_layout { struct tu_descriptor_set_layout *layout; uint32_t size; - uint32_t dynamic_offset_start; } set[MAX_SETS]; - bool independent_sets; - uint32_t num_sets; uint32_t push_constant_size; - uint32_t dynamic_offset_size; unsigned char sha1[20]; }; diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 6817ff5291f..b4080598b7a 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -168,7 +168,7 @@ tu6_emit_load_state(struct tu_device *device, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: assert(device->physical_device->reserved_set_idx >= 0); base = device->physical_device->reserved_set_idx; - offset = (layout->set[i].dynamic_offset_start + + offset = (pipeline->program.dynamic_descriptor_offsets[i] + binding->dynamic_offset_offset) / 4; FALLTHROUGH; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: @@ -205,7 +205,7 @@ tu6_emit_load_state(struct tu_device *device, case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: assert(device->physical_device->reserved_set_idx >= 0); base = device->physical_device->reserved_set_idx; - offset = (layout->set[i].dynamic_offset_start + + offset = (pipeline->program.dynamic_descriptor_offsets[i] + binding->dynamic_offset_offset) / 4; FALLTHROUGH; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { @@ -405,7 +405,7 @@ static void tu6_emit_dynamic_offset(struct tu_cs *cs, const struct ir3_shader_variant *xs, const struct tu_shader *shader, - struct tu_pipeline_builder *builder) + const struct tu_program_state *program) { const struct tu_physical_device *phys_dev = cs->device->physical_device; if (!xs || shader->const_state.dynamic_offset_loc == UINT32_MAX) @@ -422,8 +422,8 @@ tu6_emit_dynamic_offset(struct tu_cs *cs, for (unsigned i = 0; i < phys_dev->usable_sets; i++) { unsigned dynamic_offset_start = - builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4); - tu_cs_emit(cs, i < builder->layout.num_sets ? dynamic_offset_start : 0); + program->dynamic_descriptor_offsets[i] / (A6XX_TEX_CONST_DWORDS * 4); + tu_cs_emit(cs, dynamic_offset_start); } } @@ -1151,14 +1151,14 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs, template static void tu6_emit_program_config(struct tu_cs *cs, - struct tu_pipeline *pipeline, - struct tu_pipeline_builder *builder, + const struct tu_program_state *prog, + struct tu_shader **shaders, const struct ir3_shader_variant **variants) { STATIC_ASSERT(MESA_SHADER_VERTEX == 0); bool shared_consts_enable = - pipeline->program.shared_consts.type == IR3_PUSH_CONSTS_SHARED; + prog->shared_consts.type == IR3_PUSH_CONSTS_SHARED; tu6_emit_shared_consts_enable(cs, shared_consts_enable); tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP, @@ -1178,7 +1178,7 @@ tu6_emit_program_config(struct tu_cs *cs, for (size_t stage_idx = MESA_SHADER_VERTEX; stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) { gl_shader_stage stage = (gl_shader_stage) stage_idx; - tu6_emit_dynamic_offset(cs, variants[stage], pipeline->shaders[stage], builder); + tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog); } const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX]; @@ -2245,7 +2245,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder, } builder->layout.push_constant_size = library->push_constant_size; - builder->layout.independent_sets |= library->independent_sets; } tu_pipeline_layout_init(&builder->layout); @@ -2261,7 +2260,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder, vk_descriptor_set_layout_ref(&library->layouts[i]->vk); } library->push_constant_size = builder->layout.push_constant_size; - library->independent_sets = builder->layout.independent_sets; } } @@ -2294,6 +2292,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, uint32_t safe_variants = ir3_trim_constlen(variants, builder->device->compiler); + unsigned dynamic_descriptor_sizes[MAX_SETS] = { }; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(variants); stage = (gl_shader_stage) (stage+1)) { if (pipeline->shaders[stage]) { @@ -2303,6 +2303,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, } else { draw_states[stage] = pipeline->shaders[stage]->state; } + + for (unsigned i = 0; i < MAX_SETS; i++) { + if (pipeline->shaders[stage]->dynamic_descriptor_sizes[i] >= 0) { + dynamic_descriptor_sizes[i] = + pipeline->shaders[stage]->dynamic_descriptor_sizes[i]; + } + } } } @@ -2322,6 +2329,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, } } + unsigned dynamic_descriptor_offset = 0; + for (unsigned i = 0; i < MAX_SETS; i++) { + pipeline->program.dynamic_descriptor_offsets[i] = + dynamic_descriptor_offset; + dynamic_descriptor_offset += dynamic_descriptor_sizes[i]; + } + /* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything * else that could depend on that state (like push constants) * @@ -2334,7 +2348,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, * and draw passes. */ tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs); - tu6_emit_program_config(&prog_cs, pipeline, builder, variants); + tu6_emit_program_config(&prog_cs, &pipeline->program, + pipeline->shaders, variants); pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); pipeline->program.vs_state = draw_states[MESA_SHADER_VERTEX]; diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index a96e2cd4fea..523d025e6fe 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -99,6 +99,8 @@ struct tu_program_state struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES]; + unsigned dynamic_descriptor_offsets[MAX_SETS]; + bool per_view_viewport; }; diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 2179a64f8e9..fd4118a04a8 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -188,8 +188,18 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b, switch (binding_layout->type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - if (layout->independent_sets) { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + int offset = 0; + for (unsigned i = 0; i < set; i++) { + if (shader->dynamic_descriptor_sizes[i] >= 0) { + offset += shader->dynamic_descriptor_sizes[i]; + } else { + offset = -1; + break; + } + } + + if (offset < 0) { /* With independent sets, we don't know * layout->set[set].dynamic_offset_start until after link time which * with fast linking means after the shader is compiled. We have to @@ -201,12 +211,13 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b, .base = shader->const_state.dynamic_offset_loc + set); base = nir_iadd(b, base, dynamic_offset_start); } else { - base = nir_imm_int(b, (layout->set[set].dynamic_offset_start + + base = nir_imm_int(b, (offset + binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS)); } assert(dev->physical_device->reserved_set_idx >= 0); set = dev->physical_device->reserved_set_idx; break; + } default: base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)); break; @@ -749,7 +760,21 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev, align(DIV_ROUND_UP(const_state->push_consts.dwords, 4), dev->compiler->const_upload_unit); - if (layout->independent_sets) { + bool unknown_dynamic_size = false; + bool unknown_dynamic_offset = false; + for (unsigned i = 0; i < layout->num_sets; i++) { + if (tu_shader->dynamic_descriptor_sizes[i] == -1) { + unknown_dynamic_size = true; + } else if (unknown_dynamic_size && + tu_shader->dynamic_descriptor_sizes[i] > 0) { + /* If there is an unknown size followed by a known size, then we may + * need to dynamically determine the offset when linking. + */ + unknown_dynamic_offset = true; + } + } + + if (unknown_dynamic_offset) { const_state->dynamic_offset_loc = reserved_consts_vec4 * 4; assert(dev->physical_device->reserved_set_idx >= 0); reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4); @@ -2121,6 +2146,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object, container_of(object, struct tu_shader, base); blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state)); + blob_write_bytes(blob, &shader->dynamic_descriptor_sizes, + sizeof(shader->dynamic_descriptor_sizes)); blob_write_uint32(blob, shader->view_mask); blob_write_uint8(blob, shader->active_desc_sets); @@ -2133,6 +2160,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object, blob_write_uint8(blob, 0); } + + switch (shader->variant->type) { case MESA_SHADER_TESS_EVAL: blob_write_bytes(blob, &shader->tes, sizeof(shader->tes)); @@ -2162,6 +2191,8 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache, return NULL; blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state)); + blob_copy_bytes(blob, &shader->dynamic_descriptor_sizes, + sizeof(shader->dynamic_descriptor_sizes)); shader->view_mask = blob_read_uint32(blob); shader->active_desc_sets = blob_read_uint8(blob); @@ -2305,6 +2336,15 @@ tu_shader_create(struct tu_device *dev, nir->info.stage == MESA_SHADER_GEOMETRY) tu_gather_xfb_info(nir, &so_info); + for (unsigned i = 0; i < layout->num_sets; i++) { + if (layout->set[i].layout) { + shader->dynamic_descriptor_sizes[i] = + layout->set[i].layout->dynamic_offset_size; + } else { + shader->dynamic_descriptor_sizes[i] = -1; + } + } + unsigned reserved_consts_vec4 = 0; NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4); @@ -2464,6 +2504,9 @@ tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader, if (fragment_density_map) (*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; + for (unsigned i = 0; i < MAX_SETS; i++) + (*shader)->dynamic_descriptor_sizes[i] = -1; + struct ir3_shader *ir3_shader = ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info); (*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false); diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h index 1d6ec2e4af8..eed38243e9d 100644 --- a/src/freedreno/vulkan/tu_shader.h +++ b/src/freedreno/vulkan/tu_shader.h @@ -13,6 +13,7 @@ #include "tu_common.h" #include "tu_cs.h" #include "tu_suballoc.h" +#include "tu_descriptor_set.h" struct tu_inline_ubo { @@ -69,6 +70,13 @@ struct tu_shader uint32_t view_mask; uint8_t active_desc_sets; + /* The dynamic buffer descriptor size for descriptor sets that we know + * about. This is used when linking to piece together the sizes and from + * there calculate the offsets. It's -1 if we don't know because the + * descriptor set layout is NULL. + */ + int dynamic_descriptor_sizes[MAX_SETS]; + union { struct { unsigned patch_type;