radv: Use shader part caching helpers for VS prolog and PS/TCS epilog.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26028>
This commit is contained in:
Tatsuyuki Ishi 2023-11-14 17:17:28 +09:00 committed by Marge Bot
parent 611545fbfe
commit 538ca7801a
4 changed files with 140 additions and 208 deletions

View file

@ -291,6 +291,33 @@ radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, ui
radv_write_data(cmd_buffer, engine_sel, va, size / 4, zeroes, false);
}
/**
 * Release the entry tables of the three per-command-buffer shader part sets
 * (VS prologs, PS epilogs, TCS epilogs).
 *
 * The tables are freed unconditionally, in the same order as before. A set
 * whose device-level cache has no ops is never passed to _mesa_set_init by
 * radv_cmd_buffer_init_shader_part_cache.
 * NOTE(review): this presumably relies on such a set keeping a NULL table and
 * on ralloc_free() accepting NULL - confirm.
 */
static void
radv_cmd_buffer_finish_shader_part_cache(struct radv_cmd_buffer *cmd_buffer)
{
   void *const tables[] = {
      cmd_buffer->vs_prologs.table,
      cmd_buffer->ps_epilogs.table,
      cmd_buffer->tcs_epilogs.table,
   };

   for (unsigned i = 0; i < sizeof(tables) / sizeof(tables[0]); i++)
      ralloc_free(tables[i]);
}
/**
 * Initialise the per-command-buffer sets that sit in front of the device-level
 * shader part caches.
 *
 * A set is only initialised when the matching device cache has ops installed;
 * it reuses that cache's hash and equality callbacks. Sets are initialised in
 * the order VS prologs, TCS epilogs, PS epilogs.
 *
 * @return true on success, false as soon as one _mesa_set_init call fails.
 *         Sets initialised before the failure are not torn down here.
 */
static bool
radv_cmd_buffer_init_shader_part_cache(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer)
{
   if (device->vs_prologs.ops &&
       !_mesa_set_init(&cmd_buffer->vs_prologs, NULL, device->vs_prologs.ops->hash, device->vs_prologs.ops->equals))
      return false;

   if (device->tcs_epilogs.ops &&
       !_mesa_set_init(&cmd_buffer->tcs_epilogs, NULL, device->tcs_epilogs.ops->hash,
                       device->tcs_epilogs.ops->equals))
      return false;

   if (device->ps_epilogs.ops &&
       !_mesa_set_init(&cmd_buffer->ps_epilogs, NULL, device->ps_epilogs.ops->hash, device->ps_epilogs.ops->equals))
      return false;

   return true;
}
static void
radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
@ -315,6 +342,8 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
if (cmd_buffer->transfer.copy_temp)
cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->transfer.copy_temp);
radv_cmd_buffer_finish_shader_part_cache(cmd_buffer);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set;
free(set->mapped_ptr);
@ -346,6 +375,11 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer **
return result;
}
if (!radv_cmd_buffer_init_shader_part_cache(device, cmd_buffer)) {
radv_destroy_cmd_buffer(&cmd_buffer->vk);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
list_inithead(&cmd_buffer->upload.list);
cmd_buffer->device = device;
@ -3708,22 +3742,6 @@ radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_
return start_index + offset_from_start_index + first;
}
/**
 * Hash callback for VS prolog keys.
 *
 * Hashes the full sizeof(struct radv_vs_prolog_key) bytes of the key, so every
 * byte of the structure contributes to the result.
 */
uint32_t
radv_hash_vs_prolog(const void *key_)
{
   const struct radv_vs_prolog_key *prolog_key = key_;

   return _mesa_hash_data(prolog_key, sizeof(*prolog_key));
}
/**
 * Equality callback for VS prolog keys.
 *
 * @return true when the two keys are byte-for-byte identical.
 */
bool
radv_cmp_vs_prolog(const void *a_, const void *b_)
{
   const struct radv_vs_prolog_key *lhs = a_;
   const struct radv_vs_prolog_key *rhs = b_;

   return !memcmp(lhs, rhs, sizeof(*lhs));
}
static struct radv_shader_part *
lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, uint32_t *nontrivial_divisors)
{
@ -3819,36 +3837,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
key.next_stage = vs_shader->info.stage;
}
uint32_t hash = radv_hash_vs_prolog(&key);
u_rwlock_rdlock(&device->vs_prologs_lock);
struct hash_entry *prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, &key);
u_rwlock_rdunlock(&device->vs_prologs_lock);
if (!prolog_entry) {
u_rwlock_wrlock(&device->vs_prologs_lock);
prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, &key);
if (prolog_entry) {
u_rwlock_wrunlock(&device->vs_prologs_lock);
return prolog_entry->data;
}
prolog = radv_create_vs_prolog(device, &key);
struct radv_vs_prolog_key *key2 = malloc(sizeof(key));
if (!prolog || !key2) {
radv_shader_part_unref(device, prolog);
free(key2);
u_rwlock_wrunlock(&device->vs_prologs_lock);
return NULL;
}
memcpy(key2, &key, sizeof(key));
_mesa_hash_table_insert_pre_hashed(device->vs_prologs, hash, key2, prolog);
u_rwlock_wrunlock(&device->vs_prologs_lock);
return prolog;
}
return prolog_entry->data;
return radv_shader_part_cache_get(device, &device->vs_prologs, &cmd_buffer->vs_prologs, &key);
}
static void
@ -4179,28 +4168,12 @@ radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer)
}
}
/**
 * Hash callback for PS epilog keys.
 *
 * Hashes the full sizeof(struct radv_ps_epilog_key) bytes of the key, so every
 * byte of the structure contributes to the result.
 */
uint32_t
radv_hash_ps_epilog(const void *key_)
{
   const struct radv_ps_epilog_key *epilog_key = key_;

   return _mesa_hash_data(epilog_key, sizeof(*epilog_key));
}
/**
 * Equality callback for PS epilog keys.
 *
 * @return true when the two keys are byte-for-byte identical.
 */
bool
radv_cmp_ps_epilog(const void *a_, const void *b_)
{
   const struct radv_ps_epilog_key *lhs = a_;
   const struct radv_ps_epilog_key *rhs = b_;

   return !memcmp(lhs, rhs, sizeof(*lhs));
}
static struct radv_shader_part *
lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct radv_device *device = cmd_buffer->device;
struct radv_shader_part *epilog = NULL;
struct radv_ps_epilog_state state = {0};
state.color_attachment_count = render->color_att_count;
@ -4232,51 +4205,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
}
struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state, true);
uint32_t hash = radv_hash_ps_epilog(&key);
u_rwlock_rdlock(&device->ps_epilogs_lock);
struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
u_rwlock_rdunlock(&device->ps_epilogs_lock);
if (!epilog_entry) {
u_rwlock_wrlock(&device->ps_epilogs_lock);
epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
if (epilog_entry) {
u_rwlock_wrunlock(&device->ps_epilogs_lock);
return epilog_entry->data;
}
epilog = radv_create_ps_epilog(device, &key, NULL);
struct radv_ps_epilog_key *key2 = malloc(sizeof(*key2));
if (!epilog || !key2) {
radv_shader_part_unref(device, epilog);
free(key2);
u_rwlock_wrunlock(&device->ps_epilogs_lock);
return NULL;
}
memcpy(key2, &key, sizeof(*key2));
_mesa_hash_table_insert_pre_hashed(device->ps_epilogs, hash, key2, epilog);
u_rwlock_wrunlock(&device->ps_epilogs_lock);
return epilog;
}
return epilog_entry->data;
}
/**
 * Hash callback for TCS epilog keys.
 *
 * Hashes the full sizeof(struct radv_tcs_epilog_key) bytes of the key, so
 * every byte of the structure contributes to the result.
 */
uint32_t
radv_hash_tcs_epilog(const void *key_)
{
   const struct radv_tcs_epilog_key *epilog_key = key_;

   return _mesa_hash_data(epilog_key, sizeof(*epilog_key));
}
bool
radv_cmp_tcs_epilog(const void *a_, const void *b_)
{
const struct radv_tcs_epilog_key *a = a_;
const struct radv_tcs_epilog_key *b = b_;
return memcmp(a, b, sizeof(*a)) == 0;
return radv_shader_part_cache_get(device, &device->ps_epilogs, &cmd_buffer->ps_epilogs, &key);
}
static struct radv_shader_part *
@ -4285,7 +4214,6 @@ lookup_tcs_epilog(struct radv_cmd_buffer *cmd_buffer)
const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL];
const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL);
struct radv_device *device = cmd_buffer->device;
struct radv_shader_part *epilog = NULL;
struct radv_tcs_epilog_key key = {
.primitive_mode = tes->info.tes._primitive_mode,
@ -4293,36 +4221,7 @@ lookup_tcs_epilog(struct radv_cmd_buffer *cmd_buffer)
.tcs_out_patch_fits_subgroup = tcs->info.wave_size % tcs->info.tcs.tcs_vertices_out == 0,
};
uint32_t hash = radv_hash_tcs_epilog(&key);
u_rwlock_rdlock(&device->tcs_epilogs_lock);
struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->tcs_epilogs, hash, &key);
u_rwlock_rdunlock(&device->tcs_epilogs_lock);
if (!epilog_entry) {
u_rwlock_wrlock(&device->tcs_epilogs_lock);
epilog_entry = _mesa_hash_table_search_pre_hashed(device->tcs_epilogs, hash, &key);
if (epilog_entry) {
u_rwlock_wrunlock(&device->tcs_epilogs_lock);
return epilog_entry->data;
}
epilog = radv_create_tcs_epilog(device, &key);
struct radv_tcs_epilog_key *key2 = malloc(sizeof(*key2));
if (!epilog || !key2) {
radv_shader_part_unref(device, epilog);
free(key2);
u_rwlock_wrunlock(&device->tcs_epilogs_lock);
return NULL;
}
memcpy(key2, &key, sizeof(*key2));
_mesa_hash_table_insert_pre_hashed(device->tcs_epilogs, hash, key2, epilog);
u_rwlock_wrunlock(&device->tcs_epilogs_lock);
return epilog;
}
return epilog_entry->data;
return radv_shader_part_cache_get(device, &device->tcs_epilogs, &cmd_buffer->tcs_epilogs, &key);
}
static void

View file

@ -156,12 +156,39 @@ radv_device_finish_border_color(struct radv_device *device)
}
}
/**
 * Shader part cache "create" callback for VS prologs.
 *
 * Adapts the untyped key pointer used by the cache ops to the typed
 * radv_create_vs_prolog() entry point.
 *
 * @param device  Device the prolog is created for.
 * @param _key    Pointer to a struct radv_vs_prolog_key.
 * @return whatever radv_create_vs_prolog() returns for that key.
 */
static struct radv_shader_part *
_radv_create_vs_prolog(struct radv_device *device, const void *_key)
{
   /* radv_create_vs_prolog() takes a const key, so keep the qualifier instead
    * of casting it away; void * converts implicitly in C. */
   const struct radv_vs_prolog_key *key = _key;

   return radv_create_vs_prolog(device, key);
}
/**
 * Hash callback for VS prolog keys.
 *
 * Hashes the full sizeof(struct radv_vs_prolog_key) bytes of the key, so every
 * byte of the structure contributes to the result.
 */
static uint32_t
radv_hash_vs_prolog(const void *key_)
{
   const struct radv_vs_prolog_key *prolog_key = key_;

   return _mesa_hash_data(prolog_key, sizeof(*prolog_key));
}
/**
 * Equality callback for VS prolog keys.
 *
 * @return true when the two keys are byte-for-byte identical.
 */
static bool
radv_cmp_vs_prolog(const void *a_, const void *b_)
{
   const struct radv_vs_prolog_key *lhs = a_;
   const struct radv_vs_prolog_key *rhs = b_;

   return !memcmp(lhs, rhs, sizeof(*lhs));
}
/* Callbacks that specialise the generic shader part cache for VS prologs:
 * how to hash a key, compare two keys, and build a prolog from a key. */
static struct radv_shader_part_cache_ops vs_prolog_ops = {
   .hash = radv_hash_vs_prolog,
   .equals = radv_cmp_vs_prolog,
   .create = _radv_create_vs_prolog,
};
static VkResult
radv_device_init_vs_prologs(struct radv_device *device)
{
u_rwlock_init(&device->vs_prologs_lock);
device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog);
if (!device->vs_prologs)
if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
/* don't pre-compile prologs if we want to print them */
@ -208,13 +235,8 @@ radv_device_init_vs_prologs(struct radv_device *device)
static void
radv_device_finish_vs_prologs(struct radv_device *device)
{
if (device->vs_prologs) {
hash_table_foreach (device->vs_prologs, entry) {
free((void *)entry->key);
radv_shader_part_unref(device, entry->data);
}
_mesa_hash_table_destroy(device->vs_prologs, NULL);
}
if (device->vs_prologs.ops)
radv_shader_part_cache_finish(device, &device->vs_prologs);
for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++) {
if (!device->simple_vs_prologs[i])
@ -231,54 +253,64 @@ radv_device_finish_vs_prologs(struct radv_device *device)
}
}
static VkResult
radv_device_init_ps_epilogs(struct radv_device *device)
static struct radv_shader_part *
_radv_create_ps_epilog(struct radv_device *device, const void *_key)
{
u_rwlock_init(&device->ps_epilogs_lock);
device->ps_epilogs = _mesa_hash_table_create(NULL, &radv_hash_ps_epilog, &radv_cmp_ps_epilog);
if (!device->ps_epilogs)
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
return VK_SUCCESS;
struct radv_ps_epilog_key *key = (struct radv_ps_epilog_key *)_key;
return radv_create_ps_epilog(device, key, NULL);
}
static void
radv_device_finish_ps_epilogs(struct radv_device *device)
static uint32_t
radv_hash_ps_epilog(const void *key_)
{
if (device->ps_epilogs) {
hash_table_foreach (device->ps_epilogs, entry) {
free((void *)entry->key);
radv_shader_part_unref(device, entry->data);
}
_mesa_hash_table_destroy(device->ps_epilogs, NULL);
}
const struct radv_ps_epilog_key *key = key_;
return _mesa_hash_data(key, sizeof(*key));
}
static VkResult
radv_device_init_tcs_epilogs(struct radv_device *device)
static bool
radv_cmp_ps_epilog(const void *a_, const void *b_)
{
u_rwlock_init(&device->tcs_epilogs_lock);
const struct radv_ps_epilog_key *a = a_;
const struct radv_ps_epilog_key *b = b_;
device->tcs_epilogs = _mesa_hash_table_create(NULL, &radv_hash_tcs_epilog, &radv_cmp_tcs_epilog);
if (!device->tcs_epilogs)
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
return VK_SUCCESS;
return memcmp(a, b, sizeof(*a)) == 0;
}
static void
radv_device_finish_tcs_epilogs(struct radv_device *device)
/* Callbacks that specialise the generic shader part cache for PS epilogs:
 * how to hash a key, compare two keys, and build an epilog from a key. */
static struct radv_shader_part_cache_ops ps_epilog_ops = {
   .hash = radv_hash_ps_epilog,
   .equals = radv_cmp_ps_epilog,
   .create = _radv_create_ps_epilog,
};
static struct radv_shader_part *
_radv_create_tcs_epilog(struct radv_device *device, const void *_key)
{
if (device->tcs_epilogs) {
hash_table_foreach (device->tcs_epilogs, entry) {
free((void *)entry->key);
radv_shader_part_unref(device, entry->data);
}
_mesa_hash_table_destroy(device->tcs_epilogs, NULL);
}
struct radv_tcs_epilog_key *key = (struct radv_tcs_epilog_key *)_key;
return radv_create_tcs_epilog(device, key);
}
/**
 * Hash callback for TCS epilog keys.
 *
 * Hashes the full sizeof(struct radv_tcs_epilog_key) bytes of the key, so
 * every byte of the structure contributes to the result.
 */
static uint32_t
radv_hash_tcs_epilog(const void *key_)
{
   const struct radv_tcs_epilog_key *epilog_key = key_;

   return _mesa_hash_data(epilog_key, sizeof(*epilog_key));
}
/**
 * Equality callback for TCS epilog keys.
 *
 * @return true when the two keys are byte-for-byte identical.
 */
static bool
radv_cmp_tcs_epilog(const void *a_, const void *b_)
{
   const struct radv_tcs_epilog_key *lhs = a_;
   const struct radv_tcs_epilog_key *rhs = b_;

   return !memcmp(lhs, rhs, sizeof(*lhs));
}
/* Callbacks that specialise the generic shader part cache for TCS epilogs:
 * how to hash a key, compare two keys, and build an epilog from a key. */
static struct radv_shader_part_cache_ops tcs_epilog_ops = {
   .hash = radv_hash_tcs_epilog,
   .equals = radv_cmp_tcs_epilog,
   .create = _radv_create_tcs_epilog,
};
VkResult
radv_device_init_vrs_state(struct radv_device *device)
{
@ -1087,15 +1119,17 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
}
if (tcs_epilogs) {
result = radv_device_init_tcs_epilogs(device);
if (result != VK_SUCCESS)
if (!radv_shader_part_cache_init(&device->tcs_epilogs, &tcs_epilog_ops)) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
if (ps_epilogs) {
result = radv_device_init_ps_epilogs(device);
if (result != VK_SUCCESS)
if (!radv_shader_part_cache_init(&device->ps_epilogs, &ps_epilog_ops)) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS))
@ -1164,8 +1198,10 @@ fail:
radv_device_finish_notifier(device);
radv_device_finish_vs_prologs(device);
radv_device_finish_tcs_epilogs(device);
radv_device_finish_ps_epilogs(device);
if (device->tcs_epilogs.ops)
radv_shader_part_cache_finish(device, &device->tcs_epilogs);
if (device->ps_epilogs.ops)
radv_shader_part_cache_finish(device, &device->ps_epilogs);
radv_device_finish_border_color(device);
radv_destroy_shader_upload_queue(device);
@ -1217,8 +1253,10 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
radv_device_finish_notifier(device);
radv_device_finish_vs_prologs(device);
radv_device_finish_tcs_epilogs(device);
radv_device_finish_ps_epilogs(device);
if (device->tcs_epilogs.ops)
radv_shader_part_cache_finish(device, &device->tcs_epilogs);
if (device->ps_epilogs.ops)
radv_shader_part_cache_finish(device, &device->ps_epilogs);
radv_device_finish_border_color(device);
radv_device_finish_vrs_image(device);

View file

@ -1099,22 +1099,16 @@ struct radv_device {
struct radv_device_memory *mem;
} vrs;
struct u_rwlock vs_prologs_lock;
struct hash_table *vs_prologs;
/* Prime blit sdma queue */
struct radv_queue *private_sdma_queue;
struct radv_shader_part_cache vs_prologs;
struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
struct radv_shader_part *instance_rate_vs_prologs[816];
/* PS epilogs */
struct u_rwlock ps_epilogs_lock;
struct hash_table *ps_epilogs;
struct radv_shader_part_cache ps_epilogs;
/* TCS epilogs */
struct u_rwlock tcs_epilogs_lock;
struct hash_table *tcs_epilogs;
struct radv_shader_part_cache tcs_epilogs;
simple_mtx_t trace_mtx;
@ -1838,6 +1832,10 @@ struct radv_cmd_buffer {
uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
struct set vs_prologs;
struct set ps_epilogs;
struct set tcs_epilogs;
/**
* Gang state.
* Used when the command buffer needs work done on a different queue
@ -2004,11 +2002,6 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_get_vgt_index_size(uint32_t type);
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
uint32_t radv_hash_vs_prolog(const void *key_);
bool radv_cmp_vs_prolog(const void *a_, const void *b_);
uint32_t radv_hash_ps_epilog(const void *key_);
bool radv_cmp_ps_epilog(const void *a_, const void *b_);
struct radv_ps_epilog_state {
uint8_t color_attachment_count;
@ -2025,9 +2018,6 @@ struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *
const struct radv_ps_epilog_state *state,
bool disable_mrt_compaction);
uint32_t radv_hash_tcs_epilog(const void *key_);
bool radv_cmp_tcs_epilog(const void *a_, const void *b_);
bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
unsigned custom_blend_mode);

View file

@ -2650,6 +2650,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
if (!prolog)
goto fail;
prolog->key.vs = *key;
prolog->nontrivial_divisors = key->nontrivial_divisors;
if (options.dump_shader) {
@ -2704,6 +2705,8 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke
if (!epilog)
goto fail;
epilog->key.ps = *key;
if (options.dump_shader) {
fprintf(stderr, "Fragment epilog");
fprintf(stderr, "\ndisasm:\n%s\n", epilog->disasm_string);
@ -2757,6 +2760,8 @@ radv_create_tcs_epilog(struct radv_device *device, const struct radv_tcs_epilog_
if (!epilog)
goto fail;
epilog->key.tcs = *key;
if (options.dump_shader) {
fprintf(stderr, "TCS epilog");
fprintf(stderr, "\ndisasm:\n%s\n", epilog->disasm_string);