radv: Implement helpers for shader part caching.
Currently, shader part caching logic is duplicated between VS prolog and PS/TCS epilogs. This commit introduces a common abstraction to deduplicate the code. Additionally, there are a few design decisions that divert from the current implementation: 1. A simple mutex is used instead of a reader-writer lock. Prolog/epilog constructions are serialized, removing the need to free duplicate objects in case of a race. 2. A CS-local cache is used to quickly look up an entry without holding a lock. This eliminates locking in over 99% of cases. 3. A set is used to reduce the number of allocations. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26028>
This commit is contained in:
parent
3cd6bb3e5d
commit
611545fbfe
2 changed files with 98 additions and 0 deletions
|
|
@ -2207,6 +2207,75 @@ fail:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_shader_part_cache_init(struct radv_shader_part_cache *cache, struct radv_shader_part_cache_ops *ops)
|
||||
{
|
||||
cache->ops = ops;
|
||||
if (!_mesa_set_init(&cache->entries, NULL, cache->ops->hash, cache->ops->equals))
|
||||
return false;
|
||||
simple_mtx_init(&cache->lock, mtx_plain);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
radv_shader_part_cache_finish(struct radv_device *device, struct radv_shader_part_cache *cache)
|
||||
{
|
||||
set_foreach (&cache->entries, entry)
|
||||
radv_shader_part_unref(device, radv_shader_part_from_cache_entry(entry->key));
|
||||
simple_mtx_destroy(&cache->lock);
|
||||
ralloc_free(cache->entries.table);
|
||||
}
|
||||
|
||||
/*
|
||||
* A cache with atomics-free fast path for prolog / epilog lookups.
|
||||
*
|
||||
* VS prologs and PS/TCS epilogs are used to support dynamic states. In
|
||||
* particular dynamic blend state is heavily used by Zink. These are called
|
||||
* every frame as a part of command buffer building, so these functions are
|
||||
* on the hot path.
|
||||
*
|
||||
* Originally this was implemented with a rwlock, but this lead to high
|
||||
* overhead. To avoid locking altogether in the hot path, the cache is done
|
||||
* at two levels: one at device level, and another at each CS. Access to the
|
||||
* CS cache is externally synchronized and do not require a lock.
|
||||
*/
|
||||
struct radv_shader_part *
|
||||
radv_shader_part_cache_get(struct radv_device *device, struct radv_shader_part_cache *cache, struct set *local_entries,
|
||||
const void *key)
|
||||
{
|
||||
struct set_entry *local, *global;
|
||||
bool local_found, global_found;
|
||||
uint32_t hash = cache->ops->hash(key);
|
||||
|
||||
local = _mesa_set_search_or_add_pre_hashed(local_entries, hash, key, &local_found);
|
||||
if (local_found)
|
||||
return radv_shader_part_from_cache_entry(local->key);
|
||||
|
||||
simple_mtx_lock(&cache->lock);
|
||||
global = _mesa_set_search_or_add_pre_hashed(&cache->entries, hash, key, &global_found);
|
||||
if (global_found) {
|
||||
simple_mtx_unlock(&cache->lock);
|
||||
local->key = global->key;
|
||||
return radv_shader_part_from_cache_entry(global->key);
|
||||
}
|
||||
|
||||
struct radv_shader_part *shader_part = cache->ops->create(device, key);
|
||||
if (!shader_part) {
|
||||
_mesa_set_remove(&cache->entries, global);
|
||||
simple_mtx_unlock(&cache->lock);
|
||||
_mesa_set_remove(local_entries, local);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Make the set entry a pointer to the key, so that the hash and equals
|
||||
* functions from radv_shader_part_cache_ops can be directly used.
|
||||
*/
|
||||
global->key = &shader_part->key;
|
||||
simple_mtx_unlock(&cache->lock);
|
||||
local->key = &shader_part->key;
|
||||
return shader_part;
|
||||
}
|
||||
|
||||
static char *
|
||||
radv_dump_nir_shaders(struct nir_shader *const *shaders, int shader_count)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -621,6 +621,12 @@ struct radv_shader {
|
|||
struct radv_shader_part {
|
||||
uint32_t ref_count;
|
||||
|
||||
union {
|
||||
struct radv_vs_prolog_key vs;
|
||||
struct radv_ps_epilog_key ps;
|
||||
struct radv_tcs_epilog_key tcs;
|
||||
} key;
|
||||
|
||||
uint64_t va;
|
||||
|
||||
struct radeon_winsys_bo *bo;
|
||||
|
|
@ -635,6 +641,18 @@ struct radv_shader_part {
|
|||
char *disasm_string;
|
||||
};
|
||||
|
||||
/* Callbacks that specialize a shader part cache for one kind of key. */
struct radv_shader_part_cache_ops {
   /* Hash of a key; also installed as the hash function of the entry set. */
   uint32_t (*hash)(const void *part_key);

   /* Key comparison; installed as the equality function of the entry set. */
   bool (*equals)(const void *lhs, const void *rhs);

   /* Builds the shader part for a key that is not cached yet. A NULL result
    * is treated as a failure by radv_shader_part_cache_get().
    */
   struct radv_shader_part *(*create)(struct radv_device *dev, const void *part_key);
};
|
||||
|
||||
struct radv_shader_part_cache {
|
||||
simple_mtx_t lock;
|
||||
struct radv_shader_part_cache_ops *ops;
|
||||
struct set entries;
|
||||
};
|
||||
|
||||
struct radv_pipeline_layout;
|
||||
struct radv_shader_stage;
|
||||
|
||||
|
|
@ -722,6 +740,11 @@ struct radv_shader_part *radv_create_tcs_epilog(struct radv_device *device, cons
|
|||
|
||||
void radv_shader_part_destroy(struct radv_device *device, struct radv_shader_part *shader_part);
|
||||
|
||||
/* Initializes `cache` with the callbacks in `ops`. Returns false if the
 * entry set could not be initialized.
 */
bool radv_shader_part_cache_init(struct radv_shader_part_cache *cache,
                                 struct radv_shader_part_cache_ops *ops);

/* Drops a reference on every cached shader part, destroys the cache mutex and
 * frees the entry set's table.
 */
void radv_shader_part_cache_finish(struct radv_device *device,
                                   struct radv_shader_part_cache *cache);

/* Returns the shader part for `key`, checking `local_entries` without locking
 * before falling back to the mutex-protected `cache`, and creating the part
 * through the cache's `create` callback when it is not cached yet. Returns
 * NULL if creation fails.
 */
struct radv_shader_part *radv_shader_part_cache_get(struct radv_device *device,
                                                    struct radv_shader_part_cache *cache,
                                                    struct set *local_entries,
                                                    const void *key);
|
||||
|
||||
uint64_t radv_shader_get_va(const struct radv_shader *shader);
|
||||
struct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc);
|
||||
|
||||
|
|
@ -776,6 +799,12 @@ radv_shader_part_unref(struct radv_device *device, struct radv_shader_part *shad
|
|||
radv_shader_part_destroy(device, shader_part);
|
||||
}
|
||||
|
||||
static inline struct radv_shader_part *
|
||||
radv_shader_part_from_cache_entry(const void *key)
|
||||
{
|
||||
return container_of(key, struct radv_shader_part, key);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
get_tcs_input_vertex_stride(unsigned tcs_num_inputs)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue