radv: Move up radv_get_max_waves, radv_get_max_scratch_waves.
To avoid forward declaration. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26692>
This commit is contained in:
parent
e444908d65
commit
1161f22c27
1 changed files with 53 additions and 53 deletions
|
|
@ -2049,6 +2049,59 @@ radv_shader_upload(struct radv_device *device, struct radv_shader *shader, const
|
|||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const enum amd_gfx_level gfx_level = info->gfx_level;
|
||||
const uint8_t wave_size = shader->info.wave_size;
|
||||
const struct ac_shader_config *conf = &shader->config;
|
||||
unsigned max_simd_waves = info->max_waves_per_simd;
|
||||
unsigned lds_per_wave = 0;
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48;
|
||||
lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
|
||||
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
|
||||
unsigned max_workgroup_size = shader->info.workgroup_size;
|
||||
lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
|
||||
lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
|
||||
}
|
||||
|
||||
if (conf->num_sgprs && gfx_level < GFX10) {
|
||||
unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8);
|
||||
max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
|
||||
}
|
||||
|
||||
if (conf->num_vgprs) {
|
||||
unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
|
||||
unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
|
||||
if (gfx_level >= GFX10_3) {
|
||||
unsigned real_vgpr_gran = info->num_physical_wave64_vgprs_per_simd / 64;
|
||||
vgprs = util_align_npot(vgprs, real_vgpr_gran * (wave_size == 32 ? 2 : 1));
|
||||
}
|
||||
max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
|
||||
}
|
||||
|
||||
unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
|
||||
if (gfx_level >= GFX10)
|
||||
simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
|
||||
|
||||
unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
|
||||
if (lds_per_wave)
|
||||
max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
|
||||
|
||||
return gfx_level >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
|
||||
}
|
||||
|
||||
unsigned
|
||||
radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const unsigned num_cu = device->physical_device->rad_info.num_cu;
|
||||
|
||||
return MIN2(device->scratch_waves, 4 * num_cu * radv_get_max_waves(device, shader, shader->info.stage));
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_shader_create_uncached(struct radv_device *device, const struct radv_shader_binary *binary, bool replayable,
|
||||
struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
|
||||
|
|
@ -2877,59 +2930,6 @@ radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage)
|
|||
};
|
||||
}
|
||||
|
||||
unsigned
|
||||
radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const enum amd_gfx_level gfx_level = info->gfx_level;
|
||||
const uint8_t wave_size = shader->info.wave_size;
|
||||
const struct ac_shader_config *conf = &shader->config;
|
||||
unsigned max_simd_waves = info->max_waves_per_simd;
|
||||
unsigned lds_per_wave = 0;
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48;
|
||||
lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
|
||||
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
|
||||
unsigned max_workgroup_size = shader->info.workgroup_size;
|
||||
lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
|
||||
lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
|
||||
}
|
||||
|
||||
if (conf->num_sgprs && gfx_level < GFX10) {
|
||||
unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8);
|
||||
max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
|
||||
}
|
||||
|
||||
if (conf->num_vgprs) {
|
||||
unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
|
||||
unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
|
||||
if (gfx_level >= GFX10_3) {
|
||||
unsigned real_vgpr_gran = info->num_physical_wave64_vgprs_per_simd / 64;
|
||||
vgprs = util_align_npot(vgprs, real_vgpr_gran * (wave_size == 32 ? 2 : 1));
|
||||
}
|
||||
max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
|
||||
}
|
||||
|
||||
unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
|
||||
if (gfx_level >= GFX10)
|
||||
simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
|
||||
|
||||
unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
|
||||
if (lds_per_wave)
|
||||
max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
|
||||
|
||||
return gfx_level >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
|
||||
}
|
||||
|
||||
unsigned
|
||||
radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const unsigned num_cu = device->physical_device->rad_info.num_cu;
|
||||
|
||||
return MIN2(device->scratch_waves, 4 * num_cu * radv_get_max_waves(device, shader, shader->info.stage));
|
||||
}
|
||||
|
||||
unsigned
|
||||
radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue