aco: adjust RT prolog for shader functions [disables RT]

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22096>
This commit is contained in:
Daniel Schürmann 2023-03-09 17:44:46 +01:00 committed by Marge Bot
parent 302649dda1
commit defdcd2058

View file

@ -11581,8 +11581,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Local invocation IDs: v[0-2]
*/
PhysReg in_ring_offsets = get_arg_reg(in_args, in_args->ring_offsets);
PhysReg in_sbt_desc = get_arg_reg(in_args, in_args->rt.sbt_descriptors);
PhysReg in_launch_size_addr = get_arg_reg(in_args, in_args->rt.launch_size_addr);
PhysReg in_shader_addr = get_arg_reg(in_args, in_args->rt.traversal_shader);
PhysReg in_stack_base = get_arg_reg(in_args, in_args->rt.dynamic_callable_stack_base);
PhysReg in_wg_id_x = get_arg_reg(in_args, in_args->workgroup_ids[0]);
PhysReg in_wg_id_y = get_arg_reg(in_args, in_args->workgroup_ids[1]);
@ -11606,6 +11606,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Ring offsets (<GFX9 only): s[12-13]
* Ray launch IDs: v[0-2]
* Stack pointer: v[3]
* Shader VA: v[4-5]
* Shader Record Ptr: v[6-7]
*/
PhysReg out_shader_pc = get_arg_reg(out_args, out_args->rt.shader_pc);
PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size);
@ -11614,9 +11616,12 @@ select_rt_prolog(Program* program, ac_shader_config* config,
for (unsigned i = 0; i < 3; i++)
out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_id).advance(i * 4);
PhysReg out_stack_ptr = get_arg_reg(out_args, out_args->rt.dynamic_callable_stack_base);
PhysReg out_shader_va = get_arg_reg(out_args, out_args->rt.next_shader);
PhysReg out_record_ptr = get_arg_reg(out_args, out_args->rt.shader_record);
/* Temporaries: */
num_sgprs = align(num_sgprs, 2) + 2;
num_sgprs = align(num_sgprs, 2) + 4;
PhysReg tmp_raygen_sbt = PhysReg{num_sgprs - 4};
PhysReg tmp_ring_offsets = PhysReg{num_sgprs - 2};
/* Confirm some assumptions about register aliasing */
@ -11629,6 +11634,10 @@ select_rt_prolog(Program* program, ac_shader_config* config,
assert(in_stack_base == out_launch_size_z);
assert(in_local_ids[0] == out_launch_ids[0]);
/* load raygen sbt */
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2),
Operand::c32(0u));
/* init scratch */
if (options->gfx_level < GFX9) {
/* copy ring offsets to temporary location*/
@ -11642,9 +11651,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
/* set stack ptr */
bld.vop1(aco_opcode::v_mov_b32, Definition(out_stack_ptr, v1), Operand(in_stack_base, s1));
/* load RT shader address */
/* TODO: load this from the SBT, will be possible with separate shader compilation */
bld.sop1(aco_opcode::s_mov_b64, Definition(out_shader_pc, s2), Operand(in_shader_addr, s2));
/* load raygen address */
bld.smem(aco_opcode::s_load_dwordx2, Definition(out_shader_pc, s2), Operand(tmp_raygen_sbt, s2),
Operand::c32(0u));
/* load ray launch sizes */
bld.smem(aco_opcode::s_load_dword, Definition(out_launch_size_z, s1),
@ -11676,6 +11685,23 @@ select_rt_prolog(Program* program, ac_shader_config* config,
Operand(tmp_ring_offsets, s2));
}
/* calculate shader record ptr: SBT + RADV_RT_HANDLE_SIZE */
if (options->gfx_level < GFX9) {
bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2),
Operand(tmp_raygen_sbt, s1), Operand::c32(32u));
} else {
bld.vop2_e64(aco_opcode::v_add_u32, Definition(out_record_ptr, v1),
Operand(tmp_raygen_sbt, s1), Operand::c32(32u));
}
bld.vop1(aco_opcode::v_mov_b32, Definition(out_record_ptr.advance(4), v1),
Operand(tmp_raygen_sbt.advance(4), s1));
/* initialize shader_va with raygen shader */
// TODO: we can optimize this away if we don't guard the raygen shader with an IF
bld.vop1(aco_opcode::v_mov_b32, Definition(out_shader_va, v1), Operand(out_shader_pc, s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(out_shader_va.advance(4), v1),
Operand(out_shader_pc.advance(4), s1));
/* jump to raygen */
bld.sop1(aco_opcode::s_setpc_b64, Operand(out_shader_pc, s2));