From defdcd2058aa13cf5f50cf558637959cb190261d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 9 Mar 2023 17:44:46 +0100 Subject: [PATCH] aco: adjust RT prolog for shader functions [disables RT] Part-of: --- .../compiler/aco_instruction_selection.cpp | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0f320bf3e4c..e7c7bb5f66a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11581,8 +11581,8 @@ select_rt_prolog(Program* program, ac_shader_config* config, * Local invocation IDs: v[0-2] */ PhysReg in_ring_offsets = get_arg_reg(in_args, in_args->ring_offsets); + PhysReg in_sbt_desc = get_arg_reg(in_args, in_args->rt.sbt_descriptors); PhysReg in_launch_size_addr = get_arg_reg(in_args, in_args->rt.launch_size_addr); - PhysReg in_shader_addr = get_arg_reg(in_args, in_args->rt.traversal_shader); PhysReg in_stack_base = get_arg_reg(in_args, in_args->rt.dynamic_callable_stack_base); PhysReg in_wg_id_x = get_arg_reg(in_args, in_args->workgroup_ids[0]); PhysReg in_wg_id_y = get_arg_reg(in_args, in_args->workgroup_ids[1]); @@ -11606,6 +11606,8 @@ select_rt_prolog(Program* program, ac_shader_config* config, * Ring offsets (rt.shader_pc); PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size); @@ -11614,9 +11616,12 @@ select_rt_prolog(Program* program, ac_shader_config* config, for (unsigned i = 0; i < 3; i++) out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_id).advance(i * 4); PhysReg out_stack_ptr = get_arg_reg(out_args, out_args->rt.dynamic_callable_stack_base); + PhysReg out_shader_va = get_arg_reg(out_args, out_args->rt.next_shader); + PhysReg out_record_ptr = get_arg_reg(out_args, out_args->rt.shader_record); /* Temporaries: */ - num_sgprs = align(num_sgprs, 2) + 2; + num_sgprs = align(num_sgprs, 2) + 4; + PhysReg tmp_raygen_sbt = PhysReg{num_sgprs - 4}; PhysReg tmp_ring_offsets = PhysReg{num_sgprs - 2}; /* Confirm some assumptions about register aliasing */ @@ -11629,6 +11634,10 @@ select_rt_prolog(Program* program, ac_shader_config* config, assert(in_stack_base == out_launch_size_z); assert(in_local_ids[0] == out_launch_ids[0]); + /* load raygen sbt */ + bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2), + Operand::c32(0u)); + /* init scratch */ if (options->gfx_level < GFX9) { /* copy ring offsets to temporary location*/ @@ -11642,9 +11651,9 @@ select_rt_prolog(Program* program, ac_shader_config* config, /* set stack ptr */ bld.vop1(aco_opcode::v_mov_b32, Definition(out_stack_ptr, v1), Operand(in_stack_base, s1)); - /* load RT shader address */ - /* TODO: load this from the SBT, will be possible with separate shader compilation */ - bld.sop1(aco_opcode::s_mov_b64, Definition(out_shader_pc, s2), Operand(in_shader_addr, s2)); + /* load raygen address */ + bld.smem(aco_opcode::s_load_dwordx2, Definition(out_shader_pc, s2), Operand(tmp_raygen_sbt, s2), + Operand::c32(0u)); /* load ray launch sizes */ bld.smem(aco_opcode::s_load_dword, Definition(out_launch_size_z, s1), @@ -11676,6 +11685,23 @@ select_rt_prolog(Program* program, ac_shader_config* config, Operand(tmp_ring_offsets, s2)); } + /* calculate shader record ptr: SBT + RADV_RT_HANDLE_SIZE */ + if (options->gfx_level < GFX9) { + bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2), + Operand(tmp_raygen_sbt, s1), Operand::c32(32u)); + } else { + bld.vop2_e64(aco_opcode::v_add_u32, Definition(out_record_ptr, v1), + Operand(tmp_raygen_sbt, s1), Operand::c32(32u)); + } + bld.vop1(aco_opcode::v_mov_b32, Definition(out_record_ptr.advance(4), v1), + Operand(tmp_raygen_sbt.advance(4), s1)); + + /* initialize shader_va with raygen shader */ + // TODO: we can optimize this away if we don't guard the raygen shader with an IF + bld.vop1(aco_opcode::v_mov_b32, Definition(out_shader_va, v1), Operand(out_shader_pc, s1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(out_shader_va.advance(4), v1), + Operand(out_shader_pc.advance(4), s1)); + /* jump to raygen */ bld.sop1(aco_opcode::s_setpc_b64, Operand(out_shader_pc, s2));