From 8fe6a8d395cf2f63518911891fe08bb987864eac Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Mon, 5 Feb 2024 19:10:44 +0100 Subject: [PATCH] radv/rt: Optimize update shader VGPR usage Brings VGPR allocation down from 72 (absolutely insane) to 32. We can now reach the theoretical maximum occupancy of 16 waves per SIMD. Part-of: --- src/amd/vulkan/bvh/update.comp | 49 +++++++++++++++++----------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/amd/vulkan/bvh/update.comp b/src/amd/vulkan/bvh/update.comp index 905f807ebe6..c3c740238f2 100644 --- a/src/amd/vulkan/bvh/update.comp +++ b/src/amd/vulkan/bvh/update.comp @@ -74,17 +74,9 @@ void main() { bool is_active; if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { is_active = build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x); - } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } else { VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - /* arrayOfPointers */ - if (args.geom_data.stride == 8) { - src_ptr = DEREF(REF(VOID_REF)(src_ptr)); - } - - is_active = build_instance(bounds, src_ptr, dst_ptr, gl_GlobalInvocationID.x); + is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } if (!is_active) @@ -110,10 +102,15 @@ void main() { gl_StorageSemanticsBuffer, gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); - radv_bvh_box32_node node = DEREF(REF(radv_bvh_box32_node)OFFSET(src_bvh, offset)); + REF(radv_bvh_box32_node) src_node = REF(radv_bvh_box32_node)OFFSET(src_bvh, offset); + REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset); + uint32_t children[4]; + for (uint32_t i = 0; i < 4; ++i) + children[i] = DEREF(src_node).children[i]; + uint32_t valid_child_count = 0; for (uint32_t i = 0; i < 4; ++valid_child_count, ++i) - if (node.children[i] == RADV_BVH_INVALID_NODE) + if (children[i] == RADV_BVH_INVALID_NODE) break; /* Check if all children have been processed. As this is an atomic the last path coming from @@ -127,33 +124,37 @@ void main() { if (ready_child_count != valid_child_count - 1) break; + for (uint32_t i = 0; i < 4; ++i) + DEREF(dst_node).children[i] = children[i]; + for (uint32_t i = 0; i < valid_child_count; ++i) { - uint32_t child_offset = id_to_offset(node.children[i]); + uint32_t child_offset = id_to_offset(children[i]); + radv_aabb child_bounds; if (child_offset == dst_offset) - node.coords[i] = bounds; + child_bounds = bounds; else if (child_offset >= internal_nodes_offset) { - radv_aabb child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); - radv_bvh_box32_node child_node = DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset)); + child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); + REF(radv_bvh_box32_node) child_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset); for (uint32_t j = 0; j < 4; ++j) { - if (child_node.children[j] == RADV_BVH_INVALID_NODE) + if (DEREF(child_node).children[j] == RADV_BVH_INVALID_NODE) break; - child_bounds.min = min(child_bounds.min, child_node.coords[j].min); - child_bounds.max = max(child_bounds.max, child_node.coords[j].max); + child_bounds.min = min(child_bounds.min, DEREF(child_node).coords[j].min); + child_bounds.max = max(child_bounds.max, DEREF(child_node).coords[j].max); } - node.coords[i] = child_bounds; } else { uint32_t child_index = (child_offset - first_leaf_offset) / leaf_node_size; - node.coords[i] = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); + child_bounds = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); } - } - DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset)) = node; + DEREF(dst_node).coords[i] = child_bounds; + } if (parent_id == RADV_BVH_ROOT_NODE) { radv_aabb root_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); for (uint32_t i = 0; i < valid_child_count; ++i) { - root_bounds.min = min(root_bounds.min, node.coords[i].min); - root_bounds.max = max(root_bounds.max, node.coords[i].max); + radv_aabb bounds = DEREF(dst_node).coords[i]; + root_bounds.min = min(root_bounds.min, bounds.min); + root_bounds.max = max(root_bounds.max, bounds.max); } DEREF(args.dst).aabb = root_bounds; }