radv/bvh: Stop emitting leaf nodes inside the encoder

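Avoids unnecessary copies: the leaf build shader now writes the final BVH leaf nodes straight into the acceleration structure, and the intermediate representation only keeps a radv_ir_node (bounds and cost) per leaf.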

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26725>
This commit is contained in:
Konstantin Seurer 2023-12-16 17:09:52 +01:00 committed by Marge Bot
parent 8fca54a754
commit 662f86c533
6 changed files with 65 additions and 139 deletions

View file

@ -240,9 +240,6 @@ TYPE(radv_bvh_box32_node, 4);
/* One TYPE() instantiation per structure that the shaders access through REF()/DEREF().
 * NOTE(review): the second argument is presumably the alignment in bytes — confirm against the TYPE macro definition.
 */
TYPE(radv_ir_header, 4);
TYPE(radv_ir_node, 4);
TYPE(radv_ir_box_node, 4);
TYPE(radv_ir_triangle_node, 4);
TYPE(radv_ir_aabb_node, 4);
TYPE(radv_ir_instance_node, 8);
TYPE(radv_global_sync_data, 4);
@ -313,25 +310,6 @@ ir_type_to_bvh_type(uint32_t type)
return RADV_BVH_INVALID_NODE;
}
/* Computes the bounds of an instance by transforming the AABB stored in the
 * acceleration structure header at base_ptr with otw_matrix.
 *
 * base_ptr:   address that is dereferenced as a radv_accel_struct_header.
 * otw_matrix: transform applied to header.aabb; element [row][3] is used as
 *             the constant term of each output component.
 *
 * Returns an axis-aligned box that encloses the transformed header.aabb.
 */
radv_aabb
calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix)
{
   radv_accel_struct_header src_header = DEREF(REF(radv_accel_struct_header)(base_ptr));

   radv_aabb result;
   for (uint32_t row = 0; row < 3; ++row) {
      /* Both bounds start at the constant term of this row. */
      float lower = otw_matrix[row][3];
      float upper = otw_matrix[row][3];

      for (uint32_t axis = 0; axis < 3; ++axis) {
         /* A coefficient may be negative, so either product can be the smaller one. */
         float from_min = otw_matrix[row][axis] * src_header.aabb.min[axis];
         float from_max = otw_matrix[row][axis] * src_header.aabb.max[axis];

         lower += min(from_min, from_max);
         upper += max(from_min, from_max);
      }

      result.min[row] = lower;
      result.max[row] = upper;
   }

   return result;
}
float
aabb_surface_area(radv_aabb aabb)
{

View file

@ -33,6 +33,7 @@
#endif
struct leaf_args {
VOID_REF ir;
VOID_REF bvh;
REF(radv_ir_header) header;
REF(key_id_pair) ids;

View file

@ -120,30 +120,6 @@ struct radv_ir_box_node {
uint32_t bvh_offset;
};
/* Intermediate (IR) leaf node for an AABB primitive: the common radv_ir_node
 * header followed by the ids that identify the primitive.
 */
struct radv_ir_aabb_node {
/* Common IR node data shared by all IR node types. */
radv_ir_node base;
uint32_t primitive_id;
/* NOTE(review): presumably geometry index and geometry flags packed into one dword — confirm bit layout. */
uint32_t geometry_id_and_flags;
};
/* Intermediate (IR) leaf node for a triangle: the common radv_ir_node header
 * followed by the triangle data.
 */
struct radv_ir_triangle_node {
/* Common IR node data shared by all IR node types. */
radv_ir_node base;
/* NOTE(review): presumably indexed as [vertex][component] — confirm against the writer. */
float coords[3][3];
uint32_t triangle_id;
uint32_t id;
/* NOTE(review): presumably geometry index and geometry flags packed into one dword — confirm bit layout. */
uint32_t geometry_id_and_flags;
};
/* Intermediate (IR) leaf node for an instance: the common radv_ir_node header
 * followed by the per-instance data.
 */
struct radv_ir_instance_node {
/* Common IR node data shared by all IR node types. */
radv_ir_node base;
/* See radv_bvh_instance_node */
/* Address of the referenced acceleration structure; the shaders dereference it as a radv_accel_struct_header. */
uint64_t base_ptr;
uint32_t custom_instance_and_mask;
uint32_t sbt_offset_and_flags;
/* NOTE(review): presumably the object-to-world transform of the instance — confirm. */
mat3x4 otw_matrix;
uint32_t instance_id;
};
struct radv_global_sync_data {
uint32_t task_counts[2];
uint32_t task_started_counter;

View file

@ -50,89 +50,26 @@ void set_parent(uint32_t child, uint32_t parent)
DEREF(REF(uint32_t)(addr)) = parent;
}
/* Re-encodes a packed "SBT offset + instance flags" dword.
 *
 * src holds a 24-bit value in bits 0..23 and VK_GEOMETRY_INSTANCE_* flags in
 * bits 24..31. The low 24 bits are kept unchanged and the Vulkan flags are
 * replaced by the corresponding RADV_INSTANCE_* bits.
 *
 * Returns the re-encoded dword.
 */
uint32_t
encode_sbt_offset_and_flags(uint32_t src)
{
   uint32_t vk_flags = src >> 24;
   uint32_t encoded = src & 0xffffffu;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) ? RADV_INSTANCE_FORCE_OPAQUE : 0u;

   /* Inverted sense: the RADV bit is set when the Vulkan "force no opaque" flag is absent. */
   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) ? RADV_INSTANCE_NO_FORCE_NOT_OPAQUE : 0u;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
                 ? RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE
                 : 0u;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) ? RADV_INSTANCE_TRIANGLE_FLIP_FACING : 0u;

   return encoded;
}
/* Converts one intermediate (IR) leaf node into its final BVH representation.
 *
 * type:     IR node type (radv_ir_node_triangle, radv_ir_node_aabb or
 *           radv_ir_node_instance); any other value writes nothing.
 * src_node: address of the IR leaf node to read.
 * dst_node: address of the BVH leaf node to write.
 */
void
encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node)
{
   if (type == radv_ir_node_triangle) {
      radv_ir_triangle_node ir_tri = DEREF(REF(radv_ir_triangle_node)(src_node));
      REF(radv_bvh_triangle_node) out_tri = REF(radv_bvh_triangle_node)(dst_node);

      DEREF(out_tri).coords = ir_tri.coords;
      DEREF(out_tri).triangle_id = ir_tri.triangle_id;
      DEREF(out_tri).geometry_id_and_flags = ir_tri.geometry_id_and_flags;
      DEREF(out_tri).id = ir_tri.id;
   } else if (type == radv_ir_node_aabb) {
      radv_ir_aabb_node ir_box = DEREF(REF(radv_ir_aabb_node)(src_node));
      REF(radv_bvh_aabb_node) out_box = REF(radv_bvh_aabb_node)(dst_node);

      DEREF(out_box).primitive_id = ir_box.primitive_id;
      DEREF(out_box).geometry_id_and_flags = ir_box.geometry_id_and_flags;
   } else if (type == radv_ir_node_instance) {
      radv_ir_instance_node ir_inst = DEREF(REF(radv_ir_instance_node)(src_node));
      REF(radv_bvh_instance_node) out_inst = REF(radv_bvh_instance_node)(dst_node);

      /* The referenced acceleration structure's header tells where its BVH starts. */
      uint32_t blas_bvh_offset = DEREF(REF(radv_accel_struct_header)(ir_inst.base_ptr)).bvh_offset;

      DEREF(out_inst).bvh_ptr = addr_to_node(ir_inst.base_ptr + blas_bvh_offset);
      DEREF(out_inst).custom_instance_and_mask = ir_inst.custom_instance_and_mask;
      DEREF(out_inst).sbt_offset_and_flags = encode_sbt_offset_and_flags(ir_inst.sbt_offset_and_flags);
      DEREF(out_inst).instance_id = ir_inst.instance_id;
      DEREF(out_inst).bvh_offset = blas_bvh_offset;

      /* wto_matrix is obtained by inverting the 4x4 expansion of otw_matrix
       * (transpose, inverse, transpose); otw_matrix is stored alongside it.
       */
      mat4 otw = mat4(ir_inst.otw_matrix);
      mat4 wto = transpose(inverse(transpose(otw)));

      DEREF(out_inst).wto_matrix = mat3x4(wto);
      DEREF(out_inst).otw_matrix = mat3x4(otw);
   }
}
void
main()
{
/* Revert the order so we start at the root */
uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;
uint32_t intermediate_leaf_node_size;
uint32_t output_leaf_node_size;
switch (args.geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
intermediate_leaf_node_size = SIZEOF(radv_ir_triangle_node);
output_leaf_node_size = SIZEOF(radv_bvh_triangle_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
intermediate_leaf_node_size = SIZEOF(radv_ir_aabb_node);
output_leaf_node_size = SIZEOF(radv_bvh_aabb_node);
break;
default: /* instances */
intermediate_leaf_node_size = SIZEOF(radv_ir_instance_node);
output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
break;
}
uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * intermediate_leaf_node_size;
uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node);
uint32_t dst_leaf_offset =
id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size;
@ -233,10 +170,8 @@ main()
REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset);
DEREF(child_node).bvh_offset = dst_offset;
} else {
uint32_t child_index = offset / intermediate_leaf_node_size;
uint32_t child_index = offset / SIZEOF(radv_ir_node);
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
encode_leaf_node(type, args.intermediate_bvh + offset, args.output_bvh + dst_offset);
}
radv_aabb child_aabb =

View file

@ -209,7 +209,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
vertices.vertex[i] = transform * vertices.vertex[i];
}
REF(radv_ir_triangle_node) node = REF(radv_ir_triangle_node)(dst_ptr);
REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr);
bounds.min = vec3(INFINITY);
bounds.max = vec3(-INFINITY);
@ -221,9 +221,6 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
}
DEREF(node).base.aabb = bounds;
DEREF(node).base.cost = 0.0;
DEREF(node).triangle_id = global_id;
DEREF(node).geometry_id_and_flags = args.geometry_id;
DEREF(node).id = 9;
@ -234,7 +231,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
bool
build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
{
REF(radv_ir_aabb_node) node = REF(radv_ir_aabb_node)(dst_ptr);
REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr);
for (uint32_t vec = 0; vec < 2; vec++)
for (uint32_t comp = 0; comp < 3; comp++) {
@ -252,21 +249,51 @@ build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t
if (isnan(bounds.min.x))
return false;
DEREF(node).base.aabb = bounds;
DEREF(node).base.cost = 0.0;
DEREF(node).primitive_id = global_id;
DEREF(node).geometry_id_and_flags = args.geometry_id;
return true;
}
/* Computes the bounds of an instance by transforming header.aabb with
 * otw_matrix.
 *
 * header:     header of the referenced acceleration structure; only its aabb
 *             member is read.
 * otw_matrix: transform applied to header.aabb; element [row][3] is used as
 *             the constant term of each output component.
 *
 * Returns an axis-aligned box that encloses the transformed header.aabb.
 */
radv_aabb
calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 otw_matrix)
{
   radv_aabb result;

   for (uint32_t row = 0; row < 3; ++row) {
      /* Both bounds start at the constant term of this row. */
      float lower = otw_matrix[row][3];
      float upper = otw_matrix[row][3];

      for (uint32_t axis = 0; axis < 3; ++axis) {
         /* A coefficient may be negative, so either product can be the smaller one. */
         float from_min = otw_matrix[row][axis] * header.aabb.min[axis];
         float from_max = otw_matrix[row][axis] * header.aabb.max[axis];

         lower += min(from_min, from_max);
         upper += max(from_min, from_max);
      }

      result.min[row] = lower;
      result.max[row] = upper;
   }

   return result;
}
/* Re-encodes a packed "SBT offset + instance flags" dword.
 *
 * src holds a 24-bit value in bits 0..23 and VK_GEOMETRY_INSTANCE_* flags in
 * bits 24..31. The low 24 bits are kept unchanged and the Vulkan flags are
 * replaced by the corresponding RADV_INSTANCE_* bits.
 *
 * Returns the re-encoded dword.
 */
uint32_t
encode_sbt_offset_and_flags(uint32_t src)
{
   uint32_t vk_flags = src >> 24;
   uint32_t encoded = src & 0xffffffu;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) ? RADV_INSTANCE_FORCE_OPAQUE : 0u;

   /* Inverted sense: the RADV bit is set when the Vulkan "force no opaque" flag is absent. */
   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) ? RADV_INSTANCE_NO_FORCE_NOT_OPAQUE : 0u;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
                 ? RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE
                 : 0u;

   encoded |= ((vk_flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) ? RADV_INSTANCE_TRIANGLE_FLIP_FACING : 0u;

   return encoded;
}
bool
build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
{
REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr);
AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));
DEREF(node).base_ptr = instance.accelerationStructureReference;
/* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the active terminology is
* only relevant for BVH updates, which we do not implement, we can also skip instances with mask == 0.
@ -274,20 +301,23 @@ build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint3
if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u))
return false;
DEREF(node).otw_matrix = instance.transform;
radv_accel_struct_header instance_header =
DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix);
DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference + instance_header.bvh_offset);
DEREF(node).bvh_offset = instance_header.bvh_offset;
mat4 transform = mat4(instance.transform);
mat4 inv_transform = transpose(inverse(transpose(transform)));
DEREF(node).wto_matrix = mat3x4(inv_transform);
DEREF(node).otw_matrix = mat3x4(transform);
bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform));
DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
DEREF(node).sbt_offset_and_flags = encode_sbt_offset_and_flags(instance.sbt_offset_and_flags);
DEREF(node).instance_id = global_id;
DEREF(node).base.aabb = bounds;
DEREF(node).base.cost = 0.0;
return true;
}
@ -303,13 +333,13 @@ main(void)
uint32_t dst_stride;
uint32_t node_type;
if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
dst_stride = SIZEOF(radv_ir_triangle_node);
dst_stride = SIZEOF(radv_bvh_triangle_node);
node_type = radv_ir_node_triangle;
} else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
dst_stride = SIZEOF(radv_ir_aabb_node);
dst_stride = SIZEOF(radv_bvh_aabb_node);
node_type = radv_ir_node_aabb;
} else {
dst_stride = SIZEOF(radv_ir_instance_node);
dst_stride = SIZEOF(radv_bvh_instance_node);
node_type = radv_ir_node_instance;
}
@ -333,7 +363,14 @@ main(void)
is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
}
DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE;
if (is_active) {
REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
DEREF(ir_node).aabb = bounds;
DEREF(ir_node).cost = 0.0;
}
uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
DEREF(id_ptr).id = is_active ? pack_ir_node_id(ir_offset, node_type) : RADV_BVH_INVALID_NODE;
uvec4 ballot = subgroupBallot(is_active);
if (subgroupElect())

View file

@ -157,18 +157,14 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,
}
uint32_t bvh_leaf_size;
uint32_t ir_leaf_size;
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
ir_leaf_size = sizeof(struct radv_ir_triangle_node);
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
ir_leaf_size = sizeof(struct radv_ir_aabb_node);
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
ir_leaf_size = sizeof(struct radv_ir_instance_node);
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
break;
default:
@ -242,7 +238,7 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,
offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
scratch->ir_offset = offset;
offset += ir_leaf_size * leaf_count;
offset += sizeof(struct radv_ir_node) * leaf_count;
scratch->internal_node_offset = offset;
offset += sizeof(struct radv_ir_box_node) * internal_count;
@ -639,8 +635,11 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
struct leaf_args leaf_consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
};