radv/bvh: Stop emitting leaf nodes inside the encoder
Avoids unnecessary copies. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26725>
This commit is contained in:
parent
8fca54a754
commit
662f86c533
6 changed files with 65 additions and 139 deletions
|
|
@ -240,9 +240,6 @@ TYPE(radv_bvh_box32_node, 4);
|
|||
TYPE(radv_ir_header, 4);
|
||||
TYPE(radv_ir_node, 4);
|
||||
TYPE(radv_ir_box_node, 4);
|
||||
TYPE(radv_ir_triangle_node, 4);
|
||||
TYPE(radv_ir_aabb_node, 4);
|
||||
TYPE(radv_ir_instance_node, 8);
|
||||
|
||||
TYPE(radv_global_sync_data, 4);
|
||||
|
||||
|
|
@ -313,25 +310,6 @@ ir_type_to_bvh_type(uint32_t type)
|
|||
return RADV_BVH_INVALID_NODE;
|
||||
}
|
||||
|
||||
/* Computes the bounds of an instance by transforming the AABB stored in the
 * radv_accel_struct_header located at base_ptr with otw_matrix.
 *
 * base_ptr:   address that is dereferenced as a radv_accel_struct_header.
 * otw_matrix: transform applied to the header's aabb; index [comp][3] is used
 *             as the translation term for output component comp.
 *
 * Returns the transformed radv_aabb.
 */
radv_aabb
|
||||
calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix)
|
||||
{
|
||||
radv_aabb aabb;
|
||||
radv_accel_struct_header header = DEREF(REF(radv_accel_struct_header)(base_ptr));
|
||||
|
||||
/* Per output component: start from the translation term, then for every
 * input axis add the smaller of the two products (coefficient * min,
 * coefficient * max) to the minimum and the larger one to the maximum. */
for (uint32_t comp = 0; comp < 3; ++comp) {
|
||||
aabb.min[comp] = otw_matrix[comp][3];
|
||||
aabb.max[comp] = otw_matrix[comp][3];
|
||||
for (uint32_t col = 0; col < 3; ++col) {
|
||||
aabb.min[comp] +=
|
||||
min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
|
||||
aabb.max[comp] +=
|
||||
max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
|
||||
}
|
||||
}
|
||||
return aabb;
|
||||
}
|
||||
|
||||
float
|
||||
aabb_surface_area(radv_aabb aabb)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#endif
|
||||
|
||||
struct leaf_args {
|
||||
VOID_REF ir;
|
||||
VOID_REF bvh;
|
||||
REF(radv_ir_header) header;
|
||||
REF(key_id_pair) ids;
|
||||
|
|
|
|||
|
|
@ -120,30 +120,6 @@ struct radv_ir_box_node {
|
|||
uint32_t bvh_offset;
|
||||
};
|
||||
|
||||
/* Intermediate (IR) leaf node for an AABB primitive: a radv_ir_node header
 * followed by the two fields that encode_leaf_node copies verbatim into a
 * radv_bvh_aabb_node. */
struct radv_ir_aabb_node {
|
||||
radv_ir_node base;
|
||||
uint32_t primitive_id;
|
||||
uint32_t geometry_id_and_flags;
|
||||
};
|
||||
|
||||
/* Intermediate (IR) leaf node for a triangle: a radv_ir_node header followed
 * by the fields that encode_leaf_node copies verbatim into a
 * radv_bvh_triangle_node (coords, triangle_id, id, geometry_id_and_flags). */
struct radv_ir_triangle_node {
|
||||
radv_ir_node base;
|
||||
/* NOTE(review): presumably three vertices with three components each - confirm. */
float coords[3][3];
|
||||
uint32_t triangle_id;
|
||||
uint32_t id;
|
||||
uint32_t geometry_id_and_flags;
|
||||
};
|
||||
|
||||
/* Intermediate (IR) leaf node for an instance: a radv_ir_node header followed
 * by the raw instance data that encode_leaf_node converts into a
 * radv_bvh_instance_node. */
struct radv_ir_instance_node {
|
||||
radv_ir_node base;
|
||||
/* See radv_bvh_instance_node */
|
||||
/* Address that is dereferenced as a radv_accel_struct_header when encoding. */
uint64_t base_ptr;
|
||||
uint32_t custom_instance_and_mask;
|
||||
/* Unencoded value; encode_leaf_node passes it through encode_sbt_offset_and_flags. */
uint32_t sbt_offset_and_flags;
|
||||
mat3x4 otw_matrix;
|
||||
uint32_t instance_id;
|
||||
};
|
||||
|
||||
struct radv_global_sync_data {
|
||||
uint32_t task_counts[2];
|
||||
uint32_t task_started_counter;
|
||||
|
|
|
|||
|
|
@ -50,89 +50,26 @@ void set_parent(uint32_t child, uint32_t parent)
|
|||
DEREF(REF(uint32_t)(addr)) = parent;
|
||||
}
|
||||
|
||||
/* Re-encodes a packed "SBT offset and flags" word.
 *
 * src: the low 24 bits are kept unchanged in the result; the top 8 bits are
 *      read as VK_GEOMETRY_INSTANCE_* flag bits.
 *
 * Returns the low 24 bits of src OR'ed with one RADV_INSTANCE_* bit for each
 * condition tested below.
 */
uint32_t
|
||||
encode_sbt_offset_and_flags(uint32_t src)
|
||||
{
|
||||
uint32_t flags = src >> 24;
|
||||
uint32_t ret = src & 0xffffffu;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_FORCE_OPAQUE;
|
||||
/* Inverted test: the RADV bit is set when the Vulkan flag is absent. */
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
|
||||
ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Converts one intermediate (IR) leaf node into its final BVH representation.
 *
 * type:     selects the conversion; radv_ir_node_triangle, radv_ir_node_aabb
 *           and radv_ir_node_instance are handled, any other value writes
 *           nothing.
 * src_node: address of the IR leaf node to read.
 * dst_node: address of the BVH leaf node to write.
 */
void
|
||||
encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node)
|
||||
{
|
||||
switch (type) {
|
||||
/* Triangles: plain field-by-field copy. */
case radv_ir_node_triangle: {
|
||||
radv_ir_triangle_node src = DEREF(REF(radv_ir_triangle_node)(src_node));
|
||||
REF(radv_bvh_triangle_node) dst = REF(radv_bvh_triangle_node)(dst_node);
|
||||
|
||||
DEREF(dst).coords = src.coords;
|
||||
DEREF(dst).triangle_id = src.triangle_id;
|
||||
DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags;
|
||||
DEREF(dst).id = src.id;
|
||||
break;
|
||||
}
|
||||
/* AABBs: only the two id fields are copied. */
case radv_ir_node_aabb: {
|
||||
radv_ir_aabb_node src = DEREF(REF(radv_ir_aabb_node)(src_node));
|
||||
REF(radv_bvh_aabb_node) dst = REF(radv_bvh_aabb_node)(dst_node);
|
||||
|
||||
DEREF(dst).primitive_id = src.primitive_id;
|
||||
DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags;
|
||||
break;
|
||||
}
|
||||
/* Instances: resolve the referenced BVH, re-encode the flags and store
 * the transform together with its inverse. */
case radv_ir_node_instance: {
|
||||
radv_ir_instance_node src = DEREF(REF(radv_ir_instance_node)(src_node));
|
||||
REF(radv_bvh_instance_node) dst = REF(radv_bvh_instance_node)(dst_node);
|
||||
/* bvh_offset is read from the header of the referenced acceleration structure. */
uint32_t bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
|
||||
|
||||
DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
|
||||
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
|
||||
DEREF(dst).sbt_offset_and_flags = encode_sbt_offset_and_flags(src.sbt_offset_and_flags);
|
||||
DEREF(dst).instance_id = src.instance_id;
|
||||
DEREF(dst).bvh_offset = bvh_offset;
|
||||
|
||||
mat4 transform = mat4(src.otw_matrix);
|
||||
|
||||
/* NOTE(review): the transposes around inverse() suggest the matrix is
 * stored transposed relative to GLSL's column-major layout - confirm. */
mat4 inv_transform = transpose(inverse(transpose(transform)));
|
||||
DEREF(dst).wto_matrix = mat3x4(inv_transform);
|
||||
DEREF(dst).otw_matrix = mat3x4(transform);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
main()
|
||||
{
|
||||
/* Revert the order so we start at the root */
|
||||
uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;
|
||||
|
||||
uint32_t intermediate_leaf_node_size;
|
||||
uint32_t output_leaf_node_size;
|
||||
switch (args.geometry_type) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
intermediate_leaf_node_size = SIZEOF(radv_ir_triangle_node);
|
||||
output_leaf_node_size = SIZEOF(radv_bvh_triangle_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
intermediate_leaf_node_size = SIZEOF(radv_ir_aabb_node);
|
||||
output_leaf_node_size = SIZEOF(radv_bvh_aabb_node);
|
||||
break;
|
||||
default: /* instances */
|
||||
intermediate_leaf_node_size = SIZEOF(radv_ir_instance_node);
|
||||
output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * intermediate_leaf_node_size;
|
||||
uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node);
|
||||
uint32_t dst_leaf_offset =
|
||||
id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
|
||||
uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size;
|
||||
|
|
@ -233,10 +170,8 @@ main()
|
|||
REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset);
|
||||
DEREF(child_node).bvh_offset = dst_offset;
|
||||
} else {
|
||||
uint32_t child_index = offset / intermediate_leaf_node_size;
|
||||
uint32_t child_index = offset / SIZEOF(radv_ir_node);
|
||||
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
|
||||
|
||||
encode_leaf_node(type, args.intermediate_bvh + offset, args.output_bvh + dst_offset);
|
||||
}
|
||||
|
||||
radv_aabb child_aabb =
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
|
|||
vertices.vertex[i] = transform * vertices.vertex[i];
|
||||
}
|
||||
|
||||
REF(radv_ir_triangle_node) node = REF(radv_ir_triangle_node)(dst_ptr);
|
||||
REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr);
|
||||
|
||||
bounds.min = vec3(INFINITY);
|
||||
bounds.max = vec3(-INFINITY);
|
||||
|
|
@ -221,9 +221,6 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
|
|||
bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
|
||||
}
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
|
||||
DEREF(node).triangle_id = global_id;
|
||||
DEREF(node).geometry_id_and_flags = args.geometry_id;
|
||||
DEREF(node).id = 9;
|
||||
|
|
@ -234,7 +231,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id)
|
|||
bool
|
||||
build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
|
||||
{
|
||||
REF(radv_ir_aabb_node) node = REF(radv_ir_aabb_node)(dst_ptr);
|
||||
REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr);
|
||||
|
||||
for (uint32_t vec = 0; vec < 2; vec++)
|
||||
for (uint32_t comp = 0; comp < 3; comp++) {
|
||||
|
|
@ -252,21 +249,51 @@ build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t
|
|||
if (isnan(bounds.min.x))
|
||||
return false;
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
DEREF(node).primitive_id = global_id;
|
||||
DEREF(node).geometry_id_and_flags = args.geometry_id;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Computes the bounds of an instance by transforming header.aabb with
 * otw_matrix.
 *
 * header:     acceleration structure header whose aabb is transformed.
 * otw_matrix: transform applied to that aabb; index [comp][3] is used as the
 *             translation term for output component comp.
 *
 * Returns the transformed radv_aabb.
 */
radv_aabb
|
||||
calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 otw_matrix)
|
||||
{
|
||||
radv_aabb aabb;
|
||||
/* Per output component: start from the translation term, then for every
 * input axis add the smaller of the two products (coefficient * min,
 * coefficient * max) to the minimum and the larger one to the maximum. */
for (uint32_t comp = 0; comp < 3; ++comp) {
|
||||
aabb.min[comp] = otw_matrix[comp][3];
|
||||
aabb.max[comp] = otw_matrix[comp][3];
|
||||
for (uint32_t col = 0; col < 3; ++col) {
|
||||
aabb.min[comp] +=
|
||||
min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
|
||||
aabb.max[comp] +=
|
||||
max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
|
||||
}
|
||||
}
|
||||
return aabb;
|
||||
}
|
||||
|
||||
/* Re-encodes a packed "SBT offset and flags" word.
 *
 * src: the low 24 bits are kept unchanged in the result; the top 8 bits are
 *      read as VK_GEOMETRY_INSTANCE_* flag bits.
 *
 * Returns the low 24 bits of src OR'ed with one RADV_INSTANCE_* bit for each
 * condition tested below.
 */
uint32_t
|
||||
encode_sbt_offset_and_flags(uint32_t src)
|
||||
{
|
||||
uint32_t flags = src >> 24;
|
||||
uint32_t ret = src & 0xffffffu;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_FORCE_OPAQUE;
|
||||
/* Inverted test: the RADV bit is set when the Vulkan flag is absent. */
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
|
||||
ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
|
||||
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
|
||||
ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
|
||||
{
|
||||
REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
|
||||
REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr);
|
||||
|
||||
AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));
|
||||
DEREF(node).base_ptr = instance.accelerationStructureReference;
|
||||
|
||||
/* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the active terminology is
|
||||
* only relevant for BVH updates, which we do not implement, we can also skip instances with mask == 0.
|
||||
|
|
@ -274,20 +301,23 @@ build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint3
|
|||
if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u))
|
||||
return false;
|
||||
|
||||
DEREF(node).otw_matrix = instance.transform;
|
||||
|
||||
radv_accel_struct_header instance_header =
|
||||
DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
|
||||
|
||||
bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix);
|
||||
DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference + instance_header.bvh_offset);
|
||||
DEREF(node).bvh_offset = instance_header.bvh_offset;
|
||||
|
||||
mat4 transform = mat4(instance.transform);
|
||||
mat4 inv_transform = transpose(inverse(transpose(transform)));
|
||||
DEREF(node).wto_matrix = mat3x4(inv_transform);
|
||||
DEREF(node).otw_matrix = mat3x4(transform);
|
||||
|
||||
bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform));
|
||||
|
||||
DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
|
||||
DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
|
||||
DEREF(node).sbt_offset_and_flags = encode_sbt_offset_and_flags(instance.sbt_offset_and_flags);
|
||||
DEREF(node).instance_id = global_id;
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -303,13 +333,13 @@ main(void)
|
|||
uint32_t dst_stride;
|
||||
uint32_t node_type;
|
||||
if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
|
||||
dst_stride = SIZEOF(radv_ir_triangle_node);
|
||||
dst_stride = SIZEOF(radv_bvh_triangle_node);
|
||||
node_type = radv_ir_node_triangle;
|
||||
} else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
|
||||
dst_stride = SIZEOF(radv_ir_aabb_node);
|
||||
dst_stride = SIZEOF(radv_bvh_aabb_node);
|
||||
node_type = radv_ir_node_aabb;
|
||||
} else {
|
||||
dst_stride = SIZEOF(radv_ir_instance_node);
|
||||
dst_stride = SIZEOF(radv_bvh_instance_node);
|
||||
node_type = radv_ir_node_instance;
|
||||
}
|
||||
|
||||
|
|
@ -333,7 +363,14 @@ main(void)
|
|||
is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
|
||||
}
|
||||
|
||||
DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE;
|
||||
if (is_active) {
|
||||
REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
|
||||
DEREF(ir_node).aabb = bounds;
|
||||
DEREF(ir_node).cost = 0.0;
|
||||
}
|
||||
|
||||
uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
|
||||
DEREF(id_ptr).id = is_active ? pack_ir_node_id(ir_offset, node_type) : RADV_BVH_INVALID_NODE;
|
||||
|
||||
uvec4 ballot = subgroupBallot(is_active);
|
||||
if (subgroupElect())
|
||||
|
|
|
|||
|
|
@ -157,18 +157,14 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,
|
|||
}
|
||||
|
||||
uint32_t bvh_leaf_size;
|
||||
uint32_t ir_leaf_size;
|
||||
switch (geometry_type) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
ir_leaf_size = sizeof(struct radv_ir_triangle_node);
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
ir_leaf_size = sizeof(struct radv_ir_aabb_node);
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
||||
ir_leaf_size = sizeof(struct radv_ir_instance_node);
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
|
||||
break;
|
||||
default:
|
||||
|
|
@ -242,7 +238,7 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,
|
|||
offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
|
||||
|
||||
scratch->ir_offset = offset;
|
||||
offset += ir_leaf_size * leaf_count;
|
||||
offset += sizeof(struct radv_ir_node) * leaf_count;
|
||||
|
||||
scratch->internal_node_offset = offset;
|
||||
offset += sizeof(struct radv_ir_box_node) * internal_count;
|
||||
|
|
@ -639,8 +635,11 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
|
|||
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
|
||||
for (uint32_t i = 0; i < infoCount; ++i) {
|
||||
RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
|
||||
|
||||
struct leaf_args leaf_consts = {
|
||||
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
||||
.ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
||||
.bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
|
||||
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
||||
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue