radv: Remove the BVH depth heuristics
It only helps Quake II RTX and hurts everything else.

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26481>
This commit is contained in:
parent
719619c477
commit
2e4951d3fb
6 changed files with 5 additions and 76 deletions
|
|
@ -107,8 +107,6 @@ struct radv_accel_struct_header {
|
|||
|
||||
struct radv_ir_node {
|
||||
radv_aabb aabb;
|
||||
/* Generic normalized cost of not merging this node. */
|
||||
float cost;
|
||||
};
|
||||
|
||||
#define RADV_UNKNOWN_BVH_OFFSET 0xFFFFFFFF
|
||||
|
|
|
|||
|
|
@ -366,7 +366,6 @@ main(void)
|
|||
if (is_active) {
|
||||
REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
|
||||
DEREF(ir_node).aabb = bounds;
|
||||
DEREF(ir_node).cost = 0.0;
|
||||
}
|
||||
|
||||
uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
|
||||
|
|
|
|||
|
|
@ -63,12 +63,7 @@ bvh_shaders = [
|
|||
[
|
||||
'ploc_internal.comp',
|
||||
'ploc_internal',
|
||||
['EXTENDED_SAH=0'],
|
||||
],
|
||||
[
|
||||
'ploc_internal.comp',
|
||||
'ploc_internal_extended',
|
||||
['EXTENDED_SAH=1'],
|
||||
[],
|
||||
],
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -116,8 +116,6 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
|
|||
total_bounds.min = vec3(INFINITY);
|
||||
total_bounds.max = vec3(-INFINITY);
|
||||
|
||||
float cost = 0.0;
|
||||
|
||||
for (uint i = 0; i < 2; ++i) {
|
||||
VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i]));
|
||||
REF(radv_ir_node) child = REF(radv_ir_node)(node);
|
||||
|
|
@ -125,15 +123,10 @@ push_node(uint32_t children[2], radv_aabb bounds[2])
|
|||
total_bounds.min = min(total_bounds.min, bounds[i].min);
|
||||
total_bounds.max = max(total_bounds.max, bounds[i].max);
|
||||
|
||||
cost += DEREF(child).cost;
|
||||
|
||||
DEREF(dst_node).children[i] = children[i];
|
||||
}
|
||||
|
||||
DEREF(dst_node).base.aabb = total_bounds;
|
||||
#if EXTENDED_SAH
|
||||
DEREF(dst_node).base.cost = cost * 0.5 + BVH_LEVEL_COST;
|
||||
#endif
|
||||
DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET;
|
||||
return dst_id;
|
||||
}
|
||||
|
|
@ -159,9 +152,6 @@ decode_neighbour_offset(uint32_t encoded_offset)
|
|||
#define NUM_PLOC_LDS_ITEMS PLOC_WORKGROUP_SIZE + 4 * PLOC_NEIGHBOURHOOD
|
||||
|
||||
shared radv_aabb shared_bounds[NUM_PLOC_LDS_ITEMS];
|
||||
#if EXTENDED_SAH
|
||||
shared float shared_costs[NUM_PLOC_LDS_ITEMS];
|
||||
#endif
|
||||
shared uint32_t nearest_neighbour_indices[NUM_PLOC_LDS_ITEMS];
|
||||
|
||||
uint32_t
|
||||
|
|
@ -187,9 +177,6 @@ load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base,
|
|||
REF(radv_ir_node) node = REF(radv_ir_node)(addr);
|
||||
|
||||
shared_bounds[i - lds_base] = DEREF(node).aabb;
|
||||
#if EXTENDED_SAH
|
||||
shared_costs[i - lds_base] = DEREF(node).cost;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -199,34 +186,7 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j)
|
|||
radv_aabb combined_bounds;
|
||||
combined_bounds.min = min(shared_bounds[i - lds_base].min, shared_bounds[j - lds_base].min);
|
||||
combined_bounds.max = max(shared_bounds[i - lds_base].max, shared_bounds[j - lds_base].max);
|
||||
float area = aabb_surface_area(combined_bounds);
|
||||
|
||||
#if EXTENDED_SAH
|
||||
if (area == 0.0)
|
||||
return 0.0;
|
||||
|
||||
/* p_i and p_j are the probabilities that i or j are hit by a ray:
|
||||
* Assuming that the current node is hit (p = 1) and the probability of hitting a node
|
||||
* is proportional to its surface area, p = area * c with p = 1 for the current node.
|
||||
* -> c = 1 / area
|
||||
*
|
||||
* We can use those probabilities to limit the impact of child cost to be proportional to
|
||||
* its hit probability. (Child cost is the cost of not merging a node which increases with
|
||||
* tree depth for internal nodes)
|
||||
*
|
||||
* Dividing area by both relative costs will make it more likely that we merge nodes with
|
||||
* a high child cost.
|
||||
*/
|
||||
float p_i = aabb_surface_area(shared_bounds[i - lds_base]) / area;
|
||||
float p_j = aabb_surface_area(shared_bounds[j - lds_base]) / area;
|
||||
|
||||
float combined_cost =
|
||||
(1.0 + shared_costs[i - lds_base] * p_i) * (1.0 + shared_costs[j - lds_base] * p_j);
|
||||
|
||||
return area / combined_cost;
|
||||
#else
|
||||
return area;
|
||||
#endif
|
||||
return aabb_surface_area(combined_bounds);
|
||||
}
|
||||
|
||||
shared uint32_t shared_aggregate_sum;
|
||||
|
|
|
|||
|
|
@ -57,10 +57,6 @@ static const uint32_t ploc_spv[] = {
|
|||
#include "bvh/ploc_internal.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t ploc_extended_spv[] = {
|
||||
#include "bvh/ploc_internal_extended.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t copy_spv[] = {
|
||||
#include "bvh/copy.spv.h"
|
||||
};
|
||||
|
|
@ -87,7 +83,6 @@ enum internal_build_type {
|
|||
|
||||
struct build_config {
|
||||
enum internal_build_type internal_type;
|
||||
bool extended_sah;
|
||||
bool compact;
|
||||
};
|
||||
|
||||
|
|
@ -129,11 +124,6 @@ build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfo
|
|||
else
|
||||
config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
|
||||
|
||||
/* 4^(lds stack entry count) assuming we push 1 node on average. */
|
||||
uint32_t lds_spill_threshold = 1 << (8 * 2);
|
||||
if (leaf_count < lds_spill_threshold)
|
||||
config.extended_sah = true;
|
||||
|
||||
if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
|
||||
config.compact = true;
|
||||
|
||||
|
|
@ -306,7 +296,6 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
|
|||
struct radv_meta_state *state = &device->meta_state;
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_extended_pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_pipeline,
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
|
||||
|
|
@ -544,12 +533,6 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
|
|||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), sizeof(struct ploc_args),
|
||||
&device->meta_state.accel_struct_build.ploc_extended_pipeline,
|
||||
&device->meta_state.accel_struct_build.ploc_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
|
||||
&device->meta_state.accel_struct_build.encode_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
|
|
@ -1004,19 +987,15 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
|
|||
|
||||
static void
|
||||
ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
|
||||
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
|
||||
bool extended_sah)
|
||||
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline
|
||||
: cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
|
||||
cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
|
||||
|
||||
for (uint32_t i = 0; i < infoCount; ++i) {
|
||||
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
|
||||
continue;
|
||||
if (bvh_states[i].config.extended_sah != extended_sah)
|
||||
continue;
|
||||
|
||||
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
|
||||
uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
|
||||
|
|
@ -1242,8 +1221,7 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
|
|||
|
||||
lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
|
||||
|
||||
ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, false);
|
||||
ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states, true);
|
||||
ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);
|
||||
|
||||
cmd_buffer->state.flush_bits |= flush_bits;
|
||||
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@ struct radv_meta_state {
|
|||
VkPipeline lbvh_generate_ir_pipeline;
|
||||
VkPipelineLayout ploc_p_layout;
|
||||
VkPipeline ploc_pipeline;
|
||||
VkPipeline ploc_extended_pipeline;
|
||||
VkPipelineLayout encode_p_layout;
|
||||
VkPipeline encode_pipeline;
|
||||
VkPipeline encode_compact_pipeline;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue