anv: fix incorrect flushing on shader query copy

When doing query result copies in 3D mode, we're flushing the render
target cache, but the shader writes go through the dataport.

Fixes flakes/fails in piglit with shader query copies forced with Zink:

  $ query_copy_with_shader_threshold=0 ./bin/arb_query_buffer_object-coherency -auto -fbo

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: b3b12c2c27 ("anv: enable CmdCopyQueryPoolResults to use shader for copies")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
(cherry picked from commit c53a4711cb77fdf19b93797106b2ddf846c32d37)
This commit is contained in:
Lionel Landwerlin 2023-11-15 09:33:46 +02:00 committed by Eric Engestrom
parent 48608401a3
commit a2a141dffa
3 changed files with 42 additions and 6 deletions

View file

@ -1104,7 +1104,7 @@
"description": "anv: fix incorrect flushing on shader query copy", "description": "anv: fix incorrect flushing on shader query copy",
"nominated": true, "nominated": true,
"nomination_type": 1, "nomination_type": 1,
"resolution": 0, "resolution": 1,
"main_sha": null, "main_sha": null,
"because_sha": "b3b12c2c27fdd42668c041dd5428603d6cee4eb4", "because_sha": "b3b12c2c27fdd42668c041dd5428603d6cee4eb4",
"notes": null "notes": null

View file

@ -4164,6 +4164,13 @@ mask_is_write(const VkAccessFlags2 access)
VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV); VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV);
} }
/* Returns true when the access mask includes a write that may have come from
 * a transfer operation: either the explicit transfer-write bit or the
 * catch-all memory-write bit (which covers all write accesses, including
 * transfers).
 */
static inline bool
mask_is_transfer_write(const VkAccessFlags2 access)
{
return access & (VK_ACCESS_2_TRANSFER_WRITE_BIT |
VK_ACCESS_2_MEMORY_WRITE_BIT);
}
static void static void
cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer, cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfo *dep_info) const VkDependencyInfo *dep_info)
@ -4327,6 +4334,16 @@ cmd_buffer_barrier_blitter(struct anv_cmd_buffer *cmd_buffer,
#endif #endif
} }
/* Returns true when this command buffer has query-pool result copies whose
 * shader (dataport) writes have not been flushed yet, and which would
 * therefore need an HDC/data-cache flush before dependent reads.
 */
static inline bool
cmd_buffer_has_pending_copy_query(struct anv_cmd_buffer *cmd_buffer)
{
/* Query copies are only written through the dataport, so checking that
* single flag is sufficient.
*/
return (cmd_buffer->state.queries.buffer_write_bits &
ANV_QUERY_WRITES_DATA_FLUSH) != 0;
}
static void static void
cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfo *dep_info, const VkDependencyInfo *dep_info,
@ -4352,6 +4369,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
VkAccessFlags2 dst_flags = 0; VkAccessFlags2 dst_flags = 0;
bool apply_sparse_flushes = false; bool apply_sparse_flushes = false;
bool flush_query_copies = false;
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask; src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
@ -4367,6 +4385,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS; ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
} }
if (stage_is_transfer(dep_info->pMemoryBarriers[i].srcStageMask) &&
mask_is_transfer_write(dep_info->pMemoryBarriers[i].srcAccessMask) &&
cmd_buffer_has_pending_copy_query(cmd_buffer))
flush_query_copies = true;
/* There's no way of knowing if this memory barrier is related to sparse /* There's no way of knowing if this memory barrier is related to sparse
* buffers! This is pretty horrible. * buffers! This is pretty horrible.
*/ */
@ -4392,6 +4415,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS; ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
} }
if (stage_is_transfer(buf_barrier->srcStageMask) &&
mask_is_transfer_write(buf_barrier->srcAccessMask) &&
cmd_buffer_has_pending_copy_query(cmd_buffer))
flush_query_copies = true;
if (anv_buffer_is_sparse(buffer) && mask_is_write(src_flags)) if (anv_buffer_is_sparse(buffer) && mask_is_write(src_flags))
apply_sparse_flushes = true; apply_sparse_flushes = true;
} }
@ -4488,6 +4516,14 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
if (apply_sparse_flushes) if (apply_sparse_flushes)
bits |= ANV_PIPE_FLUSH_BITS; bits |= ANV_PIPE_FLUSH_BITS;
/* Copies from query pools are executed with a shader writing through the
* dataport.
*/
if (flush_query_copies) {
bits |= (GFX_VER >= 12 ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : ANV_PIPE_DATA_CACHE_FLUSH_BIT);
}
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT) if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
genX(cmd_buffer_flush_generated_draws)(cmd_buffer); genX(cmd_buffer_flush_generated_draws)(cmd_buffer);

View file

@ -1812,11 +1812,11 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state); genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state);
anv_add_pending_pipe_bits(cmd_buffer, /* The query copy result shader is writing using the dataport, flush
cmd_buffer->state.current_pipeline == GPGPU ? * HDC/Data cache depending on the generation. Also stall at pixel
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS : * scoreboard in case we're doing the copy with a fragment shader.
ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(device->info), */
"after query copy results"); cmd_buffer->state.queries.buffer_write_bits |= ANV_QUERY_WRITES_DATA_FLUSH;
trace_intel_end_query_copy_shader(&cmd_buffer->trace, query_count); trace_intel_end_query_copy_shader(&cmd_buffer->trace, query_count);
} }