anv: fix incorrect flushing on shader query copy

When doing query result copies in 3D mode, we're flushing the render
target cache, but the shader writes go through the dataport.

Fixes flaky and failing piglit tests when shader query copies are forced with Zink:

  $ query_copy_with_shader_threshold=0 ./bin/arb_query_buffer_object-coherency -auto -fbo

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: b3b12c2c27 ("anv: enable CmdCopyQueryPoolResults to use shader for copies")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
(cherry picked from commit c53a4711cb77fdf19b93797106b2ddf846c32d37)
This commit is contained in:
Lionel Landwerlin 2023-11-15 09:33:46 +02:00 committed by Eric Engestrom
parent 48608401a3
commit a2a141dffa
3 changed files with 42 additions and 6 deletions

View file

@ -1104,7 +1104,7 @@
"description": "anv: fix incorrect flushing on shader query copy",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "b3b12c2c27fdd42668c041dd5428603d6cee4eb4",
"notes": null

View file

@ -4164,6 +4164,13 @@ mask_is_write(const VkAccessFlags2 access)
VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV);
}
/* Returns true when `access` has VK_ACCESS_2_TRANSFER_WRITE_BIT or
 * VK_ACCESS_2_MEMORY_WRITE_BIT set, false otherwise.
 */
static inline bool
mask_is_transfer_write(const VkAccessFlags2 access)
{
   const VkAccessFlags2 transfer_write_bits =
      VK_ACCESS_2_TRANSFER_WRITE_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;

   return (access & transfer_write_bits) != 0;
}
static void
cmd_buffer_barrier_video(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfo *dep_info)
@ -4327,6 +4334,16 @@ cmd_buffer_barrier_blitter(struct anv_cmd_buffer *cmd_buffer,
#endif
}
/* Returns true when ANV_QUERY_WRITES_DATA_FLUSH is set in the command
 * buffer's query `buffer_write_bits`, false otherwise.
 */
static inline bool
cmd_buffer_has_pending_copy_query(struct anv_cmd_buffer *cmd_buffer)
{
   /* Query copies only ever write through the dataport, so the data-flush
    * flag is the only one that has to be tested here.
    */
   if (cmd_buffer->state.queries.buffer_write_bits &
       ANV_QUERY_WRITES_DATA_FLUSH)
      return true;

   return false;
}
static void
cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfo *dep_info,
@ -4352,6 +4369,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
VkAccessFlags2 dst_flags = 0;
bool apply_sparse_flushes = false;
bool flush_query_copies = false;
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
@ -4367,6 +4385,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
}
if (stage_is_transfer(dep_info->pMemoryBarriers[i].srcStageMask) &&
mask_is_transfer_write(dep_info->pMemoryBarriers[i].srcAccessMask) &&
cmd_buffer_has_pending_copy_query(cmd_buffer))
flush_query_copies = true;
/* There's no way of knowing if this memory barrier is related to sparse
* buffers! This is pretty horrible.
*/
@ -4392,6 +4415,11 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
}
if (stage_is_transfer(buf_barrier->srcStageMask) &&
mask_is_transfer_write(buf_barrier->srcAccessMask) &&
cmd_buffer_has_pending_copy_query(cmd_buffer))
flush_query_copies = true;
if (anv_buffer_is_sparse(buffer) && mask_is_write(src_flags))
apply_sparse_flushes = true;
}
@ -4488,6 +4516,14 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
if (apply_sparse_flushes)
bits |= ANV_PIPE_FLUSH_BITS;
/* Copies from query pools are executed with a shader writing through the
* dataport.
*/
if (flush_query_copies) {
bits |= (GFX_VER >= 12 ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : ANV_PIPE_DATA_CACHE_FLUSH_BIT);
}
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);

View file

@ -1812,11 +1812,11 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state);
anv_add_pending_pipe_bits(cmd_buffer,
cmd_buffer->state.current_pipeline == GPGPU ?
ANV_QUERY_COMPUTE_WRITES_PENDING_BITS :
ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(device->info),
"after query copy results");
/* The query copy result shader is writing using the dataport, flush
* HDC/Data cache depending on the generation. Also stall at pixel
* scoreboard in case we're doing the copy with a fragment shader.
*/
cmd_buffer->state.queries.buffer_write_bits |= ANV_QUERY_WRITES_DATA_FLUSH;
trace_intel_end_query_copy_shader(&cmd_buffer->trace, query_count);
}