From 6f3dfba80309f71f2819c67c00bffc4e52ff7554 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 24 Sep 2023 22:38:47 +0300 Subject: [PATCH] anv: document the draw indirect optimization ring mode Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Tested-by: Felix DeGrood Part-of: --- docs/drivers/anv.rst | 65 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/docs/drivers/anv.rst b/docs/drivers/anv.rst index 5723404ff23..3fa2e7b6252 100644 --- a/docs/drivers/anv.rst +++ b/docs/drivers/anv.rst @@ -334,11 +334,60 @@ only ``3DPRIMITIVE`` instructions and doesn't do any data loading from memory or touch HW registers, feeding the 3D pipeline as fast as it can. -In ANV this implemented by using a side batch buffer. When ANV -encounters the first indirect draws, it generates a jump into the side -batch, the side batch contains a draw call using a generation shader -for each indirect draw. We keep adding on more generation draws into -the batch until we have to stop due to command buffer end, secondary -command buffer calls or a barrier containing the access flag -``VK_ACCESS_INDIRECT_COMMAND_READ_BIT``. The side batch buffer jump -back right after the instruction where it was called. +In ANV this is implemented in 2 different ways: + +By generating instructions directly into the command stream using a +side batch buffer. When ANV encounters the first indirect draw, it +generates a jump into the side batch; the side batch contains a draw +call using a generation shader for each indirect draw. We keep adding +on more generation draws into the batch until we have to stop due to +command buffer end, secondary command buffer calls or a barrier +containing the access flag ``VK_ACCESS_INDIRECT_COMMAND_READ_BIT``. +The side batch buffer jumps back right after the instruction where it +was called. Here is a high-level diagram showing how the generation +batch buffer writes into the main command buffer: + +.. 
graphviz:: + + digraph commands_mode { + rankdir = "LR" + "main-command-buffer" [ + label = "main command buffer|...|draw indirect0 start|<f0>jump to\ngeneration batch|<f1>|<f2>empty instruction0|<f3>empty instruction1|...|draw indirect0 end|...|draw indirect1 start|<f4>empty instruction0|<f5>empty instruction1|...|draw indirect1 end|..." + shape = "record" + ]; + "generation-command-buffer" [ + label = "generation command buffer|<f0>|<f1>write draw indirect0|<f2>write draw indirect1|...|<f3>exit jump" + shape = "record" + ]; + "main-command-buffer":f0 -> "generation-command-buffer":f0; + "generation-command-buffer":f1 -> "main-command-buffer":f2 [color="#0000ff"]; + "generation-command-buffer":f1 -> "main-command-buffer":f3 [color="#0000ff"]; + "generation-command-buffer":f2 -> "main-command-buffer":f4 [color="#0000ff"]; + "generation-command-buffer":f2 -> "main-command-buffer":f5 [color="#0000ff"]; + "generation-command-buffer":f3 -> "main-command-buffer":f1; + } + +By generating instructions into a ring buffer of commands when the +draw count is high. This solution allows smaller batches to be +emitted. Here is a high-level diagram showing how things are +executed: + +.. graphviz:: + + digraph ring_mode { + rankdir=LR; + "main-command-buffer" [ + label = "main command buffer|...| draw indirect |<f1>generation shader|<f2> jump to ring|<f3> increment\ndraw_base|<f4>..." + shape = "record" + ]; + "ring-buffer" [ + label = "ring buffer|<f0>generated draw0|<f1>generated draw1|<f2>generated draw2|...|<f3>exit jump" + shape = "record" + ]; + "main-command-buffer":f2 -> "ring-buffer":f0; + "ring-buffer":f3 -> "main-command-buffer":f3; + "ring-buffer":f3 -> "main-command-buffer":f4; + "main-command-buffer":f3 -> "main-command-buffer":f1; + "main-command-buffer":f1 -> "ring-buffer":f1 [color="#0000ff"]; + "main-command-buffer":f1 -> "ring-buffer":f2 [color="#0000ff"]; + }