From 1c6c8ce54b4a17c7462fa9030bcba91a6b2a7fc3 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Aug 2022 11:57:36 +0200 Subject: [PATCH] tu: Make MSAA emission always dynamic This wasn't taking into account the dynamic primitive topology, and it was suboptimal with dynamic rendering, because we don't know when compiling the pipeline whether variable multisample rate is being used. It's going to be even more difficult to support the current approach with graphics pipeline library because the MSAA state is derived from multisample state, rasterization state, input assembly state, and tessellation state, which may be in different pipelines. Just set it dynamically based on the pipeline and re-emit it when the pipeline's MSAA or rectangular/bresenham state differs. Part-of: --- src/freedreno/vulkan/tu_clear_blit.c | 3 +- src/freedreno/vulkan/tu_cmd_buffer.c | 93 ++++++++++++++++------------ src/freedreno/vulkan/tu_cmd_buffer.h | 6 +- src/freedreno/vulkan/tu_pipeline.c | 51 ++++----------- src/freedreno/vulkan/tu_pipeline.h | 3 +- 5 files changed, 76 insertions(+), 80 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 0720ec2092e..3d6c004e244 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -802,8 +802,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, } } - cmd->state.line_mode = RECTANGULAR; - tu6_emit_msaa(cs, samples, cmd->state.line_mode); + tu6_emit_msaa(cs, samples, false); } static void diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 5ac898f9128..ff161b6be1f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -323,29 +323,6 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_GRAS_MAX_LAYER_INDEX(layers - 1)); } -void -tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits vk_samples, - enum a5xx_line_mode line_mode) -{ - const 
enum a3xx_msaa_samples samples = tu_msaa_samples(vk_samples); - bool msaa_disable = (samples == MSAA_ONE) || (line_mode == BRESENHAM); - - tu_cs_emit_regs(cs, - A6XX_SP_TP_RAS_MSAA_CNTL(samples), - A6XX_SP_TP_DEST_MSAA_CNTL(.samples = samples, - .msaa_disable = msaa_disable)); - - tu_cs_emit_regs(cs, - A6XX_GRAS_RAS_MSAA_CNTL(samples), - A6XX_GRAS_DEST_MSAA_CNTL(.samples = samples, - .msaa_disable = msaa_disable)); - - tu_cs_emit_regs(cs, - A6XX_RB_RAS_MSAA_CNTL(samples), - A6XX_RB_DEST_MSAA_CNTL(.samples = samples, - .msaa_disable = msaa_disable)); -} - static void tu6_emit_bin_size(struct tu_cs *cs, uint32_t bin_w, uint32_t bin_h, uint32_t flags) @@ -562,6 +539,52 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) tu_cs_emit_qw(cs, state.iova); } +void +tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits vk_samples, + bool msaa_disable) +{ + const enum a3xx_msaa_samples samples = tu_msaa_samples(vk_samples); + msaa_disable |= (samples == MSAA_ONE); + tu_cs_emit_regs(cs, + A6XX_SP_TP_RAS_MSAA_CNTL(samples), + A6XX_SP_TP_DEST_MSAA_CNTL(.samples = samples, + .msaa_disable = msaa_disable)); + + tu_cs_emit_regs(cs, + A6XX_GRAS_RAS_MSAA_CNTL(samples), + A6XX_GRAS_DEST_MSAA_CNTL(.samples = samples, + .msaa_disable = msaa_disable)); + + tu_cs_emit_regs(cs, + A6XX_RB_RAS_MSAA_CNTL(samples), + A6XX_RB_DEST_MSAA_CNTL(.samples = samples, + .msaa_disable = msaa_disable)); +} + +static void +tu6_update_msaa(struct tu_cmd_buffer *cmd, VkSampleCountFlagBits samples) +{ + bool is_line = + tu6_primtype_line(cmd->state.primtype) || + (tu6_primtype_patches(cmd->state.primtype) && + cmd->state.pipeline && + cmd->state.pipeline->tess.patch_type == IR3_TESS_ISOLINES); + bool msaa_disable = is_line && cmd->state.line_mode == BRESENHAM; + + if (cmd->state.msaa_disable != msaa_disable || + cmd->state.samples != samples) { + struct tu_cs cs; + cmd->state.msaa = tu_cs_draw_state(&cmd->sub_cs, &cs, 9); + tu6_emit_msaa(&cs, samples, msaa_disable); + if 
(!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) { + tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3); + tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_MSAA, cmd->state.msaa); + } + cmd->state.msaa_disable = msaa_disable; + cmd->state.samples = samples; + } +} + static bool use_hw_binning(struct tu_cmd_buffer *cmd) { @@ -2552,20 +2575,12 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, tu_cs_emit(cs, subdraw_size); } - if (cmd->state.line_mode != pipeline->rast.line_mode) { - cmd->state.line_mode = pipeline->rast.line_mode; + cmd->state.line_mode = pipeline->rast.line_mode; + if (!(pipeline->dynamic_state_mask & + BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY))) + cmd->state.primtype = pipeline->ia.primtype; - /* We have to disable MSAA when bresenham lines are used, this is - * a hardware limitation and spec allows it: - * - * When Bresenham lines are being rasterized, sample locations may - * all be treated as being at the pixel center (this may affect - * attribute and depth interpolation). 
- */ - if (cmd->state.subpass && cmd->state.subpass->samples) { - tu6_emit_msaa(cs, cmd->state.subpass->samples, cmd->state.line_mode); - } - } + tu6_update_msaa(cmd, pipeline->output.samples); if ((pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_VIEWPORT)) && (pipeline->viewport.z_negative_one_to_one != cmd->state.z_negative_one_to_one)) { @@ -2824,6 +2839,7 @@ tu_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); cmd->state.primtype = tu6_primtype(primitiveTopology); + tu6_update_msaa(cmd, cmd->state.samples); } VKAPI_ATTR void VKAPI_CALL @@ -3774,8 +3790,8 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd) { tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); - if (cmd->state.subpass->samples) - tu6_emit_msaa(&cmd->draw_cs, cmd->state.subpass->samples, cmd->state.line_mode); + if (cmd->state.subpass->samples != 0) + tu6_update_msaa(cmd, cmd->state.subpass->samples); tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false); tu_set_input_attachments(cmd, cmd->state.subpass); @@ -4453,6 +4469,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state); + tu_cs_emit_draw_state(cs, TU_DRAW_STATE_MSAA, cmd->state.msaa); for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) { tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index bb20ec809b9..ef679e3c2a8 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -35,6 +35,7 @@ enum tu_draw_state_group_id TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, TU_DRAW_STATE_PRIM_MODE_GMEM, TU_DRAW_STATE_PRIM_MODE_SYSMEM, + TU_DRAW_STATE_MSAA, 
/* dynamic state related draw states */ TU_DRAW_STATE_DYNAMIC, @@ -342,6 +343,7 @@ struct tu_cmd_state struct tu_draw_state vertex_buffers; struct tu_draw_state shader_const; struct tu_draw_state desc_sets; + struct tu_draw_state msaa; struct tu_draw_state vs_params; @@ -400,6 +402,8 @@ struct tu_cmd_state bool tessfactor_addr_set; bool predication_active; enum a5xx_line_mode line_mode; + VkSampleCountFlagBits samples; + bool msaa_disable; bool z_negative_one_to_one; /* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and @@ -645,7 +649,7 @@ tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer, } void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples, - enum a5xx_line_mode line_mode); + bool msaa_disable); void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 5c4a386e394..1c932f6f30b 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -258,9 +258,7 @@ struct tu_pipeline_builder bool rasterizer_discard; /* these states are affectd by rasterizer_discard */ - bool emit_msaa_state; bool depth_clip_disable; - VkSampleCountFlagBits samples; bool use_color_attachments; bool use_dual_src_blend; bool alpha_to_coverage; @@ -3488,23 +3486,18 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, pipeline->rast.line_mode = RECTANGULAR; - if (tu6_primtype_line(pipeline->ia.primtype) || - (tu6_primtype_patches(pipeline->ia.primtype) && - pipeline->tess.patch_type == IR3_TESS_ISOLINES)) { - const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state = - vk_find_struct_const(rast_info->pNext, - PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); + const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state = + vk_find_struct_const(rast_info->pNext, + PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); - if (rast_line_state && 
rast_line_state->lineRasterizationMode == - VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { - pipeline->rast.line_mode = BRESENHAM; - } + if (rast_line_state && + rast_line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { + pipeline->rast.line_mode = BRESENHAM; } struct tu_cs cs; uint32_t cs_size = 9 + - (builder->device->physical_device->info->a6xx.has_shading_rate ? 8 : 0) + - (builder->emit_msaa_state ? 11 : 0); + (builder->device->physical_device->info->a6xx.has_shading_rate ? 8 : 0); pipeline->rast.state = tu_cs_draw_state(&pipeline->cs, &cs, cs_size); tu_cs_emit_regs(&cs, @@ -3534,12 +3527,6 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A30()); } - /* If samples count couldn't be devised from the subpass, we should emit it here. - * It happens when subpass doesn't use any color/depth attachment. - */ - if (builder->emit_msaa_state) - tu6_emit_msaa(&cs, builder->samples, pipeline->rast.line_mode); - const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); @@ -3720,12 +3707,16 @@ tu_pipeline_builder_parse_multisample_and_color_blend( * * We leave the relevant registers stale when rasterization is disabled. */ - if (builder->rasterizer_discard) + if (builder->rasterizer_discard) { + pipeline->output.samples = VK_SAMPLE_COUNT_1_BIT; return; + } static const VkPipelineColorBlendStateCreateInfo dummy_blend_info; const VkPipelineMultisampleStateCreateInfo *msaa_info = builder->create_info->pMultisampleState; + pipeline->output.samples = msaa_info->rasterizationSamples; + const VkPipelineColorBlendStateCreateInfo *blend_info = builder->use_color_attachments ? 
builder->create_info->pColorBlendState : &dummy_blend_info; @@ -4021,16 +4012,6 @@ tu_pipeline_builder_init_graphics( builder->multiview_mask = rendering_info->viewMask; - /* We don't know with dynamic rendering whether the pipeline will be - * used in a render pass with none of attachments enabled, so we have to - * dynamically emit MSAA state. - * - * TODO: Move MSAA state to a separate draw state and emit it - * dynamically only when the sample count is different from the - * subpass's sample count. - */ - builder->emit_msaa_state = !builder->rasterizer_discard; - const VkRenderingSelfDependencyInfoMESA *self_dependency = vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA); @@ -4073,9 +4054,6 @@ tu_pipeline_builder_init_graphics( builder->multiview_mask = subpass->multiview_mask; - /* variableMultisampleRate support */ - builder->emit_msaa_state = (subpass->samples == 0) && !builder->rasterizer_discard; - if (!builder->rasterizer_discard) { const uint32_t a = subpass->depth_stencil_attachment.attachment; builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ? @@ -4107,10 +4085,7 @@ tu_pipeline_builder_init_graphics( builder->feedback_loop_may_involve_textures = true; } - if (builder->rasterizer_discard) { - builder->samples = VK_SAMPLE_COUNT_1_BIT; - } else { - builder->samples = create_info->pMultisampleState->rasterizationSamples; + if (!builder->rasterizer_discard) { builder->alpha_to_coverage = create_info->pMultisampleState->alphaToCoverageEnable; if (tu_blend_state_is_dual_src(create_info->pColorBlendState)) { diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index f68bfe60ec1..c4796f310bb 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -153,7 +153,6 @@ struct tu_pipeline uint32_t rb_blend_cntl, rb_blend_cntl_mask; uint32_t color_write_enable, blend_enable; bool logic_op_enabled, rop_reads_dst; - } blend; /* Misc. 
info from the fragment output interface state that is used @@ -167,6 +166,8 @@ struct tu_pipeline bool rb_depth_cntl_disable; + VkSampleCountFlagBits samples; + bool raster_order_attachment_access; bool subpass_feedback_loop_ds; bool feedback_loop_may_involve_textures;