anv: decompress on upload for emulated formats

Add anv_astc_emu_decompress to decompress the raw texel data to the
hidden plane.  Call anv_astc_emu_decompress from anv_CmdCopyImage2 and
anv_CmdCopyBufferToImage2.

v2: support transfer queue and add missing flushes (Lionel)

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25467>
This commit is contained in:
Chia-I Wu 2023-09-28 09:40:36 -07:00 committed by Marge Bot
parent 61e480cb54
commit a73e0e9a04
6 changed files with 305 additions and 10 deletions

View file

@ -0,0 +1,195 @@
/*
* Copyright 2023 Google LLC
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
/* Initialize `iview` as a one-level, one-layer color view of `image`.
 *
 * `format` and `usage` override what the view exposes, `level` selects the
 * mip level and `layer` the array layer.  The view is zeroed first and then
 * set up by anv_image_view_init(), which is handed the command buffer's
 * surface_state_stream.
 */
static void
astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer,
                         struct anv_image_view *iview,
                         struct anv_image *image,
                         VkFormat format,
                         VkImageUsageFlags usage,
                         uint32_t level, uint32_t layer)
{
   /* Chained into the create info so the view carries only `usage`. */
   const VkImageViewUsageCreateInfo usage_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = usage,
   };

   /* Exactly one mip level and one array layer. */
   const VkImageSubresourceRange range = {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .baseMipLevel = level,
      .levelCount = 1,
      .baseArrayLayer = layer,
      .layerCount = 1,
   };

   const VkImageViewCreateInfo view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage_info,
      .image = anv_image_to_handle(image),
      /* XXX we only need 2D but the shader expects 2D_ARRAY */
      .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
      .format = format,
      .subresourceRange = range,
   };

   memset(iview, 0, sizeof(*iview));
   anv_image_view_init(cmd_buffer->device, iview, &view_info,
                       &cmd_buffer->surface_state_stream);
}
/* Prepare `push_set` for the ASTC decode shader.
 *
 * The set is zeroed, initialized against the descriptor set layout stored in
 * device->texcompress_astc, and then filled with every entry of
 * writes->descriptor_set.
 */
static void
astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
                                  struct anv_push_descriptor_set *push_set,
                                  const struct vk_texcompress_astc_write_descriptor_set *writes)
{
   struct anv_device *dev = cmd_buffer->device;
   struct anv_descriptor_set_layout *set_layout =
      anv_descriptor_set_layout_from_handle(dev->texcompress_astc->ds_layout);

   memset(push_set, 0, sizeof(*push_set));
   anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout);

   anv_descriptor_set_write(dev, &push_set->set,
                            ARRAY_SIZE(writes->descriptor_set),
                            writes->descriptor_set);
}
/* Record the compute work that decodes one slice.
 *
 * `src_view` and `dst_view` are bound through a push descriptor set built by
 * vk_texcompress_astc_fill_write_descriptor_sets().  `rect` is given in
 * texel blocks of `astc_format`.  If no decode pipeline can be obtained the
 * batch is flagged with VK_ERROR_UNKNOWN and nothing is recorded.
 */
static void
astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
                          VkFormat astc_format,
                          VkImageLayout layout,
                          VkImageView src_view,
                          VkImageView dst_view,
                          VkRect2D rect)
{
   struct anv_device *dev = cmd_buffer->device;
   struct vk_texcompress_astc_state *astc = dev->texcompress_astc;
   VkCommandBuffer cmd_handle = anv_cmd_buffer_to_handle(cmd_buffer);

   VkPipeline decode_pipeline =
      vk_texcompress_astc_get_decode_pipeline(&dev->vk, &dev->vk.alloc,
                                              astc, VK_NULL_HANDLE,
                                              astc_format);
   if (decode_pipeline == VK_NULL_HANDLE) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_UNKNOWN);
      return;
   }

   anv_CmdBindPipeline(cmd_handle, VK_PIPELINE_BIND_POINT_COMPUTE,
                       decode_pipeline);

   /* Describe the source/destination views and push them as set 0. */
   struct vk_texcompress_astc_write_descriptor_set ds_writes;
   vk_texcompress_astc_fill_write_descriptor_sets(astc, &ds_writes,
                                                  src_view, layout,
                                                  dst_view, astc_format);

   struct anv_push_descriptor_set push_set;
   astc_emu_init_push_descriptor_set(cmd_buffer, &push_set, &ds_writes);

   VkDescriptorSet set_handle = anv_descriptor_set_to_handle(&push_set.set);
   anv_CmdBindDescriptorSets(cmd_handle, VK_PIPELINE_BIND_POINT_COMPUTE,
                             astc->p_layout, 0, 1, &set_handle,
                             0, NULL);

   /* Push constants: the rect offset as passed in, then the far corner
    * (offset + extent) scaled by the format's block width/height, then a
    * flag that is always false here.
    */
   const uint32_t push_const[] = {
      rect.offset.x,
      rect.offset.y,
      (rect.offset.x + rect.extent.width) *
         vk_format_get_blockwidth(astc_format),
      (rect.offset.y + rect.extent.height) *
         vk_format_get_blockheight(astc_format),
      false, /* we don't use VK_IMAGE_VIEW_TYPE_3D */
   };
   anv_CmdPushConstants(cmd_handle, astc->p_layout,
                        VK_SHADER_STAGE_COMPUTE_BIT, 0,
                        sizeof(push_const), push_const);

   /* each workgroup processes 2x2 texel blocks */
   const uint32_t group_count_x = DIV_ROUND_UP(rect.extent.width, 2);
   const uint32_t group_count_y = DIV_ROUND_UP(rect.extent.height, 2);

   anv_genX(dev->info, CmdDispatchBase)(cmd_handle, 0, 0, 0,
                                        group_count_x,
                                        group_count_y,
                                        1);

   anv_push_descriptor_set_finish(&push_set);
}
/* Decompress a region of an emulated-format image, one slice at a time.
 *
 * `image` must have an emulation plane (emu_plane_format is asserted to be
 * defined).  `subresource` selects the mip level and, for non-3D images, the
 * array layers to process.  `block_offset` and `block_extent` are passed to
 * the per-slice decode as the 2D rect; for 3D images their z/depth
 * components select the slices instead of the subresource layers.
 *
 * The compute pipeline, descriptor set 0 and push constants of `cmd_buffer`
 * are saved before the decode work is recorded and restored afterwards.
 */
void
anv_astc_emu_decompress(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_image *image,
                        VkImageLayout layout,
                        const VkImageSubresourceLayers *subresource,
                        VkOffset3D block_offset,
                        VkExtent3D block_extent)
{
   assert(image->emu_plane_format != VK_FORMAT_UNDEFINED);

   const VkRect2D rect = {
      .offset = {
         .x = block_offset.x,
         .y = block_offset.y,
      },
      .extent = {
         .width = block_extent.width,
         .height = block_extent.height,
      },
   };

   /* decompress one layer at a time because anv_image_fill_surface_state
    * requires an uncompressed view of a compressed image to be single layer
    */
   uint32_t slice_base;
   uint32_t slice_count;
   if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
      slice_base = block_offset.z;
      slice_count = block_extent.depth;
   } else {
      slice_base = subresource->baseArrayLayer;
      /* layerCount may be VK_REMAINING_ARRAY_LAYERS (maintenance5); resolve
       * it against the image so the loop below does not run ~0u times.
       */
      slice_count = subresource->layerCount == VK_REMAINING_ARRAY_LAYERS ?
         image->vk.array_layers - subresource->baseArrayLayer :
         subresource->layerCount;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
                             &saved);

   for (uint32_t i = 0; i < slice_count; i++) {
      struct anv_image_view src_view;
      struct anv_image_view dst_view;

      /* Source: the slice viewed as 128-bit texels for sampling. */
      astc_emu_init_image_view(cmd_buffer, &src_view, image,
                               VK_FORMAT_R32G32B32A32_UINT,
                               VK_IMAGE_USAGE_SAMPLED_BIT,
                               subresource->mipLevel, slice_base + i);

      /* Destination: the same slice viewed as RGBA8 for storage writes. */
      astc_emu_init_image_view(cmd_buffer, &dst_view, image,
                               VK_FORMAT_R8G8B8A8_UINT,
                               VK_IMAGE_USAGE_STORAGE_BIT,
                               subresource->mipLevel, slice_base + i);

      astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout,
                                anv_image_view_to_handle(&src_view),
                                anv_image_view_to_handle(&dst_view),
                                rect);
   }

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}
/* Create the shared ASTC decode state in device->texcompress_astc.
 *
 * Does nothing and reports VK_SUCCESS when the physical device does not have
 * emu_astc_ldr set; otherwise returns whatever vk_texcompress_astc_init()
 * returns.
 */
VkResult
anv_device_init_astc_emu(struct anv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (device->physical->emu_astc_ldr) {
      result = vk_texcompress_astc_init(&device->vk, &device->vk.alloc,
                                        VK_NULL_HANDLE,
                                        &device->texcompress_astc);
   }

   return result;
}
/* Tear down the ASTC decode state, if device->texcompress_astc was set. */
void
anv_device_finish_astc_emu(struct anv_device *device)
{
   if (!device->texcompress_astc)
      return;

   vk_texcompress_astc_finish(&device->vk, &device->vk.alloc,
                              device->texcompress_astc);
}

View file

@ -398,6 +398,28 @@ end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
syncpoint);
}
/* Decide whether an operation writing `dst_image` has to be recorded on the
 * companion RCS command buffer instead of `cmd_buffer` itself.
 */
static bool
anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_image *dst_image)
{
   const bool on_blitter = anv_cmd_buffer_is_blitter_queue(cmd_buffer);
   const bool is_msaa = dst_image->vk.samples > 1;
   const bool is_emulated =
      dst_image->emu_plane_format != VK_FORMAT_UNDEFINED;

   /* MSAA images have to be dealt with on the companion RCS command buffer
    * for both CCS && BCS engines.
    */
   if (is_msaa &&
       (on_blitter || anv_cmd_buffer_is_compute_queue(cmd_buffer)))
      return true;

   /* Emulation of formats is done through a compute shader, so we need
    * the companion command buffer for the BCS engine.
    */
   return on_blitter && is_emulated;
}
void anv_CmdCopyImage2(
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2* pCopyImageInfo)
@ -407,12 +429,9 @@ void anv_CmdCopyImage2(
ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);
struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
if (cmd_buffer->device->info->verx10 >= 125 &&
dst_image->vk.samples > 1 &&
(anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
}
@ -429,6 +448,28 @@ void anv_CmdCopyImage2(
anv_blorp_batch_finish(&batch);
if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
const enum anv_pipe_bits pipe_bits =
anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
"Copy flush before decompression");
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
const VkOffset3D block_offset = vk_image_offset_to_elements(
&dst_image->vk, region->dstOffset);
const VkExtent3D block_extent = vk_image_extent_to_elements(
&src_image->vk, region->extent);
anv_astc_emu_decompress(cmd_buffer, dst_image,
pCopyImageInfo->dstImageLayout,
&region->dstSubresource,
block_offset, block_extent);
}
}
if (rcs_done.alloc_size)
end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}
@ -563,6 +604,14 @@ void anv_CmdCopyBufferToImage2(
ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);
struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
}
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
@ -573,6 +622,32 @@ void anv_CmdCopyBufferToImage2(
}
anv_blorp_batch_finish(&batch);
if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
const enum anv_pipe_bits pipe_bits =
anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
"Copy flush before decompression");
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
const VkBufferImageCopy2 *region =
&pCopyBufferToImageInfo->pRegions[r];
const VkOffset3D block_offset = vk_image_offset_to_elements(
&dst_image->vk, region->imageOffset);
const VkExtent3D block_extent = vk_image_extent_to_elements(
&dst_image->vk, region->imageExtent);
anv_astc_emu_decompress(cmd_buffer, dst_image,
pCopyBufferToImageInfo->dstImageLayout,
&region->imageSubresource,
block_offset, block_extent);
}
}
if (rcs_done.alloc_size)
end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}
static void
@ -1018,10 +1093,7 @@ void anv_CmdClearColorImage(
struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
if (cmd_buffer->device->info->verx10 >= 125 &&
image->vk.samples > 1 &&
(anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) ||
anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) {
if (anv_blorp_execute_on_companion(cmd_buffer, image)) {
rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
}

View file

@ -3546,6 +3546,8 @@ VkResult anv_CreateDevice(
anv_device_init_internal_kernels(device);
anv_device_init_astc_emu(device);
anv_device_perf_init(device);
anv_device_utrace_init(device);
@ -3672,6 +3674,8 @@ void anv_DestroyDevice(
anv_device_finish_rt_shaders(device);
anv_device_finish_astc_emu(device);
anv_device_finish_internal_kernels(device);
vk_pipeline_cache_destroy(device->internal_cache, NULL);

View file

@ -2875,7 +2875,19 @@ anv_image_fill_surface_state(struct anv_device *device,
enum anv_image_view_state_flags flags,
struct anv_surface_state *state_inout)
{
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
uint32_t plane = anv_image_aspect_to_plane(image, aspect);
if (image->emu_plane_format != VK_FORMAT_UNDEFINED) {
const uint16_t view_bpb = isl_format_get_layout(view_in->format)->bpb;
enum isl_format format =
image->planes[plane].primary_surface.isl.format;
/* redirect to the hidden plane if not size-compatible */
if (isl_format_get_layout(format)->bpb != view_bpb) {
plane = image->n_planes;
format = image->planes[plane].primary_surface.isl.format;
assert(isl_format_get_layout(format)->bpb == view_bpb);
}
}
const struct anv_surface *surface = &image->planes[plane].primary_surface,
*aux_surface = &image->planes[plane].aux_surface;

View file

@ -1670,6 +1670,8 @@ struct anv_device {
* resources but never use them.
*/
bool using_sparse;
struct vk_texcompress_astc_state *texcompress_astc;
};
static inline uint32_t
@ -5356,6 +5358,15 @@ struct anv_memcpy_state {
VkResult anv_device_init_internal_kernels(struct anv_device *device);
void anv_device_finish_internal_kernels(struct anv_device *device);
VkResult anv_device_init_astc_emu(struct anv_device *device);
void anv_device_finish_astc_emu(struct anv_device *device);
void anv_astc_emu_decompress(struct anv_cmd_buffer *cmd_buffer,
struct anv_image *image,
VkImageLayout layout,
const VkImageSubresourceLayers *subresource,
VkOffset3D block_offset,
VkExtent3D block_extent);
/* This structure is used in 2 scenarios :
*
* - copy utrace timestamps from command buffer so that command buffer can

View file

@ -183,6 +183,7 @@ libanv_files = files(
'anv_private.h',
'anv_queue.c',
'anv_sparse.c',
'anv_astc_emu.c',
'anv_util.c',
'anv_utrace.c',
'anv_va.c',