radeonsi: implement a non-scaled compute blit+resolve and use it on gfx11
This was written from scratch. Only a few pieces were taken from Indrajit's code. This is also much simpler, and hopefully easier to review. For example, out-of-bounds coordinates are handled trivially. The new blit test proves that this is identical to u_blitter except for a few precision differences (NaNs, sRGB) where the compute blit should be more precise. This is only enabled on gfx11 because it's slower than the gfx blit on gfx10. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17782>
This commit is contained in:
parent
098b9a8d02
commit
49237c0eb3
6 changed files with 399 additions and 2 deletions
|
|
@ -1219,6 +1219,9 @@ static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
|
|||
return;
|
||||
}
|
||||
|
||||
if (si_compute_blit(sctx, info))
|
||||
return;
|
||||
|
||||
si_gfx_blit(ctx, info);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,17 @@ static bool si_can_use_compute_blit(struct si_context *sctx, enum pipe_format fo
|
|||
if (format == PIPE_FORMAT_A8R8_UNORM && is_store)
|
||||
return false;
|
||||
|
||||
if (num_samples > 1)
|
||||
/* MSAA image stores are broken. AMD_DEBUG=nofmask fixes them, implying that the FMASK
|
||||
* expand pass doesn't work, but let's use the gfx blit, which should be faster because
|
||||
* it doesn't require expanding the FMASK.
|
||||
*
|
||||
* TODO: Broken MSAA stores can cause app issues, though this issue might only affect
|
||||
* internal blits, not sure.
|
||||
*
|
||||
* EQAA image stores are also unimplemented, which should be rejected here after MSAA
|
||||
* image stores are fixed.
|
||||
*/
|
||||
if (num_samples > 1 && is_store)
|
||||
return false;
|
||||
|
||||
if (util_format_is_depth_or_stencil(format))
|
||||
|
|
@ -1014,3 +1024,103 @@ void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surfac
|
|||
|
||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb);
|
||||
}
|
||||
|
||||
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info)
|
||||
{
|
||||
/* Compute blits require D16 right now (see the ISA).
|
||||
*
|
||||
* Testing on Navi21 showed that the compute blit is slightly slower than the gfx blit.
|
||||
* The compute blit is even slower with DCC stores. VP13 CATIA_plane_pencil is a good test
|
||||
* for that because it's mostly just blits.
|
||||
*
|
||||
* TODO: benchmark the performance on gfx11
|
||||
*/
|
||||
if (sctx->gfx_level < GFX11)
|
||||
return false;
|
||||
|
||||
if (!si_can_use_compute_blit(sctx, info->dst.format, info->dst.resource->nr_samples, true,
|
||||
vi_dcc_enabled((struct si_texture*)info->dst.resource,
|
||||
info->dst.level)) ||
|
||||
!si_can_use_compute_blit(sctx, info->src.format, info->src.resource->nr_samples, false,
|
||||
vi_dcc_enabled((struct si_texture*)info->src.resource,
|
||||
info->src.level)))
|
||||
return false;
|
||||
|
||||
if (info->alpha_blend ||
|
||||
info->num_window_rectangles ||
|
||||
info->scissor_enable ||
|
||||
/* No scaling. */
|
||||
info->dst.box.width != abs(info->src.box.width) ||
|
||||
info->dst.box.height != abs(info->src.box.height) ||
|
||||
info->dst.box.depth != abs(info->src.box.depth))
|
||||
return false;
|
||||
|
||||
assert(info->src.box.depth >= 0);
|
||||
|
||||
/* Shader images. */
|
||||
struct pipe_image_view image[2];
|
||||
image[0].resource = info->src.resource;
|
||||
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ;
|
||||
image[0].format = info->src.format;
|
||||
image[0].u.tex.level = info->src.level;
|
||||
image[0].u.tex.first_layer = 0;
|
||||
image[0].u.tex.last_layer = util_max_layer(info->src.resource, info->src.level);
|
||||
|
||||
image[1].resource = info->dst.resource;
|
||||
image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE;
|
||||
image[1].format = info->dst.format;
|
||||
image[1].u.tex.level = info->dst.level;
|
||||
image[1].u.tex.first_layer = 0;
|
||||
image[1].u.tex.last_layer = util_max_layer(info->dst.resource, info->dst.level);
|
||||
|
||||
/* Get the shader key. */
|
||||
const struct util_format_description *dst_desc = util_format_description(info->dst.format);
|
||||
unsigned i = util_format_get_first_non_void_channel(info->dst.format);
|
||||
union si_compute_blit_shader_key options;
|
||||
options.key = 0;
|
||||
|
||||
options.always_true = true;
|
||||
options.src_is_1d = info->src.resource->target == PIPE_TEXTURE_1D ||
|
||||
info->src.resource->target == PIPE_TEXTURE_1D_ARRAY;
|
||||
options.dst_is_1d = info->dst.resource->target == PIPE_TEXTURE_1D ||
|
||||
info->dst.resource->target == PIPE_TEXTURE_1D_ARRAY;
|
||||
options.src_is_msaa = info->src.resource->nr_samples > 1;
|
||||
options.dst_is_msaa = info->dst.resource->nr_samples > 1;
|
||||
/* Resolving integer formats only copies sample 0. log2_samples is then unused. */
|
||||
options.sample0_only = options.src_is_msaa && !options.dst_is_msaa &&
|
||||
util_format_is_pure_integer(info->src.format);
|
||||
unsigned num_samples = MAX2(info->src.resource->nr_samples, info->dst.resource->nr_samples);
|
||||
options.log2_samples = options.sample0_only ? 0 : util_logbase2(num_samples);
|
||||
options.flip_x = info->src.box.width < 0;
|
||||
options.flip_y = info->src.box.height < 0;
|
||||
options.sint_to_uint = util_format_is_pure_sint(info->src.format) &&
|
||||
util_format_is_pure_uint(info->dst.format);
|
||||
options.uint_to_sint = util_format_is_pure_uint(info->src.format) &&
|
||||
util_format_is_pure_sint(info->dst.format);
|
||||
options.dst_is_srgb = util_format_is_srgb(info->dst.format);
|
||||
options.fp16_rtz = !util_format_is_pure_integer(info->dst.format) &&
|
||||
(dst_desc->channel[i].size <= 10 ||
|
||||
(dst_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT &&
|
||||
dst_desc->channel[i].size <= 16));
|
||||
|
||||
struct hash_entry *entry = _mesa_hash_table_search(sctx->cs_blit_shaders,
|
||||
(void*)(uintptr_t)options.key);
|
||||
void *shader = entry ? entry->data : NULL;
|
||||
if (!shader) {
|
||||
shader = si_create_blit_cs(sctx, &options);
|
||||
_mesa_hash_table_insert(sctx->cs_blit_shaders,
|
||||
(void*)(uintptr_t)options.key, shader);
|
||||
}
|
||||
|
||||
sctx->cs_user_data[0] = (info->src.box.x & 0xffff) | ((info->dst.box.x & 0xffff) << 16);
|
||||
sctx->cs_user_data[1] = (info->src.box.y & 0xffff) | ((info->dst.box.y & 0xffff) << 16);
|
||||
sctx->cs_user_data[2] = (info->src.box.z & 0xffff) | ((info->dst.box.z & 0xffff) << 16);
|
||||
|
||||
struct pipe_grid_info grid = {0};
|
||||
set_work_size(&grid, 8, 8, 1, info->dst.box.width, info->dst.box.height, info->dst.box.depth);
|
||||
|
||||
si_launch_grid_internal_images(sctx, image, 2, &grid, shader,
|
||||
SI_OP_SYNC_BEFORE_AFTER |
|
||||
(info->render_condition_enable ? SI_OP_CS_RENDER_COND_ENABLE : 0));
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -356,6 +356,13 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX))
|
||||
p_atomic_dec(&context->screen->num_contexts);
|
||||
|
||||
if (sctx->cs_blit_shaders) {
|
||||
hash_table_foreach(sctx->cs_blit_shaders, entry) {
|
||||
context->delete_compute_state(context, entry->data);
|
||||
}
|
||||
_mesa_hash_table_destroy(sctx->cs_blit_shaders, NULL);
|
||||
}
|
||||
|
||||
FREE(sctx);
|
||||
}
|
||||
|
||||
|
|
@ -827,6 +834,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
}
|
||||
|
||||
sctx->initial_gfx_cs_size = sctx->gfx_cs.current.cdw;
|
||||
|
||||
sctx->cs_blit_shaders = _mesa_hash_table_create_u32_keys(NULL);
|
||||
if (!sctx->cs_blit_shaders)
|
||||
goto fail;
|
||||
|
||||
return &sctx->b;
|
||||
fail:
|
||||
fprintf(stderr, "radeonsi: Failed to create a context.\n");
|
||||
|
|
|
|||
|
|
@ -984,6 +984,7 @@ struct si_context {
|
|||
void *cs_clear_12bytes_buffer;
|
||||
void *cs_dcc_retile[32];
|
||||
void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */
|
||||
struct hash_table *cs_blit_shaders;
|
||||
struct si_screen *screen;
|
||||
struct util_debug_callback debug;
|
||||
struct ac_llvm_compiler compiler; /* only non-threaded compilation */
|
||||
|
|
@ -1436,6 +1437,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex);
|
|||
void gfx9_clear_dcc_msaa(struct si_context *sctx, struct pipe_resource *res, uint32_t clear_value,
|
||||
unsigned flags, enum si_coherency coher);
|
||||
void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex);
|
||||
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info);
|
||||
void si_init_compute_blit_functions(struct si_context *sctx);
|
||||
|
||||
/* si_cp_dma.c */
|
||||
|
|
@ -1547,6 +1549,31 @@ void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf)
|
|||
void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);
|
||||
void *si_create_passthrough_tcs(struct si_context *sctx);
|
||||
|
||||
union si_compute_blit_shader_key {
|
||||
struct {
|
||||
/* The key saved in _mesa_hash_table_create_u32_keys() can't be 0. */
|
||||
bool always_true:1;
|
||||
/* Declaration modifiers. */
|
||||
bool src_is_1d:1;
|
||||
bool dst_is_1d:1;
|
||||
bool src_is_msaa:1;
|
||||
bool dst_is_msaa:1;
|
||||
uint8_t log2_samples:4;
|
||||
bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
|
||||
/* Source coordinate modifiers. */
|
||||
bool flip_x:1;
|
||||
bool flip_y:1;
|
||||
/* Output modifiers. */
|
||||
bool sint_to_uint:1;
|
||||
bool uint_to_sint:1;
|
||||
bool dst_is_srgb:1;
|
||||
bool fp16_rtz:1; /* only for equality with pixel shaders, not necessary otherwise */
|
||||
};
|
||||
uint32_t key;
|
||||
};
|
||||
|
||||
void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_shader_key *options);
|
||||
|
||||
/* si_shaderlib_tgsi.c */
|
||||
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
|
||||
unsigned num_layers);
|
||||
|
|
|
|||
|
|
@ -71,6 +71,12 @@ static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_s
|
|||
*y = nir_ushr(b, src, nir_imm_int(b, 16));
|
||||
}
|
||||
|
||||
static void unpack_2x16_signed(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_ssa_def **y)
|
||||
{
|
||||
*x = nir_i2i32(b, nir_u2u16(b, src));
|
||||
*y = nir_ishr(b, src, nir_imm_int(b, 16));
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
deref_ssa(nir_builder *b, nir_variable *var)
|
||||
{
|
||||
|
|
@ -347,3 +353,242 @@ void *si_create_passthrough_tcs(struct si_context *sctx)
|
|||
|
||||
return create_shader_state(sctx, b.shader);
|
||||
}
|
||||
|
||||
static nir_ssa_def *convert_linear_to_srgb(nir_builder *b, nir_ssa_def *input)
|
||||
{
|
||||
/* There are small precision differences compared to CB, so the gfx blit will return slightly
|
||||
* different results.
|
||||
*/
|
||||
nir_ssa_def *cmp[3];
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_float(b, 0.0031308));
|
||||
|
||||
nir_ssa_def *ltvals[3];
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
|
||||
|
||||
nir_ssa_def *gtvals[3];
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0/2.4));
|
||||
gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
|
||||
gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
|
||||
}
|
||||
|
||||
nir_ssa_def *comp[4];
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
|
||||
comp[3] = nir_channel(b, input, 3);
|
||||
|
||||
return nir_vec(b, comp, 4);
|
||||
}
|
||||
|
||||
static nir_ssa_def *image_resolve_msaa(nir_builder *b, nir_variable *img, unsigned num_samples,
|
||||
nir_ssa_def *coord, enum amd_gfx_level gfx_level)
|
||||
{
|
||||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||
nir_ssa_def *result = NULL;
|
||||
nir_variable *var = NULL;
|
||||
|
||||
/* Gfx11 doesn't support samples_identical, so we can't use it. */
|
||||
if (gfx_level < GFX11) {
|
||||
/* We need a local variable to get the result out of conditional branches in SSA. */
|
||||
var = nir_local_variable_create(b->impl, glsl_vec4_type(), NULL);
|
||||
|
||||
/* If all samples are identical, load only sample 0. */
|
||||
nir_push_if(b, nir_image_deref_samples_identical(b, 1, deref_ssa(b, img), coord));
|
||||
result = nir_image_deref_load(b, 4, 32, deref_ssa(b, img), coord, zero, zero);
|
||||
nir_store_var(b, var, result, 0xf);
|
||||
|
||||
nir_push_else(b, NULL);
|
||||
}
|
||||
|
||||
/* Average all samples. (the only options on gfx11) */
|
||||
result = NULL;
|
||||
for (unsigned i = 0; i < num_samples; i++) {
|
||||
nir_ssa_def *sample = nir_image_deref_load(b, 4, 32, deref_ssa(b, img),
|
||||
coord, nir_imm_int(b, i), zero);
|
||||
result = result ? nir_fadd(b, result, sample) : sample;
|
||||
}
|
||||
result = nir_fmul_imm(b, result, 1.0 / num_samples); /* average the sum */
|
||||
|
||||
if (gfx_level < GFX11) {
|
||||
/* Exit the conditional branch and get the result out of the branch. */
|
||||
nir_store_var(b, var, result, 0xf);
|
||||
nir_pop_if(b, NULL);
|
||||
result = nir_load_var(b, var);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static nir_ssa_def *apply_blit_output_modifiers(nir_builder *b, nir_ssa_def *color,
|
||||
const union si_compute_blit_shader_key *options)
|
||||
{
|
||||
if (options->sint_to_uint)
|
||||
color = nir_imax(b, color, nir_imm_int(b, 0));
|
||||
|
||||
if (options->uint_to_sint)
|
||||
color = nir_umin(b, color, nir_imm_int(b, INT32_MAX));
|
||||
|
||||
if (options->dst_is_srgb)
|
||||
color = convert_linear_to_srgb(b, color);
|
||||
|
||||
/* Convert to FP16 with rtz to match the pixel shader. Not necessary, but it helps verify
|
||||
* the behavior of the whole shader by comparing it to the gfx blit.
|
||||
*/
|
||||
if (options->fp16_rtz)
|
||||
color = nir_f2f16_rtz(b, color);
|
||||
|
||||
return color;
|
||||
}
|
||||
|
||||
/* The compute blit shader.
|
||||
*
|
||||
* Differences compared to u_blitter (the gfx blit):
|
||||
* - u_blitter doesn't preserve NaNs, but the compute blit does
|
||||
* - u_blitter has lower linear->SRGB precision because the CB block doesn't
|
||||
* use FP32, but the compute blit does.
|
||||
*
|
||||
* Other than that, non-scaled blits are identical to u_blitter.
|
||||
*
|
||||
* Implementation details:
|
||||
* - Out-of-bounds dst coordinates are not clamped at all. The hw drops
|
||||
* out-of-bounds stores for us.
|
||||
* - Out-of-bounds src coordinates are clamped by emulating CLAMP_TO_EDGE using
|
||||
* the image_size NIR intrinsic.
|
||||
* - X/Y flipping just does this in the shader: -threadIDs - 1
|
||||
* - MSAA copies are implemented but disabled because MSAA image stores don't
|
||||
* work.
|
||||
*/
|
||||
void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_shader_key *options)
|
||||
{
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
|
||||
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, nir_options,
|
||||
"blit_non_scaled_cs");
|
||||
b.shader->info.num_images = 2;
|
||||
if (options->src_is_msaa)
|
||||
BITSET_SET(b.shader->info.msaa_images, 0);
|
||||
if (options->dst_is_msaa)
|
||||
BITSET_SET(b.shader->info.msaa_images, 1);
|
||||
b.shader->info.workgroup_size[0] = 8;
|
||||
b.shader->info.workgroup_size[1] = 8;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
b.shader->info.cs.user_data_components_amd = 3;
|
||||
|
||||
const struct glsl_type *img_type[2] = {
|
||||
glsl_image_type(options->src_is_1d ? GLSL_SAMPLER_DIM_1D :
|
||||
options->src_is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D,
|
||||
/*is_array*/ true, GLSL_TYPE_FLOAT),
|
||||
glsl_image_type(options->dst_is_1d ? GLSL_SAMPLER_DIM_1D :
|
||||
options->dst_is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D,
|
||||
/*is_array*/ true, GLSL_TYPE_FLOAT),
|
||||
};
|
||||
|
||||
nir_variable *img_src = nir_variable_create(b.shader, nir_var_uniform, img_type[0], "img0");
|
||||
img_src->data.binding = 0;
|
||||
|
||||
nir_variable *img_dst = nir_variable_create(b.shader, nir_var_uniform, img_type[1], "img1");
|
||||
img_dst->data.binding = 1;
|
||||
|
||||
nir_ssa_def *zero = nir_imm_int(&b, 0);
|
||||
|
||||
/* Instructions. */
|
||||
/* Let's work with 0-based src and dst coordinates (thread IDs) first. */
|
||||
nir_ssa_def *dst_xyz = get_global_ids(&b, 3);
|
||||
nir_ssa_def *src_xyz = dst_xyz;
|
||||
|
||||
/* Flip src coordinates. */
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (i ? options->flip_y : options->flip_x) {
|
||||
/* x goes from 0 to (dim - 1).
|
||||
* The flipped blit should load from -dim to -1.
|
||||
* Therefore do: x = -x - 1;
|
||||
*/
|
||||
nir_ssa_def *comp = nir_channel(&b, src_xyz, i);
|
||||
comp = nir_iadd_imm(&b, nir_ineg(&b, comp), -1);
|
||||
src_xyz = nir_vector_insert_imm(&b, src_xyz, comp, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add box.xyz. */
|
||||
nir_ssa_def *coord_src = NULL, *coord_dst = NULL;
|
||||
unpack_2x16_signed(&b, nir_channels(&b, nir_load_user_data_amd(&b), 0x7),
|
||||
&coord_src, &coord_dst);
|
||||
coord_dst = nir_iadd(&b, coord_dst, dst_xyz);
|
||||
coord_src = nir_iadd(&b, coord_src, src_xyz);
|
||||
|
||||
/* Clamp to edge for src, only X and Y because Z can't be out of bounds. */
|
||||
unsigned src_clamp_channels = options->src_is_1d ? 0x1 : 0x3;
|
||||
nir_ssa_def *dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
|
||||
dim = nir_channels(&b, dim, src_clamp_channels);
|
||||
|
||||
nir_ssa_def *coord_src_clamped = nir_channels(&b, coord_src, src_clamp_channels);
|
||||
coord_src_clamped = nir_imax(&b, coord_src_clamped, nir_imm_int(&b, 0));
|
||||
coord_src_clamped = nir_imin(&b, coord_src_clamped, nir_iadd_imm(&b, dim, -1));
|
||||
|
||||
for (unsigned i = 0; i < util_bitcount(src_clamp_channels); i++)
|
||||
coord_src = nir_vector_insert_imm(&b, coord_src, nir_channel(&b, coord_src_clamped, i), i);
|
||||
|
||||
/* Swizzle coordinates for 1D_ARRAY. */
|
||||
static unsigned swizzle_xz[] = {0, 2, 0, 0};
|
||||
|
||||
if (options->src_is_1d)
|
||||
coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4);
|
||||
if (options->dst_is_1d)
|
||||
coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4);
|
||||
|
||||
/* Coordinates must have 4 channels in NIR. */
|
||||
coord_src = nir_pad_vector(&b, coord_src, 4);
|
||||
coord_dst = nir_pad_vector(&b, coord_dst, 4);
|
||||
|
||||
/* TODO: out-of-bounds image stores have no effect, but we could jump over them for better perf */
|
||||
|
||||
/* Execute the image loads and stores. */
|
||||
unsigned num_samples = 1 << options->log2_samples;
|
||||
nir_ssa_def *color;
|
||||
|
||||
if (options->src_is_msaa && !options->dst_is_msaa && !options->sample0_only) {
|
||||
/* MSAA resolving (downsampling). */
|
||||
assert(num_samples > 1);
|
||||
color = image_resolve_msaa(&b, img_src, num_samples, coord_src, sctx->gfx_level);
|
||||
color = apply_blit_output_modifiers(&b, color, options);
|
||||
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, zero, color, zero);
|
||||
|
||||
} else if (options->src_is_msaa && options->dst_is_msaa) {
|
||||
/* MSAA copy. */
|
||||
nir_ssa_def *color[16];
|
||||
assert(num_samples > 1);
|
||||
/* Group loads together and then stores. */
|
||||
for (unsigned i = 0; i < num_samples; i++) {
|
||||
color[i] = nir_image_deref_load(&b, 4, 32, deref_ssa(&b, img_src), coord_src,
|
||||
nir_imm_int(&b, i), zero);
|
||||
}
|
||||
for (unsigned i = 0; i < num_samples; i++)
|
||||
color[i] = apply_blit_output_modifiers(&b, color[i], options);
|
||||
for (unsigned i = 0; i < num_samples; i++) {
|
||||
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst,
|
||||
nir_imm_int(&b, i), color[i], zero);
|
||||
}
|
||||
} else if (!options->src_is_msaa && options->dst_is_msaa) {
|
||||
/* MSAA upsampling. */
|
||||
assert(num_samples > 1);
|
||||
color = nir_image_deref_load(&b, 4, 32, deref_ssa(&b, img_src), coord_src, zero, zero);
|
||||
color = apply_blit_output_modifiers(&b, color, options);
|
||||
for (unsigned i = 0; i < num_samples; i++) {
|
||||
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst,
|
||||
nir_imm_int(&b, i), color, zero);
|
||||
}
|
||||
} else {
|
||||
/* Non-MSAA copy or read sample 0 only. */
|
||||
/* src2 = sample_index (zero), src3 = lod (zero) */
|
||||
assert(num_samples == 1);
|
||||
color = nir_image_deref_load(&b, 4, 32, deref_ssa(&b, img_src), coord_src, zero, zero);
|
||||
color = apply_blit_output_modifiers(&b, color, options);
|
||||
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, zero, color, zero);
|
||||
}
|
||||
|
||||
return create_shader_state(sctx, b.shader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -952,7 +952,7 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
if (only_cb_resolve)
|
||||
success = si_msaa_resolve_blit_via_CB(ctx, &info);
|
||||
else
|
||||
success = false;
|
||||
success = si_compute_blit(sctx, &info);
|
||||
|
||||
if (success) {
|
||||
printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue