asahi: add compute blitter
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26963>
This commit is contained in:
parent
1c60ced65b
commit
9063da53d8
3 changed files with 371 additions and 24 deletions
|
|
@ -1,15 +1,314 @@
|
|||
/*
|
||||
* Copyright 2021 Alyssa Rosenzweig
|
||||
* Copyright 2020-2021 Collabora, Ltd.
|
||||
* Copyright 2019 Sonny Jiang <sonnyj608@gmail.com>
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
* Copyright 2014 Broadcom
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "asahi/compiler/agx_compile.h"
|
||||
#include "asahi/layout/layout.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_format_convert.h"
|
||||
#include "gallium/auxiliary/util/u_blitter.h"
|
||||
#include "gallium/auxiliary/util/u_dump.h"
|
||||
#include "nir/pipe_nir.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/format/u_formats.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_sampler.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "agx_formats.h"
|
||||
#include "agx_state.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
#define BLIT_WG_SIZE 32
|
||||
|
||||
static void *
|
||||
asahi_blit_compute_shader(struct pipe_context *ctx, enum asahi_blit_clamp clamp,
|
||||
bool array)
|
||||
{
|
||||
const nir_shader_compiler_options *options =
|
||||
ctx->screen->get_compiler_options(ctx->screen, PIPE_SHADER_IR_NIR,
|
||||
PIPE_SHADER_COMPUTE);
|
||||
|
||||
nir_builder b_ =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "blit_cs");
|
||||
nir_builder *b = &b_;
|
||||
b->shader->info.workgroup_size[0] = BLIT_WG_SIZE;
|
||||
b->shader->info.workgroup_size[1] = BLIT_WG_SIZE;
|
||||
b->shader->info.num_ubos = 1;
|
||||
|
||||
BITSET_SET(b->shader->info.textures_used, 0);
|
||||
BITSET_SET(b->shader->info.samplers_used, 0);
|
||||
BITSET_SET(b->shader->info.images_used, 0);
|
||||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
|
||||
nir_def *params[3];
|
||||
b->shader->num_uniforms = ARRAY_SIZE(params);
|
||||
for (unsigned i = 0; i < b->shader->num_uniforms; ++i) {
|
||||
params[i] = nir_load_ubo(b, 2, 32, zero, nir_imm_int(b, i * 8),
|
||||
.align_mul = 4, .range = ~0);
|
||||
}
|
||||
|
||||
nir_def *ids =
|
||||
nir_trim_vector(b, nir_load_global_invocation_id(b, 32), array ? 3 : 2);
|
||||
|
||||
nir_def *tex_pos = nir_u2f32(b, ids);
|
||||
nir_def *pos2 =
|
||||
nir_ffma(b, nir_trim_vector(b, tex_pos, 2), params[1], params[0]);
|
||||
if (array) {
|
||||
tex_pos = nir_vector_insert_imm(b, nir_pad_vector(b, pos2, 3),
|
||||
nir_channel(b, tex_pos, 2), 2);
|
||||
} else {
|
||||
tex_pos = pos2;
|
||||
}
|
||||
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
|
||||
tex->dest_type = nir_type_uint32; /* irrelevant */
|
||||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
|
||||
tex->is_array = array;
|
||||
tex->op = nir_texop_tex;
|
||||
tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
|
||||
tex->backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP;
|
||||
tex->coord_components = array ? 3 : 2;
|
||||
tex->texture_index = 0;
|
||||
tex->sampler_index = 0;
|
||||
nir_def_init(&tex->instr, &tex->def, 4, 32);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
nir_def *color = &tex->def;
|
||||
|
||||
if (clamp == ASAHI_BLIT_CLAMP_SINT_TO_UINT)
|
||||
color = nir_imax(b, color, nir_imm_int(b, 0));
|
||||
else if (clamp == ASAHI_BLIT_CLAMP_UINT_TO_SINT)
|
||||
color = nir_umin(b, color, nir_imm_int(b, INT32_MAX));
|
||||
|
||||
nir_def *image_pos =
|
||||
nir_iadd(b, ids, nir_pad_vector_imm_int(b, params[2], 0, array ? 3 : 2));
|
||||
|
||||
nir_image_store(b, nir_imm_int(b, 0), nir_pad_vec4(b, image_pos), zero,
|
||||
color, zero, .image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.access = ACCESS_NON_READABLE, .image_array = array);
|
||||
|
||||
return pipe_shader_from_nir(ctx, b->shader);
|
||||
}
|
||||
|
||||
static bool
|
||||
asahi_compute_blit_supported(const struct pipe_blit_info *info)
|
||||
{
|
||||
return (info->src.box.depth == info->dst.box.depth) && !info->alpha_blend &&
|
||||
!info->num_window_rectangles && !info->sample0_only &&
|
||||
!info->scissor_enable && !info->window_rectangle_include &&
|
||||
info->src.resource->nr_samples <= 1 &&
|
||||
info->dst.resource->nr_samples <= 1 &&
|
||||
!util_format_is_depth_and_stencil(info->src.format) &&
|
||||
!util_format_is_depth_and_stencil(info->dst.format) &&
|
||||
info->src.box.depth >= 0 &&
|
||||
info->mask == util_format_get_mask(info->src.format) &&
|
||||
/* XXX: texsubimage pbo failing otherwise, needs investigation */
|
||||
info->dst.format != PIPE_FORMAT_B5G6R5_UNORM &&
|
||||
info->dst.format != PIPE_FORMAT_B5G5R5A1_UNORM &&
|
||||
info->dst.format != PIPE_FORMAT_B5G5R5X1_UNORM &&
|
||||
info->dst.format != PIPE_FORMAT_R5G6B5_UNORM &&
|
||||
info->dst.format != PIPE_FORMAT_R5G5B5A1_UNORM &&
|
||||
info->dst.format != PIPE_FORMAT_R5G5B5X1_UNORM;
|
||||
}
|
||||
|
||||
static void
|
||||
asahi_compute_save(struct agx_context *ctx)
|
||||
{
|
||||
struct asahi_blitter *blitter = &ctx->compute_blitter;
|
||||
struct agx_stage *stage = &ctx->stage[PIPE_SHADER_COMPUTE];
|
||||
|
||||
assert(!blitter->active && "recursion detected, driver bug");
|
||||
|
||||
pipe_resource_reference(&blitter->saved_cb.buffer, stage->cb[0].buffer);
|
||||
memcpy(&blitter->saved_cb, &stage->cb[0],
|
||||
sizeof(struct pipe_constant_buffer));
|
||||
|
||||
blitter->has_saved_image = stage->image_mask & BITFIELD_BIT(0);
|
||||
if (blitter->has_saved_image) {
|
||||
pipe_resource_reference(&blitter->saved_image.resource,
|
||||
stage->images[0].resource);
|
||||
memcpy(&blitter->saved_image, &stage->images[0],
|
||||
sizeof(struct pipe_image_view));
|
||||
}
|
||||
|
||||
pipe_sampler_view_reference(&blitter->saved_sampler_view,
|
||||
&stage->textures[0]->base);
|
||||
|
||||
blitter->saved_num_sampler_states = stage->sampler_count;
|
||||
memcpy(blitter->saved_sampler_states, stage->samplers,
|
||||
stage->sampler_count * sizeof(void *));
|
||||
|
||||
blitter->saved_cs = stage->shader;
|
||||
blitter->active = true;
|
||||
}
|
||||
|
||||
static void
|
||||
asahi_compute_restore(struct agx_context *ctx)
|
||||
{
|
||||
struct pipe_context *pctx = &ctx->base;
|
||||
struct asahi_blitter *blitter = &ctx->compute_blitter;
|
||||
|
||||
if (blitter->has_saved_image) {
|
||||
pctx->set_shader_images(pctx, PIPE_SHADER_COMPUTE, 0, 1, 0,
|
||||
&blitter->saved_image);
|
||||
pipe_resource_reference(&blitter->saved_image.resource, NULL);
|
||||
}
|
||||
|
||||
/* take_ownership=true so do not unreference */
|
||||
pctx->set_constant_buffer(pctx, PIPE_SHADER_COMPUTE, 0, true,
|
||||
&blitter->saved_cb);
|
||||
blitter->saved_cb.buffer = NULL;
|
||||
|
||||
if (blitter->saved_sampler_view) {
|
||||
pctx->set_sampler_views(pctx, PIPE_SHADER_COMPUTE, 0, 1, 0, true,
|
||||
&blitter->saved_sampler_view);
|
||||
|
||||
blitter->saved_sampler_view = NULL;
|
||||
}
|
||||
|
||||
if (blitter->saved_num_sampler_states) {
|
||||
pctx->bind_sampler_states(pctx, PIPE_SHADER_COMPUTE, 0,
|
||||
blitter->saved_num_sampler_states,
|
||||
blitter->saved_sampler_states);
|
||||
}
|
||||
|
||||
pctx->bind_compute_state(pctx, blitter->saved_cs);
|
||||
blitter->saved_cs = NULL;
|
||||
blitter->active = false;
|
||||
}
|
||||
|
||||
static void
|
||||
asahi_compute_blit(struct pipe_context *ctx, const struct pipe_blit_info *info,
|
||||
struct asahi_blitter *blitter)
|
||||
{
|
||||
if (info->src.box.width == 0 || info->src.box.height == 0 ||
|
||||
info->dst.box.width == 0 || info->dst.box.height == 0)
|
||||
return;
|
||||
|
||||
assert(asahi_compute_blit_supported(info));
|
||||
asahi_compute_save(agx_context(ctx));
|
||||
|
||||
unsigned depth = info->dst.box.depth;
|
||||
bool array = depth > 1;
|
||||
|
||||
struct pipe_resource *src = info->src.resource;
|
||||
struct pipe_resource *dst = info->dst.resource;
|
||||
struct pipe_sampler_view src_templ = {0}, *src_view;
|
||||
unsigned width = info->dst.box.width;
|
||||
unsigned height = info->dst.box.height;
|
||||
|
||||
float src_width = (float)u_minify(src->width0, info->src.level);
|
||||
float src_height = (float)u_minify(src->height0, info->src.level);
|
||||
|
||||
float x_scale = (info->src.box.width / (float)width) / src_width;
|
||||
float y_scale = (info->src.box.height / (float)height) / src_height;
|
||||
|
||||
unsigned data[] = {
|
||||
fui(0.5f * x_scale + (float)info->src.box.x / src_width),
|
||||
fui(0.5f * y_scale + (float)info->src.box.y / src_height),
|
||||
fui(x_scale),
|
||||
fui(y_scale),
|
||||
info->dst.box.x,
|
||||
info->dst.box.y,
|
||||
};
|
||||
|
||||
struct pipe_constant_buffer cb = {
|
||||
.buffer_size = sizeof(data),
|
||||
.user_buffer = data,
|
||||
};
|
||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, &cb);
|
||||
|
||||
struct pipe_image_view image = {
|
||||
.resource = dst,
|
||||
.access = PIPE_IMAGE_ACCESS_WRITE | PIPE_IMAGE_ACCESS_DRIVER_INTERNAL,
|
||||
.shader_access = PIPE_IMAGE_ACCESS_WRITE,
|
||||
.format = info->dst.format,
|
||||
.u.tex.level = info->dst.level,
|
||||
.u.tex.first_layer = info->dst.box.z,
|
||||
.u.tex.last_layer = info->dst.box.z + depth - 1,
|
||||
.u.tex.single_layer_view = !array,
|
||||
};
|
||||
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
|
||||
|
||||
if (!blitter->sampler[info->filter]) {
|
||||
struct pipe_sampler_state sampler_state = {
|
||||
.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE,
|
||||
.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE,
|
||||
.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE,
|
||||
.min_img_filter = info->filter,
|
||||
.mag_img_filter = info->filter,
|
||||
.compare_func = PIPE_FUNC_ALWAYS,
|
||||
.seamless_cube_map = true,
|
||||
.max_lod = 31.0f,
|
||||
};
|
||||
|
||||
blitter->sampler[info->filter] =
|
||||
ctx->create_sampler_state(ctx, &sampler_state);
|
||||
}
|
||||
|
||||
ctx->bind_sampler_states(ctx, PIPE_SHADER_COMPUTE, 0, 1,
|
||||
&blitter->sampler[info->filter]);
|
||||
|
||||
/* Initialize the sampler view. */
|
||||
u_sampler_view_default_template(&src_templ, src, src->format);
|
||||
src_templ.format = info->src.format;
|
||||
src_templ.target = array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
|
||||
src_templ.swizzle_r = PIPE_SWIZZLE_X;
|
||||
src_templ.swizzle_g = PIPE_SWIZZLE_Y;
|
||||
src_templ.swizzle_b = PIPE_SWIZZLE_Z;
|
||||
src_templ.swizzle_a = PIPE_SWIZZLE_W;
|
||||
src_templ.u.tex.first_layer = info->src.box.z;
|
||||
src_templ.u.tex.last_layer = info->src.box.z + depth - 1;
|
||||
src_templ.u.tex.first_level = info->src.level;
|
||||
src_templ.u.tex.last_level = info->src.level;
|
||||
src_view = ctx->create_sampler_view(ctx, src, &src_templ);
|
||||
ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, true, &src_view);
|
||||
|
||||
enum asahi_blit_clamp clamp = ASAHI_BLIT_CLAMP_NONE;
|
||||
bool src_sint = util_format_is_pure_sint(info->src.format);
|
||||
bool dst_sint = util_format_is_pure_sint(info->dst.format);
|
||||
if (util_format_is_pure_integer(info->src.format) &&
|
||||
util_format_is_pure_integer(info->dst.format)) {
|
||||
|
||||
if (src_sint && !dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_SINT_TO_UINT;
|
||||
else if (!src_sint && dst_sint)
|
||||
clamp = ASAHI_BLIT_CLAMP_UINT_TO_SINT;
|
||||
}
|
||||
|
||||
if (!blitter->blit_cs[clamp][array]) {
|
||||
blitter->blit_cs[clamp][array] =
|
||||
asahi_blit_compute_shader(ctx, clamp, array);
|
||||
}
|
||||
|
||||
ctx->bind_compute_state(ctx, blitter->blit_cs[clamp][array]);
|
||||
|
||||
struct pipe_grid_info grid_info = {
|
||||
.block = {BLIT_WG_SIZE, BLIT_WG_SIZE, 1},
|
||||
.last_block = {width % BLIT_WG_SIZE, height % BLIT_WG_SIZE, 1},
|
||||
.grid =
|
||||
{
|
||||
DIV_ROUND_UP(width, BLIT_WG_SIZE),
|
||||
DIV_ROUND_UP(height, BLIT_WG_SIZE),
|
||||
depth,
|
||||
},
|
||||
};
|
||||
ctx->launch_grid(ctx, &grid_info);
|
||||
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
|
||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, NULL);
|
||||
ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, false, NULL);
|
||||
|
||||
asahi_compute_restore(agx_context(ctx));
|
||||
}
|
||||
|
||||
void
|
||||
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
|
||||
|
|
@ -65,9 +364,6 @@ agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
|
|||
unreachable("Unsupported blit");
|
||||
}
|
||||
|
||||
/* Handle self-blits */
|
||||
agx_flush_writer(ctx, agx_resource(info->dst.resource), "Blit");
|
||||
|
||||
/* Legalize compression /before/ calling into u_blitter to avoid recursion.
|
||||
* u_blitter bans recursive usage.
|
||||
*/
|
||||
|
|
@ -77,6 +373,17 @@ agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
|
|||
agx_legalize_compression(ctx, agx_resource(info->src.resource),
|
||||
info->src.format);
|
||||
|
||||
if (asahi_compute_blit_supported(info) &&
|
||||
!(ail_is_compressed(&agx_resource(info->dst.resource)->layout) &&
|
||||
util_format_get_blocksize(info->dst.format) == 16)) {
|
||||
|
||||
asahi_compute_blit(pipe, info, &ctx->compute_blitter);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Handle self-blits */
|
||||
agx_flush_writer(ctx, agx_resource(info->dst.resource), "Blit");
|
||||
|
||||
agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable);
|
||||
util_blitter_blit(ctx->blitter, info);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,28 +103,20 @@ agx_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Bind start_slot...start_slot+count */
|
||||
/* Images writeable with pixel granularity are incompatible with
|
||||
* compression. Decompress if necessary.
|
||||
*
|
||||
* Driver-internal images are used by the compute blitter and are exempt
|
||||
* from these transitions, as it only uses compressed images when safe.
|
||||
*
|
||||
* We do this upfront because agx_decompress and agx_legalize_compression can
|
||||
* call set_shader_images internall.
|
||||
*/
|
||||
for (int i = 0; i < count; i++) {
|
||||
const struct pipe_image_view *image = &iviews[i];
|
||||
struct agx_resource *rsrc = agx_resource(image->resource);
|
||||
|
||||
if (image->resource)
|
||||
ctx->stage[shader].image_mask |= BITFIELD_BIT(start_slot + i);
|
||||
else
|
||||
ctx->stage[shader].image_mask &= ~BITFIELD_BIT(start_slot + i);
|
||||
|
||||
if (!image->resource) {
|
||||
util_copy_image_view(&ctx->stage[shader].images[start_slot + i], NULL);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Images writeable with pixel granularity are incompatible with
|
||||
* compression. Decompress if necessary.
|
||||
*
|
||||
* Driver-internal images are used by the compute blitter and are exempt
|
||||
* from these transitions, as it only uses compressed images when safe.
|
||||
*/
|
||||
if (!(image->access & PIPE_IMAGE_ACCESS_DRIVER_INTERNAL)) {
|
||||
struct agx_resource *rsrc = agx_resource(image->resource);
|
||||
if (rsrc && !(image->access & PIPE_IMAGE_ACCESS_DRIVER_INTERNAL)) {
|
||||
if (!rsrc->layout.writeable_image &&
|
||||
(image->shader_access & PIPE_IMAGE_ACCESS_WRITE)) {
|
||||
|
||||
|
|
@ -139,8 +131,20 @@ agx_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
|
|||
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE)
|
||||
assert(rsrc->layout.writeable_image);
|
||||
}
|
||||
}
|
||||
|
||||
util_copy_image_view(&ctx->stage[shader].images[start_slot + i], image);
|
||||
/* Bind start_slot...start_slot+count */
|
||||
for (int i = 0; i < count; i++) {
|
||||
const struct pipe_image_view *image = &iviews[i];
|
||||
|
||||
if (!image->resource) {
|
||||
util_copy_image_view(&ctx->stage[shader].images[start_slot + i], NULL);
|
||||
ctx->stage[shader].image_mask &= ~BITFIELD_BIT(start_slot + i);
|
||||
} else {
|
||||
util_copy_image_view(&ctx->stage[shader].images[start_slot + i],
|
||||
image);
|
||||
ctx->stage[shader].image_mask |= BITFIELD_BIT(start_slot + i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Unbind start_slot+count...start_slot+count+unbind_num_trailing_slots */
|
||||
|
|
@ -4456,7 +4460,8 @@ static void
|
|||
agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
||||
{
|
||||
struct agx_context *ctx = agx_context(pipe);
|
||||
if (unlikely(!agx_render_condition_check(ctx)))
|
||||
if (unlikely(!ctx->compute_blitter.active &&
|
||||
!agx_render_condition_check(ctx)))
|
||||
return;
|
||||
|
||||
struct agx_batch *batch = agx_get_compute_batch(ctx);
|
||||
|
|
|
|||
|
|
@ -452,6 +452,40 @@ enum agx_dirty {
|
|||
*/
|
||||
#define AGX_MAX_BATCHES (128)
|
||||
|
||||
static_assert(PIPE_TEX_FILTER_NEAREST < 2, "known order");
|
||||
static_assert(PIPE_TEX_FILTER_LINEAR < 2, "known order");
|
||||
|
||||
enum asahi_blit_clamp {
|
||||
ASAHI_BLIT_CLAMP_NONE,
|
||||
ASAHI_BLIT_CLAMP_UINT_TO_SINT,
|
||||
ASAHI_BLIT_CLAMP_SINT_TO_UINT,
|
||||
|
||||
/* keep last */
|
||||
ASAHI_BLIT_CLAMP_COUNT,
|
||||
};
|
||||
|
||||
struct asahi_blitter {
|
||||
bool active;
|
||||
|
||||
/* [clamp_type][is_array] */
|
||||
void *blit_cs[ASAHI_BLIT_CLAMP_COUNT][2];
|
||||
|
||||
/* [filter] */
|
||||
void *sampler[2];
|
||||
|
||||
struct pipe_constant_buffer saved_cb;
|
||||
|
||||
bool has_saved_image;
|
||||
struct pipe_image_view saved_image;
|
||||
|
||||
unsigned saved_num_sampler_states;
|
||||
void *saved_sampler_states[PIPE_MAX_SAMPLERS];
|
||||
|
||||
struct pipe_sampler_view *saved_sampler_view;
|
||||
|
||||
void *saved_cs;
|
||||
};
|
||||
|
||||
struct agx_context {
|
||||
struct pipe_context base;
|
||||
struct agx_compiled_shader *vs, *fs, *gs;
|
||||
|
|
@ -510,6 +544,7 @@ struct agx_context {
|
|||
bool is_noop;
|
||||
|
||||
struct blitter_context *blitter;
|
||||
struct asahi_blitter compute_blitter;
|
||||
|
||||
/* Map of GEM handle to (batch index + 1) that (conservatively) writes that
|
||||
* BO, or 0 if no writer.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue