freedreno, nir, ir3: implement GL_EXT_shader_framebuffer_fetch

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21260>
This commit is contained in:
Amber 2023-03-02 17:45:30 +01:00 committed by Marge Bot
parent ca92183845
commit 8da3494d53
15 changed files with 184 additions and 95 deletions

View file

@ -317,6 +317,8 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_sparse_texture_clamp DONE (radeonsi/gfx9+, zink)
GL_ARB_texture_filter_minmax DONE (nvc0/gm200+, zink)
GL_ARM_shader_framebuffer_fetch_depth_stencil DONE (llvmpipe)
GL_EXT_shader_framebuffer_fetch DONE (freedreno/a6xx, iris/gen9+, llvmpipe, panfrost, virgl, zink, asahi)
GL_EXT_shader_framebuffer_fetch_non_coherent DONE (freedreno/a6xx, iris, llvmpipe, panfrost, virgl, zink, asahi)
GL_EXT_color_buffer_half_float DONE (freedreno, i965, iris, llvmpipe, nv50, nvc0, radeonsi, zink)
GL_EXT_depth_bounds_test DONE (i965/gen12+, nv50, nvc0, radeonsi, softpipe, zink)
GL_EXT_memory_object DONE (freedreno, radeonsi, i965/gen7+, llvmpipe, zink, d3d12)

View file

@ -60,16 +60,18 @@ nir_lower_fb_read_instr(nir_builder *b, nir_instr *instr, UNUSED void *cb_data)
nir_ssa_def *fragcoord = nir_load_frag_coord(b);
nir_ssa_def *sampid = nir_load_sample_id(b);
nir_ssa_def *layer = nir_load_layer_id(b);
fragcoord = nir_f2i32(b, fragcoord);
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->op = nir_texop_txf_ms_fb;
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
tex->coord_components = 2;
tex->coord_components = 3;
tex->dest_type = nir_type_float32;
tex->is_array = true;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(nir_channels(b, fragcoord, 0x3));
tex->src[0].src =
nir_src_for_ssa(nir_vec3(b, nir_channel(b, fragcoord, 0), nir_channel(b, fragcoord, 1), layer));
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(sampid);
struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);

View file

@ -3086,17 +3086,36 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
type = TYPE_S32;
if (tex->op == nir_texop_txf_ms_fb) {
/* only expect a single txf_ms_fb per shader: */
compile_assert(ctx, !ctx->so->fb_read);
compile_assert(ctx, ctx->so->type == MESA_SHADER_FRAGMENT);
ctx->so->fb_read = true;
if (ctx->compiler->options.bindless_fb_read_descriptor >= 0) {
ctx->so->bindless_tex = true;
info.flags = IR3_INSTR_B | IR3_INSTR_A1EN;
info.flags = IR3_INSTR_B;
info.base = ctx->compiler->options.bindless_fb_read_descriptor;
info.a1_val = ctx->compiler->options.bindless_fb_read_slot << 3;
struct ir3_instruction *texture, *sampler;
int base_index =
nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
nir_src tex_src = tex->src[base_index].src;
if (nir_src_is_const(tex_src)) {
texture = create_immed_typed(b,
nir_src_as_uint(tex_src) + ctx->compiler->options.bindless_fb_read_slot,
TYPE_U32);
} else {
texture = create_immed_typed(
ctx->block, ctx->compiler->options.bindless_fb_read_slot, TYPE_U32);
struct ir3_instruction *base =
ir3_get_src(ctx, &tex->src[base_index].src)[0];
texture = ir3_ADD_U(b, texture, 0, base, 0);
}
sampler = create_immed_typed(ctx->block, 0, TYPE_U32);
info.samp_tex = ir3_collect(b, texture, sampler);
info.flags |= IR3_INSTR_S2EN;
if (tex->texture_non_uniform) {
info.flags |= IR3_INSTR_NONUNIF;
}
} else {
/* Otherwise append a sampler to be patched into the texture
* state:

View file

@ -472,6 +472,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_offset);
NIR_PASS_V(s, ir3_nir_move_varying_inputs);
NIR_PASS_V(s, nir_lower_fb_read);
NIR_PASS_V(s, ir3_nir_lower_layer_id);
}
if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&

View file

@ -42,6 +42,7 @@ bool ir3_nir_move_varying_inputs(nir_shader *shader);
int ir3_nir_coord_offset(nir_ssa_def *ssa);
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
bool ir3_nir_lower_wide_load_store(nir_shader *shader);
bool ir3_nir_lower_layer_id(nir_shader *shader);
void ir3_nir_lower_to_explicit_output(nir_shader *shader,
struct ir3_shader_variant *v,

View file

@ -0,0 +1,51 @@
/*
* Copyright 2023 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir_builder.h"
#include "ir3_nir.h"
/*
 * Per-instruction callback for nir_shader_instructions_pass(): redirects
 * every use of a load_layer_id intrinsic to a load_input that reads
 * VARYING_SLOT_LAYER.
 *
 * Returns true when `instr` was a load_layer_id whose uses were rewritten,
 * and false for any other instruction.  `cb_data` is unused.
 *
 * The original load_layer_id instruction is left in place here; only its
 * uses are rewritten.
 */
static bool
nir_lower_layer_id(nir_builder *b, nir_instr *instr, UNUSED void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_layer_id)
      return false;

   /* Everything built below is emitted right before the lowered intrinsic. */
   b->cursor = nir_before_instr(&intr->instr);

   /* Reuse the shader's layer input when one is already declared; otherwise
    * declare it and hand it the next free driver location.
    */
   nir_variable *layer_var = nir_find_variable_with_location(
      b->shader, nir_var_shader_in, VARYING_SLOT_LAYER);
   if (!layer_var) {
      layer_var = nir_variable_create(b->shader, nir_var_shader_in,
                                      glsl_int_type(), "layer");
      layer_var->data.location = VARYING_SLOT_LAYER;
      layer_var->data.driver_location = b->shader->num_inputs++;
   }

   nir_io_semantics layer_sem = {
      .location = VARYING_SLOT_LAYER,
      .num_slots = 1,
   };

   /* Single-component, 32-bit integer load_input with a constant-zero
    * source operand.
    */
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   load->num_components = 1;
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_base(load, layer_var->data.driver_location);
   nir_intrinsic_set_component(load, 0);
   nir_intrinsic_set_dest_type(load, nir_type_int);
   nir_intrinsic_set_io_semantics(load, layer_sem);
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   /* Point every consumer of the old intrinsic at the new load. */
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, &load->dest.ssa);

   return true;
}
/*
 * Runs the load_layer_id lowering callback over every instruction of a
 * fragment shader.
 *
 * Asserts that `shader` is a fragment shader.  Returns the result of
 * nir_shader_instructions_pass(), which is invoked declaring block-index
 * and dominance metadata as preserved.
 */
bool
ir3_nir_lower_layer_id(nir_shader *shader)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   bool progress = nir_shader_instructions_pass(
      shader, nir_lower_layer_id,
      nir_metadata_block_index | nir_metadata_dominance, NULL);

   return progress;
}

View file

@ -283,6 +283,7 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
v->fs.early_fragment_tests = info->fs.early_fragment_tests;
v->fs.color_is_dual_source = info->fs.color_is_dual_source;
v->fs.uses_fbfetch_output = info->fs.uses_fbfetch_output;
v->fs.fbfetch_coherent = info->fs.fbfetch_coherent;
break;
case MESA_SHADER_COMPUTE:

View file

@ -769,6 +769,7 @@ struct ir3_shader_variant {
bool early_fragment_tests : 1;
bool color_is_dual_source : 1;
bool uses_fbfetch_output : 1;
bool fbfetch_coherent : 1;
} fs;
struct {
unsigned req_input_mem;

View file

@ -100,6 +100,7 @@ libfreedreno_ir3_files = files(
'ir3_nir_lower_tex_prefetch.c',
'ir3_nir_lower_wide_load_store.c',
'ir3_nir_move_varying_inputs.c',
'ir3_nir_lower_layer_id.c',
'ir3_nir_opt_preamble.c',
'ir3_postsched.c',
'ir3_print.c',

View file

@ -522,7 +522,8 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem)
uint32_t prim_mode = NO_FLUSH;
if (emit->fs->fs.uses_fbfetch_output) {
if (gmem) {
prim_mode = ctx->blend->blend_coherent ? FLUSH_PER_OVERLAP : NO_FLUSH;
prim_mode = (ctx->blend->blend_coherent || emit->fs->fs.fbfetch_coherent)
? FLUSH_PER_OVERLAP : NO_FLUSH;
} else {
prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
}

View file

@ -255,101 +255,107 @@ use_hw_binning(struct fd_batch *batch)
static void
patch_fb_read_gmem(struct fd_batch *batch)
{
struct fd_screen *screen = batch->ctx->screen;
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
if (!num_patches)
return;
struct fd_screen *screen = batch->ctx->screen;
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
struct pipe_surface *psurf = pfb->cbufs[0];
struct pipe_resource *prsc = psurf->texture;
struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = psurf->format;
for (unsigned i = 0; i < num_patches; i++) {
struct fd_cs_patch *patch =
fd_patch_element(&batch->fb_read_patches, i);
int buf = patch->val;
struct pipe_surface *psurf = pfb->cbufs[buf];
struct pipe_resource *prsc = psurf->texture;
struct fd_resource *rsc = fd_resource(prsc);
enum pipe_format format = psurf->format;
uint8_t swiz[4];
fdl6_format_swiz(psurf->format, false, swiz);
uint8_t swiz[4];
fdl6_format_swiz(psurf->format, false, swiz);
/* always TILE6_2 mode in GMEM, which also means no swap: */
uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
A6XX_TEX_CONST_0_FMT(fd6_texture_format(
format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
A6XX_TEX_CONST_0_SWAP(WZYX) |
A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
/* always TILE6_2 mode in GMEM, which also means no swap: */
uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
A6XX_TEX_CONST_0_FMT(fd6_texture_format(
format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
A6XX_TEX_CONST_0_SWAP(WZYX) |
A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
A6XX_TEX_CONST_1_WIDTH(pfb->width) |
A6XX_TEX_CONST_1_HEIGHT(pfb->height),
A6XX_TEX_CONST_1_HEIGHT(pfb->height),
A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size),
A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base),
A6XX_TEX_CONST_4_BASE_LO(base),
A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
A6XX_TEX_CONST_5_DEPTH(1)
};
A6XX_TEX_CONST_5_BASE_HI(base >> 32) |
A6XX_TEX_CONST_5_DEPTH(prsc->array_size)
};
for (unsigned i = 0; i < num_patches; i++) {
struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
}
util_dynarray_clear(&batch->fb_read_patches);
}
static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
unsigned num_patches =
fd_patch_num_elements(&batch->fb_read_patches);
if (!num_patches)
return;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
struct pipe_surface *psurf = pfb->cbufs[0];
if (!psurf)
return;
struct fd_resource *rsc = fd_resource(psurf->texture);
uint32_t block_width, block_height;
fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
struct fdl_view_args args = {
.iova = fd_bo_get_iova(rsc->bo),
.base_miplevel = psurf->u.tex.level,
.level_count = 1,
.base_array_layer = psurf->u.tex.first_layer,
.layer_count = 1,
.swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W},
.format = psurf->format,
.type = FDL_VIEW_TYPE_2D,
.chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
FDL_CHROMA_LOCATION_COSITED_EVEN},
};
const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
struct fdl6_view view;
fdl6_view_init(&view, layouts, &args,
batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
for (unsigned i = 0; i < num_patches; i++) {
struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
struct fd_cs_patch *patch =
fd_patch_element(&batch->fb_read_patches, i);
int buf = patch->val;
/* This is cheating a bit, since we can't use OUT_RELOC() here.. but
* the render target will already have a reloc emitted for RB_MRT state,
* so we can get away with manually patching in the address here:
*/
struct pipe_surface *psurf = pfb->cbufs[buf];
if (!psurf)
return;
struct pipe_resource *prsc = psurf->texture;
struct fd_resource *rsc = fd_resource(prsc);
uint32_t block_width, block_height;
fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
struct fdl_view_args args = {
.iova = fd_bo_get_iova(rsc->bo),
.base_miplevel = psurf->u.tex.level,
.level_count = 1,
.base_array_layer = psurf->u.tex.first_layer,
.layer_count = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1,
.swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W},
.format = psurf->format,
.type = FDL_VIEW_TYPE_2D,
.chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
FDL_CHROMA_LOCATION_COSITED_EVEN},
};
const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
struct fdl6_view view;
fdl6_view_init(&view, layouts, &args,
batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
}
util_dynarray_clear(&batch->fb_read_patches);
}

View file

@ -229,18 +229,20 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));
if (unlikely(append_fb_read)) {
/* The last image slot is used for fb-read: */
unsigned idx = IR3_BINDLESS_DESC_COUNT - 1;
/* Reserve A6XX_MAX_RENDER_TARGETS image slots for fb-read */
unsigned idx = IR3_BINDLESS_DESC_COUNT - 1 - A6XX_MAX_RENDER_TARGETS;
/* This is patched with the appropriate descriptor for GMEM or
* sysmem rendering path in fd6_gmem
*/
struct fd_cs_patch patch = {
.cs = &desc_buf[idx * FDL6_TEX_CONST_DWORDS],
};
util_dynarray_append(&ctx->batch->fb_read_patches,
__typeof__(patch), patch);
for (int i = 0; i < ctx->batch->framebuffer.nr_cbufs; i++) {
/* This is patched with the appropriate descriptor for GMEM or
* sysmem rendering path in fd6_gmem
*/
struct fd_cs_patch patch = {
.cs = &desc_buf[(idx + i) * FDL6_TEX_CONST_DWORDS],
.val = i,
};
util_dynarray_append(&ctx->batch->fb_read_patches,
__typeof__(patch), patch);
}
}
}

View file

@ -102,7 +102,7 @@ batch_init(struct fd_batch *batch)
fd_reset_wfi(batch);
util_dynarray_init(&batch->draw_patches, NULL);
util_dynarray_init(&batch->fb_read_patches, NULL);
util_dynarray_init(&(batch->fb_read_patches), NULL);
if (is_a2xx(ctx->screen)) {
util_dynarray_init(&batch->shader_patches, NULL);
@ -204,7 +204,8 @@ batch_fini(struct fd_batch *batch)
cleanup_submit(batch);
util_dynarray_fini(&batch->draw_patches);
util_dynarray_fini(&batch->fb_read_patches);
for (int i = 0; i < MAX_RENDER_TARGETS; i++)
util_dynarray_fini(&(batch->fb_read_patches));
if (is_a2xx(batch->ctx->screen)) {
util_dynarray_fini(&batch->shader_patches);

View file

@ -370,7 +370,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FBFETCH:
if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&
is_a6xx(screen))
return 1;
return screen->max_rts;
return 0;
case PIPE_CAP_SAMPLE_SHADING:
if (is_a6xx(screen))

View file

@ -555,10 +555,10 @@ ir3_screen_init(struct pipe_screen *pscreen)
struct fd_screen *screen = fd_screen(pscreen);
struct ir3_compiler_options options = {
.bindless_fb_read_descriptor =
ir3_shader_descriptor_set(PIPE_SHADER_FRAGMENT),
.bindless_fb_read_slot =
IR3_BINDLESS_IMAGE_OFFSET + IR3_BINDLESS_IMAGE_COUNT - 1,
.bindless_fb_read_descriptor =
ir3_shader_descriptor_set(PIPE_SHADER_FRAGMENT),
.bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET +
IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts,
};
screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options);