ir3, freedreno: implement GL_ARB_shader_draw_parameters
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21593>
This commit is contained in:
parent
2cc77088b9
commit
7609f83c70
13 changed files with 84 additions and 22 deletions
|
|
@ -285,6 +285,10 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
compiler->nir_options.force_indirect_unrolling = nir_var_all;
|
||||
}
|
||||
|
||||
if (options->lower_base_vertex) {
|
||||
compiler->nir_options.lower_base_vertex = true;
|
||||
}
|
||||
|
||||
/* 16-bit ALU op generation is mostly controlled by frontend compiler options, but
|
||||
* this core NIR option enables some optimizations of 16-bit operations.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -67,6 +67,9 @@ struct ir3_compiler_options {
|
|||
|
||||
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
|
||||
bool storage_16bit;
|
||||
|
||||
/* If base_vertex should be lowered in nir */
|
||||
bool lower_base_vertex;
|
||||
};
|
||||
|
||||
struct ir3_compiler {
|
||||
|
|
|
|||
|
|
@ -2204,6 +2204,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
dst[0] = ctx->basevertex;
|
||||
break;
|
||||
case nir_intrinsic_load_is_indexed_draw:
|
||||
if (!ctx->is_indexed_draw) {
|
||||
ctx->is_indexed_draw = create_driver_param(ctx, IR3_DP_IS_INDEXED_DRAW);
|
||||
}
|
||||
dst[0] = ctx->is_indexed_draw;
|
||||
break;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
if (!ctx->draw_id) {
|
||||
ctx->draw_id = create_driver_param(ctx, IR3_DP_DRAWID);
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ struct ir3_context {
|
|||
|
||||
/* For vertex shaders, keep track of the system values sources */
|
||||
struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
|
||||
*draw_id, *view_index;
|
||||
*draw_id, *view_index, *is_indexed_draw;
|
||||
|
||||
/* For fragment shaders: */
|
||||
struct ir3_instruction *samp_id, *samp_mask_in;
|
||||
|
|
|
|||
|
|
@ -864,6 +864,10 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
|
|||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_VTXID_BASE + 1);
|
||||
break;
|
||||
case nir_intrinsic_load_is_indexed_draw:
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_IS_INDEXED_DRAW + 1);
|
||||
break;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_INSTID_BASE + 1);
|
||||
|
|
|
|||
|
|
@ -69,11 +69,12 @@ enum ir3_driver_param {
|
|||
IR3_DP_VTXID_BASE = 1,
|
||||
IR3_DP_INSTID_BASE = 2,
|
||||
IR3_DP_VTXCNT_MAX = 3,
|
||||
IR3_DP_IS_INDEXED_DRAW = 4, /* Note: boolean, ie. 0 or ~0 */
|
||||
/* user-clip-plane components, up to 8x vec4's: */
|
||||
IR3_DP_UCP0_X = 4,
|
||||
IR3_DP_UCP0_X = 5,
|
||||
/* .... */
|
||||
IR3_DP_UCP7_W = 35,
|
||||
IR3_DP_VS_COUNT = 36, /* must be aligned to vec4 */
|
||||
IR3_DP_UCP7_W = 36,
|
||||
IR3_DP_VS_COUNT = 40, /* must be aligned to vec4 */
|
||||
|
||||
/* TCS driver params: */
|
||||
IR3_DP_HS_DEFAULT_OUTER_LEVEL_X = 0,
|
||||
|
|
|
|||
|
|
@ -186,12 +186,25 @@ fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
|
|||
|
||||
if (flags & (PIPE_BARRIER_TEXTURE |
|
||||
PIPE_BARRIER_IMAGE |
|
||||
PIPE_BARRIER_INDIRECT_BUFFER |
|
||||
PIPE_BARRIER_UPDATE_BUFFER |
|
||||
PIPE_BARRIER_UPDATE_TEXTURE)) {
|
||||
flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
|
||||
}
|
||||
|
||||
if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
|
||||
flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
|
||||
|
||||
/* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
|
||||
* do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
|
||||
* pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
|
||||
* with these opcodes, but the alternative would add unnecessary WAIT_FOR_ME's
|
||||
* before draw opcodes that don't need it.
|
||||
*/
|
||||
if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
|
||||
flushes |= FD6_WAIT_FOR_ME;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & PIPE_BARRIER_FRAMEBUFFER) {
|
||||
fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,12 +300,12 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
|
||||
if (emit->vs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw);
|
||||
emit->indirect, emit->draw, emit->draw_id);
|
||||
}
|
||||
|
||||
if (emit->gs && emit->gs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw);
|
||||
emit->indirect, emit->draw, 0);
|
||||
}
|
||||
|
||||
if (emit->hs && emit->hs->need_driver_params) {
|
||||
|
|
@ -314,7 +314,7 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
|
||||
if (emit->ds && emit->ds->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
|
||||
emit->indirect, emit->draw);
|
||||
emit->indirect, emit->draw, 0);
|
||||
}
|
||||
|
||||
fd6_ctx->has_dp_state = true;
|
||||
|
|
|
|||
|
|
@ -74,26 +74,39 @@ draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
|||
}
|
||||
|
||||
static void
|
||||
draw_emit_indirect(struct fd_ringbuffer *ring,
|
||||
draw_emit_indirect(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct CP_DRAW_INDX_OFFSET_0 *draw0,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
unsigned index_offset)
|
||||
unsigned index_offset, uint32_t driver_param)
|
||||
{
|
||||
struct fd_resource *ind = fd_resource(indirect->buffer);
|
||||
|
||||
if (info->index_size) {
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 9);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
struct pipe_resource *idx = info->index.resource;
|
||||
unsigned max_indices = (idx->width0 - index_offset) / info->index_size;
|
||||
|
||||
OUT_PKT(ring, CP_DRAW_INDX_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
|
||||
A5XX_CP_DRAW_INDX_INDIRECT_INDX_BASE(fd_resource(idx)->bo,
|
||||
index_offset),
|
||||
A5XX_CP_DRAW_INDX_INDIRECT_3(.max_indices = max_indices),
|
||||
A5XX_CP_DRAW_INDX_INDIRECT_INDIRECT(ind->bo, indirect->offset));
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
/* index buffer address: */
|
||||
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
|
||||
/* max indices: */
|
||||
OUT_RING(ring, max_indices);
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
} else {
|
||||
OUT_PKT(ring, CP_DRAW_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
|
||||
A5XX_CP_DRAW_INDIRECT_INDIRECT(ind->bo, indirect->offset));
|
||||
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 6);
|
||||
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
|
||||
OUT_RING(ring,
|
||||
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL)
|
||||
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
|
||||
OUT_RING(ring, indirect->draw_count);
|
||||
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
|
||||
OUT_RING(ring, indirect->stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -228,6 +241,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
emit.state.num_groups = 0;
|
||||
emit.streamout_mask = 0;
|
||||
emit.prog = NULL;
|
||||
emit.draw_id = 0;
|
||||
|
||||
if (!(ctx->prog.vs && ctx->prog.fs))
|
||||
return;
|
||||
|
|
@ -365,7 +379,14 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
if (indirect->count_from_stream_output) {
|
||||
draw_emit_xfb(ring, &draw0, info, indirect);
|
||||
} else {
|
||||
draw_emit_indirect(ring, &draw0, info, indirect, index_offset);
|
||||
const struct ir3_const_state *const_state = ir3_const_state(emit.vs);
|
||||
uint32_t dst_offset_dp = const_state->offsets.driver_param;
|
||||
|
||||
/* If unused, pass 0 for DST_OFF: */
|
||||
if (dst_offset_dp > emit.vs->constlen)
|
||||
dst_offset_dp = 0;
|
||||
|
||||
draw_emit_indirect(ctx, ring, &draw0, info, indirect, index_offset, dst_offset_dp);
|
||||
}
|
||||
} else {
|
||||
draw_emit(ring, &draw0, info, &draws[0], index_offset);
|
||||
|
|
@ -401,6 +422,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
if (emit.dirty_groups) {
|
||||
emit.state.num_groups = 0;
|
||||
emit.draw = &draws[i];
|
||||
emit.draw_id = info->increment_draw_id ? i : 0;
|
||||
fd6_emit_3d_state<CHIP>(ring, &emit);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -183,6 +183,7 @@ struct fd6_emit {
|
|||
bool rasterflat : 1;
|
||||
bool primitive_restart : 1;
|
||||
uint8_t streamout_mask;
|
||||
uint32_t draw_id;
|
||||
|
||||
/* cached to avoid repeated lookups: */
|
||||
const struct fd6_program_state *prog;
|
||||
|
|
|
|||
|
|
@ -213,6 +213,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
return is_a6xx(screen);
|
||||
|
||||
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
|
||||
|
|
|
|||
|
|
@ -454,17 +454,19 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v,
|
|||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draw) assert_dt
|
||||
const struct pipe_draw_start_count_bias *draw,
|
||||
const uint32_t draw_id) assert_dt
|
||||
{
|
||||
assert(v->need_driver_params);
|
||||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t offset = const_state->offsets.driver_param;
|
||||
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
|
||||
[IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
|
||||
[IR3_DP_DRAWID] = draw_id, /* used for direct draws; overwritten by hw (CP_DRAW_INDIRECT_MULTI DST_OFF) for indirect draws */
|
||||
[IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
|
||||
[IR3_DP_INSTID_BASE] = info->start_instance,
|
||||
[IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
|
||||
[IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
|
||||
};
|
||||
if (v->key.ucp_enables) {
|
||||
struct pipe_clip_state *ucp = &ctx->ucp;
|
||||
|
|
@ -573,7 +575,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v,
|
|||
/* emit driver params every time: */
|
||||
if (info && v->need_driver_params) {
|
||||
ring_wfi(ctx->batch, ring);
|
||||
ir3_emit_driver_params(v, ring, ctx, info, indirect, draw);
|
||||
ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -570,6 +570,10 @@ ir3_screen_init(struct pipe_screen *pscreen)
|
|||
.bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET +
|
||||
IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts,
|
||||
};
|
||||
|
||||
if (screen->gen >= 6) {
|
||||
options.lower_base_vertex = true;
|
||||
}
|
||||
screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options);
|
||||
|
||||
/* TODO do we want to limit things to # of fast cores, or just limit
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue