freedreno: register usage queries

Avg number of (half) regs per draw, so we can correlate fps dips to
shader register usage.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2018-06-25 08:47:55 -04:00
parent 8dfc9e22c1
commit 97a9283f5d
8 changed files with 91 additions and 22 deletions

View file

@ -149,12 +149,17 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
fixup_shader_state(ctx, &emit.key);
unsigned dirty = ctx->dirty;
const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
/* do regular pass first, since that is more likely to fail compiling: */
if (!(fd3_emit_get_vp(&emit) && fd3_emit_get_fp(&emit)))
if (!vp || !fp)
return false;
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
emit.key.binning_pass = false;
emit.dirty = dirty;
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);

View file

@ -135,12 +135,17 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
fixup_shader_state(ctx, &emit.key);
enum fd_dirty_3d_state dirty = ctx->dirty;
const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
/* do regular pass first, since that is more likely to fail compiling: */
if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit)))
if (!vp || !fp)
return false;
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
emit.key.binning_pass = false;
emit.dirty = dirty;

View file

@ -134,6 +134,9 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
if (!vp || !fp)
return false;
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
/* figure out whether we need to disable LRZ write for binning
* pass using draw pass's fp:
*/

View file

@ -219,6 +219,7 @@ struct fd_context {
uint64_t draw_calls;
uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
uint64_t staging_uploads, shadow_uploads;
uint64_t vs_regs, fs_regs;
} stats;
/* Current batch.. the rule here is that you can deref ctx->batch

View file

@ -118,29 +118,45 @@ fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
ctx->cond_mode = mode;
}
/* Builds one driver-query table entry using designated initializers.
 * _type and _result_type are passed without their
 * PIPE_DRIVER_QUERY_TYPE_ / PIPE_DRIVER_QUERY_RESULT_TYPE_ prefixes,
 * which are token-pasted on here.  group_id is set to all-ones
 * (presumably meaning "not in any query group" -- TODO confirm against
 * the gallium driver-query interface).
 */
#define _Q(_name, _query_type, _type, _result_type) { \
.name = _name, \
.query_type = _query_type, \
.type = PIPE_DRIVER_QUERY_TYPE_ ## _type, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \
.group_id = ~(unsigned)0, \
}
/* FQ: table entry whose query type gets the FD_QUERY_ prefix pasted on. */
#define FQ(_name, _query_type, _type, _result_type) \
_Q(_name, FD_QUERY_ ## _query_type, _type, _result_type)
/* PQ: table entry whose query type gets the PIPE_QUERY_ prefix pasted on. */
#define PQ(_name, _query_type, _type, _result_type) \
_Q(_name, PIPE_QUERY_ ## _query_type, _type, _result_type)
/* Table of driver queries, built with the FQ()/PQ() entry macros.
 * Entries are looked up by position, so the order here is the order
 * in which queries are enumerated; append new entries rather than
 * reordering.
 */
static const struct pipe_driver_query_info sw_query_list[] = {
FQ("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
FQ("batches", BATCH_TOTAL, UINT64, AVERAGE),
FQ("batches-sysmem", BATCH_SYSMEM, UINT64, AVERAGE),
FQ("batches-gmem", BATCH_GMEM, UINT64, AVERAGE),
FQ("batches-nondraw", BATCH_NONDRAW, UINT64, AVERAGE),
FQ("restores", BATCH_RESTORE, UINT64, AVERAGE),
PQ("prims-emitted", PRIMITIVES_EMITTED, UINT64, AVERAGE),
FQ("staging", STAGING_UPLOADS, UINT64, AVERAGE),
FQ("shadow", SHADOW_UPLOADS, UINT64, AVERAGE),
/* the register-usage queries are the only FLOAT-typed entries: */
FQ("vsregs", VS_REGS, FLOAT, AVERAGE),
FQ("fsregs", FS_REGS, FLOAT, AVERAGE),
};
/* Enumerates the driver-specific queries.
 *
 * With info == NULL, returns the number of available queries.
 * Otherwise copies entry 'index' into *info and returns 1, or returns 0
 * when 'index' is past the end of the table.
 *
 * NOTE(review): this span comes from a diff rendering and shows the
 * removed per-call 'list' lines together with the added 'sw_query_list'
 * lines.
 */
static int
fd_get_driver_query_info(struct pipe_screen *pscreen,
unsigned index, struct pipe_driver_query_info *info)
{
struct pipe_driver_query_info list[] = {
{"draw-calls", FD_QUERY_DRAW_CALLS, {0}},
{"batches", FD_QUERY_BATCH_TOTAL, {0}},
{"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
{"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
{"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}},
{"restores", FD_QUERY_BATCH_RESTORE, {0}},
{"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
{"staging", FD_QUERY_STAGING_UPLOADS, {0}},
{"shadow", FD_QUERY_SHADOW_UPLOADS, {0}},
};
if (!info)
return ARRAY_SIZE(list);
return ARRAY_SIZE(sw_query_list);
if (index >= ARRAY_SIZE(list))
if (index >= ARRAY_SIZE(sw_query_list))
return 0;
*info = list[index];
*info = sw_query_list[index];
return 1;
}

View file

@ -64,6 +64,8 @@ fd_query(struct pipe_query *pq)
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */
#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads using staging blit */
#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 7) /* texture/buffer uploads that shadowed rsc */
#define FD_QUERY_VS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 8) /* avg # of VS half-regs per draw (float result) */
#define FD_QUERY_FS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 9) /* avg # of FS half-regs per draw (float result) */
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);

View file

@ -73,12 +73,16 @@ read_counter(struct fd_context *ctx, int type)
return ctx->stats.staging_uploads;
case FD_QUERY_SHADOW_UPLOADS:
return ctx->stats.shadow_uploads;
case FD_QUERY_VS_REGS:
return ctx->stats.vs_regs;
case FD_QUERY_FS_REGS:
return ctx->stats.fs_regs;
}
return 0;
}
static bool
is_rate_query(struct fd_query *q)
is_time_rate_query(struct fd_query *q)
{
switch (q->type) {
case FD_QUERY_BATCH_TOTAL:
@ -94,14 +98,29 @@ is_rate_query(struct fd_query *q)
}
}
/* Returns true for the query types whose result is reported as an
 * average per draw call (the VS/FS register-usage queries), false for
 * every other type.
 */
static bool
is_draw_rate_query(struct fd_query *q)
{
	return (q->type == FD_QUERY_VS_REGS) ||
		(q->type == FD_QUERY_FS_REGS);
}
/* Starts a software query: snapshots the current counter value for the
 * query's type into begin_value.  Rate queries also record a starting
 * reference in begin_time: the os_time_get() timestamp for time-rate
 * queries, or the running draw-call count for draw-rate queries.
 *
 * NOTE(review): this span comes from a diff rendering and shows the
 * removed is_rate_query() lines together with the added lines.
 */
static boolean
fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_sw_query *sq = fd_sw_query(q);
sq->begin_value = read_counter(ctx, q->type);
if (is_rate_query(q))
if (is_time_rate_query(q)) {
sq->begin_time = os_time_get();
return true;
} else if (is_draw_rate_query(q)) {
sq->begin_time = ctx->stats.draw_calls;
}
return true;
}
static void
@ -109,8 +128,11 @@ fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_sw_query *sq = fd_sw_query(q);
sq->end_value = read_counter(ctx, q->type);
if (is_rate_query(q))
if (is_time_rate_query(q)) {
sq->end_time = os_time_get();
} else if (is_draw_rate_query(q)) {
sq->end_time = ctx->stats.draw_calls;
}
}
static boolean
@ -121,10 +143,14 @@ fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
result->u64 = sq->end_value - sq->begin_value;
if (is_rate_query(q)) {
if (is_time_rate_query(q)) {
double fps = (result->u64 * 1000000) /
(double)(sq->end_time - sq->begin_time);
result->u64 = (uint64_t)fps;
} else if (is_draw_rate_query(q)) {
double avg = ((double)result->u64) /
(double)(sq->end_time - sq->begin_time);
result->f = avg;
}
return true;
@ -154,6 +180,8 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
case FD_QUERY_BATCH_RESTORE:
case FD_QUERY_STAGING_UPLOADS:
case FD_QUERY_SHADOW_UPLOADS:
case FD_QUERY_VS_REGS:
case FD_QUERY_FS_REGS:
break;
default:
return NULL;

View file

@ -527,4 +527,13 @@ ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
return regid(63, 0);
}
/* Register footprint of a shader variant, measured in half-regs:
 * every full register contributes two half-regs, every half register
 * contributes one.
 *
 * NOTE(review): the "+ 1" presumably converts max_reg / max_half_reg
 * from a highest-used index into a count -- confirm against the
 * definition of the variant's info struct.
 */
static inline uint32_t
ir3_shader_halfregs(const struct ir3_shader_variant *v)
{
	uint32_t footprint = 0;

	/* full registers, two half-regs apiece */
	footprint += 2 * (v->info.max_reg + 1);
	/* half registers */
	footprint += v->info.max_half_reg + 1;

	return footprint;
}
#endif /* IR3_SHADER_H_ */