freedreno: register usage queries
Avg number of (half) regs per draw, so we can correlate fps dips to shader register usage. Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
8dfc9e22c1
commit
97a9283f5d
8 changed files with 91 additions and 22 deletions
|
|
@ -149,12 +149,17 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
fixup_shader_state(ctx, &emit.key);
|
||||
|
||||
unsigned dirty = ctx->dirty;
|
||||
const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
|
||||
const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
|
||||
|
||||
/* do regular pass first, since that is more likely to fail compiling: */
|
||||
|
||||
if (!(fd3_emit_get_vp(&emit) && fd3_emit_get_fp(&emit)))
|
||||
if (!vp || !fp)
|
||||
return false;
|
||||
|
||||
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
|
||||
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
|
||||
|
||||
emit.key.binning_pass = false;
|
||||
emit.dirty = dirty;
|
||||
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
|
||||
|
|
|
|||
|
|
@ -135,12 +135,17 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
fixup_shader_state(ctx, &emit.key);
|
||||
|
||||
enum fd_dirty_3d_state dirty = ctx->dirty;
|
||||
const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
|
||||
const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
|
||||
|
||||
/* do regular pass first, since that is more likely to fail compiling: */
|
||||
|
||||
if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit)))
|
||||
if (!vp || !fp)
|
||||
return false;
|
||||
|
||||
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
|
||||
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
|
||||
|
||||
emit.key.binning_pass = false;
|
||||
emit.dirty = dirty;
|
||||
|
||||
|
|
|
|||
|
|
@ -134,6 +134,9 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
if (!vp || !fp)
|
||||
return false;
|
||||
|
||||
ctx->stats.vs_regs += ir3_shader_halfregs(vp);
|
||||
ctx->stats.fs_regs += ir3_shader_halfregs(fp);
|
||||
|
||||
/* figure out whether we need to disable LRZ write for binning
|
||||
* pass using draw pass's fp:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -219,6 +219,7 @@ struct fd_context {
|
|||
uint64_t draw_calls;
|
||||
uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
|
||||
uint64_t staging_uploads, shadow_uploads;
|
||||
uint64_t vs_regs, fs_regs;
|
||||
} stats;
|
||||
|
||||
/* Current batch.. the rule here is that you can deref ctx->batch
|
||||
|
|
|
|||
|
|
@ -118,29 +118,45 @@ fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
|
|||
ctx->cond_mode = mode;
|
||||
}
|
||||
|
||||
#define _Q(_name, _query_type, _type, _result_type) { \
|
||||
.name = _name, \
|
||||
.query_type = _query_type, \
|
||||
.type = PIPE_DRIVER_QUERY_TYPE_ ## _type, \
|
||||
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \
|
||||
.group_id = ~(unsigned)0, \
|
||||
}
|
||||
|
||||
#define FQ(_name, _query_type, _type, _result_type) \
|
||||
_Q(_name, FD_QUERY_ ## _query_type, _type, _result_type)
|
||||
|
||||
#define PQ(_name, _query_type, _type, _result_type) \
|
||||
_Q(_name, PIPE_QUERY_ ## _query_type, _type, _result_type)
|
||||
|
||||
/* Table of software (driver-side) queries, built with the FQ()/PQ()
 * helper macros.  fd_get_driver_query_info() reports this table's size
 * and copies entries out of it by index.  Every entry uses the AVERAGE
 * result type; the two register-usage entries ("vsregs", "fsregs") are
 * the only ones declared as FLOAT rather than UINT64.
 */
static const struct pipe_driver_query_info sw_query_list[] = {
|
||||
FQ("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
|
||||
FQ("batches", BATCH_TOTAL, UINT64, AVERAGE),
|
||||
FQ("batches-sysmem", BATCH_SYSMEM, UINT64, AVERAGE),
|
||||
FQ("batches-gmem", BATCH_GMEM, UINT64, AVERAGE),
|
||||
FQ("batches-nondraw", BATCH_NONDRAW, UINT64, AVERAGE),
|
||||
FQ("restores", BATCH_RESTORE, UINT64, AVERAGE),
|
||||
PQ("prims-emitted", PRIMITIVES_EMITTED, UINT64, AVERAGE),
|
||||
FQ("staging", STAGING_UPLOADS, UINT64, AVERAGE),
|
||||
FQ("shadow", SHADOW_UPLOADS, UINT64, AVERAGE),
|
||||
FQ("vsregs", VS_REGS, FLOAT, AVERAGE),
|
||||
FQ("fsregs", FS_REGS, FLOAT, AVERAGE),
|
||||
};
|
||||
|
||||
static int
|
||||
fd_get_driver_query_info(struct pipe_screen *pscreen,
|
||||
unsigned index, struct pipe_driver_query_info *info)
|
||||
{
|
||||
struct pipe_driver_query_info list[] = {
|
||||
{"draw-calls", FD_QUERY_DRAW_CALLS, {0}},
|
||||
{"batches", FD_QUERY_BATCH_TOTAL, {0}},
|
||||
{"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
|
||||
{"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
|
||||
{"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}},
|
||||
{"restores", FD_QUERY_BATCH_RESTORE, {0}},
|
||||
{"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
|
||||
{"staging", FD_QUERY_STAGING_UPLOADS, {0}},
|
||||
{"shadow", FD_QUERY_SHADOW_UPLOADS, {0}},
|
||||
};
|
||||
|
||||
if (!info)
|
||||
return ARRAY_SIZE(list);
|
||||
return ARRAY_SIZE(sw_query_list);
|
||||
|
||||
if (index >= ARRAY_SIZE(list))
|
||||
if (index >= ARRAY_SIZE(sw_query_list))
|
||||
return 0;
|
||||
|
||||
*info = list[index];
|
||||
*info = sw_query_list[index];
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ fd_query(struct pipe_query *pq)
|
|||
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */
|
||||
#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads using staging blit */
|
||||
#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 7) /* texture/buffer uploads that shadowed rsc */
|
||||
#define FD_QUERY_VS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 8) /* avg # of VS registers (scaled up by 100x) */
|
||||
#define FD_QUERY_FS_REGS (PIPE_QUERY_DRIVER_SPECIFIC + 9) /* avg # of FS registers (scaled up by 100x) */
|
||||
|
||||
void fd_query_screen_init(struct pipe_screen *pscreen);
|
||||
void fd_query_context_init(struct pipe_context *pctx);
|
||||
|
|
|
|||
|
|
@ -73,12 +73,16 @@ read_counter(struct fd_context *ctx, int type)
|
|||
return ctx->stats.staging_uploads;
|
||||
case FD_QUERY_SHADOW_UPLOADS:
|
||||
return ctx->stats.shadow_uploads;
|
||||
case FD_QUERY_VS_REGS:
|
||||
return ctx->stats.vs_regs;
|
||||
case FD_QUERY_FS_REGS:
|
||||
return ctx->stats.fs_regs;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_rate_query(struct fd_query *q)
|
||||
is_time_rate_query(struct fd_query *q)
|
||||
{
|
||||
switch (q->type) {
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
|
|
@ -94,14 +98,29 @@ is_rate_query(struct fd_query *q)
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns true for query types whose result is reported as an average
 * per draw call: the begin/end query paths sample ctx->stats.draw_calls
 * for these types, and the result path divides the counter delta by the
 * draw-call delta.  Only the VS/FS register-usage queries qualify.
 */
static bool
|
||||
is_draw_rate_query(struct fd_query *q)
|
||||
{
|
||||
switch (q->type) {
|
||||
case FD_QUERY_VS_REGS:
|
||||
case FD_QUERY_FS_REGS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_sw_query *sq = fd_sw_query(q);
|
||||
sq->begin_value = read_counter(ctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
if (is_time_rate_query(q)) {
|
||||
sq->begin_time = os_time_get();
|
||||
return true;
|
||||
} else if (is_draw_rate_query(q)) {
|
||||
sq->begin_time = ctx->stats.draw_calls;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -109,8 +128,11 @@ fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
|
|||
{
|
||||
struct fd_sw_query *sq = fd_sw_query(q);
|
||||
sq->end_value = read_counter(ctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
if (is_time_rate_query(q)) {
|
||||
sq->end_time = os_time_get();
|
||||
} else if (is_draw_rate_query(q)) {
|
||||
sq->end_time = ctx->stats.draw_calls;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
|
|
@ -121,10 +143,14 @@ fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
|
|||
|
||||
result->u64 = sq->end_value - sq->begin_value;
|
||||
|
||||
if (is_rate_query(q)) {
|
||||
if (is_time_rate_query(q)) {
|
||||
double fps = (result->u64 * 1000000) /
|
||||
(double)(sq->end_time - sq->begin_time);
|
||||
result->u64 = (uint64_t)fps;
|
||||
} else if (is_draw_rate_query(q)) {
|
||||
double avg = ((double)result->u64) /
|
||||
(double)(sq->end_time - sq->begin_time);
|
||||
result->f = avg;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -154,6 +180,8 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
|
|||
case FD_QUERY_BATCH_RESTORE:
|
||||
case FD_QUERY_STAGING_UPLOADS:
|
||||
case FD_QUERY_SHADOW_UPLOADS:
|
||||
case FD_QUERY_VS_REGS:
|
||||
case FD_QUERY_FS_REGS:
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -527,4 +527,13 @@ ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
|
|||
return regid(63, 0);
|
||||
}
|
||||
|
||||
/* calculate register footprint in terms of half-regs (ie. one full
|
||||
* reg counts as two half-regs).
|
||||
*/
|
||||
static inline uint32_t
|
||||
ir3_shader_halfregs(const struct ir3_shader_variant *v)
|
||||
{
/* Full regs are weighted 2x, half regs 1x.
 * NOTE(review): the +1 terms presumably turn a highest-used register
 * index into a count -- confirm against the definition of
 * info.max_reg / info.max_half_reg.
 */
return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1);
|
||||
}
|
||||
|
||||
#endif /* IR3_SHADER_H_ */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue