radeonsi: add HUD queries for cache flush stats

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2016-12-25 19:48:55 +01:00
parent aac07bb79c
commit 5871ebd7f1
4 changed files with 32 additions and 0 deletions

View file

@@ -574,6 +574,9 @@ struct r600_common_context {
unsigned num_vs_flushes;
unsigned num_ps_flushes;
unsigned num_cs_flushes;
unsigned num_fb_cache_flushes;
unsigned num_L2_invalidates;
unsigned num_L2_writebacks;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */

View file

@@ -113,6 +113,15 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_NUM_CS_FLUSHES:
query->begin_result = rctx->num_cs_flushes;
break;
case R600_QUERY_NUM_FB_CACHE_FLUSHES:
query->begin_result = rctx->num_fb_cache_flushes;
break;
case R600_QUERY_NUM_L2_INVALIDATES:
query->begin_result = rctx->num_L2_invalidates;
break;
case R600_QUERY_NUM_L2_WRITEBACKS:
query->begin_result = rctx->num_L2_writebacks;
break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
@@ -197,6 +206,15 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_NUM_CS_FLUSHES:
query->end_result = rctx->num_cs_flushes;
break;
case R600_QUERY_NUM_FB_CACHE_FLUSHES:
query->end_result = rctx->num_fb_cache_flushes;
break;
case R600_QUERY_NUM_L2_INVALIDATES:
query->end_result = rctx->num_L2_invalidates;
break;
case R600_QUERY_NUM_L2_WRITEBACKS:
query->end_result = rctx->num_L2_writebacks;
break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
@@ -1665,6 +1683,9 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE),
X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),

View file

@@ -52,6 +52,9 @@ enum {
R600_QUERY_NUM_VS_FLUSHES,
R600_QUERY_NUM_PS_FLUSHES,
R600_QUERY_NUM_CS_FLUSHES,
R600_QUERY_NUM_FB_CACHE_FLUSHES,
R600_QUERY_NUM_L2_INVALIDATES,
R600_QUERY_NUM_L2_WRITEBACKS,
R600_QUERY_REQUESTED_VRAM,
R600_QUERY_REQUESTED_GTT,
R600_QUERY_MAPPED_VRAM,

View file

@@ -735,6 +735,9 @@ void si_emit_cache_flush(struct si_context *sctx)
struct radeon_winsys_cs *cs = rctx->gfx.cs;
uint32_t cp_coher_cntl = 0;
if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER)
sctx->b.num_fb_cache_flushes++;
/* SI has a bug that it always flushes ICACHE and KCACHE if either
* bit is set. An alternative way is to write SQC_CACHES, but that
* doesn't seem to work reliably. Since the bug doesn't affect
@@ -852,6 +855,7 @@ void si_emit_cache_flush(struct si_context *sctx)
S_0085F0_TC_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
cp_coher_cntl = 0;
sctx->b.num_L2_invalidates++;
} else {
/* L1 invalidation and L2 writeback must be done separately,
* because both operations can't be done together.
@@ -867,6 +871,7 @@ void si_emit_cache_flush(struct si_context *sctx)
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
sctx->b.num_L2_writebacks++;
}
if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) {
/* Invalidate per-CU VMEM L1. */