asahi: Implement timer queries
Everything but the uapi piece. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26056>
This commit is contained in:
parent
bc8232c4a2
commit
49225db140
7 changed files with 147 additions and 5 deletions
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "agx_device.h"
|
||||
#include <inttypes.h>
|
||||
#include "util/timespec.h"
|
||||
#include "agx_bo.h"
|
||||
#include "agx_compile.h"
|
||||
#include "decode.h"
|
||||
|
|
@ -19,6 +20,7 @@
|
|||
#include "util/log.h"
|
||||
#include "util/os_file.h"
|
||||
#include "util/os_mman.h"
|
||||
#include "util/os_time.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "git_sha1.h"
|
||||
#include "nir_serialize.h"
|
||||
|
|
@ -456,3 +458,20 @@ agx_debug_fault(struct agx_device *dev, uint64_t addr)
|
|||
|
||||
pthread_mutex_unlock(&dev->bo_map_lock);
|
||||
}
|
||||
|
||||
/*
 * Return the current timestamp in raw timer ticks (not nanoseconds).
 *
 * The value is sampled directly from a CPU counter: the cntvct_el0 system
 * register on AArch64, or rdtsc on x86. `dev` is not consulted by any branch.
 * Callers convert the result with agx_gpu_time_to_ns().
 *
 * NOTE(review): this presumes the GPU timebase matches the CPU counter read
 * here -- confirm once kernel support for fetching timestamps lands.
 */
uint64_t
agx_get_gpu_timestamp(struct agx_device *dev)
{
#if DETECT_ARCH_AARCH64
   /* The guard must be spelled DETECT_ARCH_AARCH64. A misspelled macro is
    * undefined, evaluates to 0 in #if, and silently disables this branch.
    */
   uint64_t ret;
   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
   return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Maps to the above when run under FEX without thunking */
   uint32_t high, low;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (uint64_t)low | ((uint64_t)high << 32);
#else
   unreachable("Kernel support for fetching timestamps pending");
#endif
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "util/timespec.h"
|
||||
#include "util/vma.h"
|
||||
#include "agx_bo.h"
|
||||
#include "agx_formats.h"
|
||||
|
|
@ -50,6 +51,7 @@ struct drm_asahi_params_global {
|
|||
uint32_t gpu_generation;
|
||||
uint32_t gpu_variant;
|
||||
uint32_t num_dies;
|
||||
uint32_t timer_frequency_hz;
|
||||
};
|
||||
|
||||
/* How many power-of-two levels in the BO cache do we want? 2^14 minimum chosen
|
||||
|
|
@ -145,4 +147,12 @@ int agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo);
|
|||
|
||||
void agx_debug_fault(struct agx_device *dev, uint64_t addr);
|
||||
|
||||
uint64_t agx_get_gpu_timestamp(struct agx_device *dev);
|
||||
|
||||
static inline uint64_t
|
||||
agx_gpu_time_to_ns(struct agx_device *dev, uint64_t gpu_time)
|
||||
{
|
||||
return (gpu_time * NSEC_PER_SEC) / dev->params.timer_frequency_hz;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <xf86drm.h>
|
||||
#include "asahi/lib/decode.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "agx_state.h"
|
||||
|
||||
#define foreach_active(ctx, idx) \
|
||||
|
|
@ -125,6 +126,7 @@ agx_batch_init(struct agx_context *ctx,
|
|||
util_dynarray_init(&batch->depth_bias, ctx);
|
||||
util_dynarray_init(&batch->occlusion_queries, ctx);
|
||||
util_dynarray_init(&batch->nonocclusion_queries, ctx);
|
||||
util_dynarray_init(&batch->timestamp_queries, ctx);
|
||||
|
||||
batch->clear = 0;
|
||||
batch->draw = 0;
|
||||
|
|
@ -167,7 +169,9 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
|
|||
|
||||
assert(ctx->batch != batch);
|
||||
|
||||
agx_finish_batch_queries(batch);
|
||||
uint64_t begin_ts = ~0, end_ts = 0;
|
||||
/* TODO: UAPI pending */
|
||||
agx_finish_batch_queries(batch, begin_ts, end_ts);
|
||||
batch->occlusion_buffer.cpu = NULL;
|
||||
batch->occlusion_buffer.gpu = 0;
|
||||
|
||||
|
|
@ -205,6 +209,7 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
|
|||
util_dynarray_fini(&batch->depth_bias);
|
||||
util_dynarray_fini(&batch->occlusion_queries);
|
||||
util_dynarray_fini(&batch->nonocclusion_queries);
|
||||
util_dynarray_fini(&batch->timestamp_queries);
|
||||
|
||||
if (!(dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC))) {
|
||||
agx_batch_print_stats(dev, batch);
|
||||
|
|
@ -742,3 +747,25 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
|
|||
|
||||
agx_batch_cleanup(ctx, batch, true);
|
||||
}
|
||||
|
||||
void
|
||||
agx_batch_add_timestamp_query(struct agx_batch *batch, struct agx_query *q)
|
||||
{
|
||||
if (q)
|
||||
util_dynarray_append(&batch->timestamp_queries, struct agx_query *, q);
|
||||
}
|
||||
|
||||
/*
|
||||
* Timestamp queries record the time after all current work is finished,
|
||||
* which we handle as the time after all current batches finish (since we're a
|
||||
* tiler and would rather not split the batch). So add a query to all active
|
||||
* batches.
|
||||
*/
|
||||
void
|
||||
agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q)
|
||||
{
|
||||
unsigned idx;
|
||||
foreach_active(ctx, idx) {
|
||||
agx_batch_add_timestamp_query(&ctx->batches.slots[idx], q);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@
|
|||
#include "pipe/p_state.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/half_float.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/timespec.h"
|
||||
#include "util/u_drm.h"
|
||||
#include "util/u_gen_mipmap.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
|
@ -1555,6 +1557,8 @@ agx_get_name(struct pipe_screen *pscreen)
|
|||
static int
|
||||
agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
{
|
||||
struct agx_device *dev = agx_device(pscreen);
|
||||
|
||||
switch (param) {
|
||||
case PIPE_CAP_NPOT_TEXTURES:
|
||||
case PIPE_CAP_SHADER_STENCIL_EXPORT:
|
||||
|
|
@ -1580,6 +1584,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 1;
|
||||
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
case PIPE_CAP_QUERY_TIMESTAMP:
|
||||
case PIPE_CAP_QUERY_TIME_ELAPSED:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
|
||||
|
|
@ -1587,6 +1593,10 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_NATIVE_FENCE_FD:
|
||||
return true;
|
||||
|
||||
case PIPE_CAP_TIMER_RESOLUTION:
|
||||
/* Timer resolution is the length of a single tick in nanos */
|
||||
return agx_gpu_time_to_ns(dev, 1);
|
||||
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
case PIPE_CAP_TEXTURE_SWIZZLE:
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
|
|
@ -2146,6 +2156,13 @@ agx_screen_get_fd(struct pipe_screen *pscreen)
|
|||
return agx_device(pscreen)->fd;
|
||||
}
|
||||
|
||||
/*
 * Screen get_timestamp hook: sample the current timestamp in timer ticks and
 * return it converted to nanoseconds.
 */
static uint64_t
agx_get_timestamp(struct pipe_screen *pscreen)
{
   struct agx_device *dev = agx_device(pscreen);
   uint64_t ticks = agx_get_gpu_timestamp(dev);

   return agx_gpu_time_to_ns(dev, ticks);
}
|
||||
|
||||
struct pipe_screen *
|
||||
agx_screen_create(int fd, struct renderonly *ro,
|
||||
const struct pipe_screen_config *config)
|
||||
|
|
@ -2197,7 +2214,7 @@ agx_screen_create(int fd, struct renderonly *ro,
|
|||
screen->resource_get_handle = agx_resource_get_handle;
|
||||
screen->resource_get_param = agx_resource_get_param;
|
||||
screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
|
||||
screen->get_timestamp = u_default_get_timestamp;
|
||||
screen->get_timestamp = agx_get_timestamp;
|
||||
screen->fence_reference = agx_fence_reference;
|
||||
screen->fence_finish = agx_fence_finish;
|
||||
screen->fence_get_fd = agx_fence_get_fd;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "agx_device.h"
|
||||
#include "agx_state.h"
|
||||
#include "pool.h"
|
||||
|
||||
|
|
@ -72,6 +75,16 @@ agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
|||
ctx->tf_prims_generated = query;
|
||||
break;
|
||||
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
ctx->time_elapsed = query;
|
||||
query->timestamp_begin = UINT64_MAX;
|
||||
query->timestamp_end = 0;
|
||||
return true;
|
||||
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
/* No-op */
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -94,6 +107,7 @@ static bool
|
|||
agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
||||
{
|
||||
struct agx_context *ctx = agx_context(pctx);
|
||||
struct agx_device *dev = agx_device(pctx->screen);
|
||||
struct agx_query *query = (struct agx_query *)pquery;
|
||||
|
||||
ctx->dirty |= AGX_DIRTY_QUERY;
|
||||
|
|
@ -109,6 +123,18 @@ agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
|
|||
return true;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
ctx->tf_prims_generated = NULL;
|
||||
return true;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
ctx->time_elapsed = NULL;
|
||||
return true;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
/* Timestamp logically written now, set up batches to MAX their finish
|
||||
* time in. If there are no batches, it's just the current time stamp.
|
||||
*/
|
||||
agx_add_timestamp_end_query(ctx, query);
|
||||
|
||||
query->timestamp_end = agx_get_gpu_timestamp(dev);
|
||||
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -121,6 +147,7 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
|
|||
{
|
||||
struct agx_query *query = (struct agx_query *)pquery;
|
||||
struct agx_context *ctx = agx_context(pctx);
|
||||
struct agx_device *dev = agx_device(pctx->screen);
|
||||
|
||||
/* For GPU queries, flush the writer. When the writer is flushed, the GPU
|
||||
* will write the value, and when we wait for the writer, the CPU will read
|
||||
|
|
@ -137,6 +164,11 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
|
|||
struct agx_batch *writer = query->writer;
|
||||
agx_flush_batch_for_reason(ctx, writer, "GPU query");
|
||||
agx_sync_batch_for_reason(ctx, writer, "GPU query");
|
||||
} else if (query->type == PIPE_QUERY_TIMESTAMP ||
|
||||
query->type == PIPE_QUERY_TIME_ELAPSED) {
|
||||
/* TODO: Optimize this... timestamp queries are bonkers on tilers. */
|
||||
agx_flush_all(ctx, "Timestamp query");
|
||||
agx_sync_all(ctx, "Timestamp query");
|
||||
}
|
||||
|
||||
/* After syncing, there is no writer left, so query->value is ready */
|
||||
|
|
@ -154,6 +186,15 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
|
|||
vresult->u64 = query->value;
|
||||
return true;
|
||||
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
vresult->u64 = agx_gpu_time_to_ns(dev, query->timestamp_end);
|
||||
return true;
|
||||
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
vresult->u64 =
|
||||
agx_gpu_time_to_ns(dev, query->timestamp_end - query->timestamp_begin);
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("Other queries not yet supported");
|
||||
}
|
||||
|
|
@ -224,7 +265,8 @@ agx_get_query_address(struct agx_batch *batch, struct agx_query *query)
|
|||
}
|
||||
|
||||
void
|
||||
agx_finish_batch_queries(struct agx_batch *batch)
|
||||
agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
|
||||
uint64_t end_ts)
|
||||
{
|
||||
uint64_t *occlusion = (uint64_t *)batch->occlusion_buffer.cpu;
|
||||
|
||||
|
|
@ -272,6 +314,15 @@ agx_finish_batch_queries(struct agx_batch *batch)
|
|||
query->ptr.cpu = NULL;
|
||||
query->ptr.gpu = 0;
|
||||
}
|
||||
|
||||
util_dynarray_foreach(&batch->timestamp_queries, struct agx_query *, it) {
|
||||
struct agx_query *query = *it;
|
||||
if (query == NULL)
|
||||
continue;
|
||||
|
||||
query->timestamp_begin = MIN2(query->timestamp_begin, begin_ts);
|
||||
query->timestamp_end = MAX2(query->timestamp_end, end_ts);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -3743,6 +3743,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
}
|
||||
|
||||
struct agx_batch *batch = agx_get_batch(ctx);
|
||||
agx_batch_add_timestamp_query(batch, ctx->time_elapsed);
|
||||
|
||||
unsigned idx_size = info->index_size;
|
||||
uint64_t ib = 0;
|
||||
size_t ib_extent = 0;
|
||||
|
|
@ -4127,6 +4129,7 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
{
|
||||
struct agx_context *ctx = agx_context(pipe);
|
||||
struct agx_batch *batch = agx_get_compute_batch(ctx);
|
||||
agx_batch_add_timestamp_query(batch, ctx->time_elapsed);
|
||||
|
||||
agx_batch_init_state(batch);
|
||||
|
||||
|
|
|
|||
|
|
@ -322,6 +322,7 @@ struct agx_batch {
|
|||
|
||||
/* Non-occlusion queries */
|
||||
struct util_dynarray nonocclusion_queries;
|
||||
struct util_dynarray timestamp_queries;
|
||||
|
||||
/* Result buffer where the kernel places command execution information */
|
||||
union agx_batch_result *result;
|
||||
|
|
@ -482,6 +483,7 @@ struct agx_context {
|
|||
struct agx_query *occlusion_query;
|
||||
struct agx_query *prims_generated;
|
||||
struct agx_query *tf_prims_generated;
|
||||
struct agx_query *time_elapsed;
|
||||
bool active_queries;
|
||||
|
||||
struct util_debug_callback debug;
|
||||
|
|
@ -625,7 +627,15 @@ struct agx_query {
|
|||
struct agx_ptr ptr;
|
||||
|
||||
/* Accumulator flushed to the CPU */
|
||||
uint64_t value;
|
||||
union {
|
||||
uint64_t value;
|
||||
uint64_t timestamp_end;
|
||||
};
|
||||
|
||||
/* For time elapsed queries, end is in the above union for consistent
|
||||
* handling with timestamp queries.
|
||||
*/
|
||||
uint64_t timestamp_begin;
|
||||
};
|
||||
|
||||
struct agx_sampler_state {
|
||||
|
|
@ -889,6 +899,10 @@ struct agx_batch *agx_get_compute_batch(struct agx_context *ctx);
|
|||
void agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch);
|
||||
int agx_cleanup_batches(struct agx_context *ctx);
|
||||
|
||||
void agx_batch_add_timestamp_query(struct agx_batch *batch,
|
||||
struct agx_query *q);
|
||||
void agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q);
|
||||
|
||||
/* Blit shaders */
|
||||
void agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
|
||||
bool render_cond);
|
||||
|
|
@ -910,7 +924,8 @@ uint16_t agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
|
|||
uint64_t agx_get_query_address(struct agx_batch *batch,
|
||||
struct agx_query *query);
|
||||
|
||||
void agx_finish_batch_queries(struct agx_batch *batch);
|
||||
void agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
|
||||
uint64_t end_ts);
|
||||
|
||||
bool agx_render_condition_check_inner(struct agx_context *ctx);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue