d3d12: ARB_query_buffer_object and GL4.4

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26156>
This commit is contained in:
Jesse Natalie 2023-11-10 14:48:07 -08:00 committed by Marge Bot
parent 6384ccd1cd
commit 9ef621ec2e
8 changed files with 339 additions and 90 deletions

View file

@ -192,12 +192,12 @@ GL 4.3, GLSL 4.30 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, v
GL_ARB_vertex_attrib_binding DONE (all drivers)
GL 4.4, GLSL 4.40 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7.5+
GL 4.4, GLSL 4.40 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7.5+, d3d12
GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
GL_ARB_buffer_storage DONE (freedreno, nv50, v3d, vc4, lima, panfrost, asahi, d3d12, softpipe, etnaviv, crocus)
GL_ARB_buffer_storage DONE (freedreno, nv50, v3d, vc4, lima, panfrost, asahi, softpipe, etnaviv, crocus)
GL_ARB_clear_texture DONE (all drivers)
GL_ARB_enhanced_layouts DONE (freedreno/a3xx+, nv50, softpipe, crocus, d3d12)
GL_ARB_enhanced_layouts DONE (freedreno/a3xx+, nv50, softpipe, crocus)
- compile-time constant expressions DONE
- explicit byte offsets for blocks DONE
- forced alignment within blocks DONE
@ -206,9 +206,9 @@ GL 4.4, GLSL 4.40 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, v
- input/output block locations DONE
GL_ARB_multi_bind DONE (all drivers)
GL_ARB_query_buffer_object DONE (freedreno/a6xx)
GL_ARB_texture_mirror_clamp_to_edge DONE (freedreno, nv50, softpipe, v3d, panfrost, asahi, crocus, d3d12)
GL_ARB_texture_stencil8 DONE (freedreno, nv50, softpipe, v3d, panfrost, d3d12, asahi)
GL_ARB_vertex_type_10f_11f_11f_rev DONE (freedreno, nv50, softpipe, panfrost, d3d12, asahi, crocus)
GL_ARB_texture_mirror_clamp_to_edge DONE (freedreno, nv50, softpipe, v3d, panfrost, asahi, crocus)
GL_ARB_texture_stencil8 DONE (freedreno, nv50, softpipe, v3d, panfrost, asahi)
GL_ARB_vertex_type_10f_11f_11f_rev DONE (freedreno, nv50, softpipe, panfrost, asahi, crocus)
GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7.5+

View file

@ -230,6 +230,9 @@ d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
batch->has_errors = true;
return;
}
if (FAILED(ctx->cmdlist->QueryInterface(IID_PPV_ARGS(&ctx->cmdlist2)))) {
ctx->cmdlist2 = nullptr;
}
if (FAILED(ctx->cmdlist->QueryInterface(IID_PPV_ARGS(&ctx->cmdlist8)))) {
ctx->cmdlist8 = nullptr;
}

View file

@ -212,6 +212,170 @@ get_draw_auto(const nir_shader_compiler_options *options)
return b.shader;
}
/*
 * Build the "QueryResolve" compute shader described by key->query_resolve.
 *
 * Bindings: one SSBO of 64-bit values per subquery at bindings
 * 0..num_subqueries-1 and, unless resolving in place, one extra output SSBO
 * at binding num_subqueries whose elements are 32 or 64 bits wide according
 * to is_64bit. In-place resolves write back into binding 0.
 *
 * State var (uvec4): components 0..2 hold the number of recorded results for
 * the matching subquery; component 3 holds the output base index, expressed
 * in output elements (it is multiplied by bit_size / 8 before each store).
 *
 * The shader sums the selected field(s) over every recorded result of every
 * subquery and stores the total(s) to the output.
 */
static struct nir_shader *
get_query_resolve(const nir_shader_compiler_options *options, const d3d12_compute_transform_key *key)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "QueryResolve");
uint32_t bit_size = key->query_resolve.is_64bit ? 64 : 32;
const struct glsl_type *value_type = glsl_uintN_t_type(bit_size);
/* In-place resolves are only built as 64-bit, single-subquery shaders, and
 * only PRIMITIVES_GENERATED is expected to combine several subqueries. */
assert(!key->query_resolve.is_resolve_in_place ||
(key->query_resolve.is_64bit && key->query_resolve.num_subqueries == 1));
assert(key->query_resolve.num_subqueries == 1 ||
key->query_resolve.pipe_query_type == PIPE_QUERY_PRIMITIVES_GENERATED);
assert(key->query_resolve.num_subqueries <= 3); /* Fourth state var is an output offset */
nir_variable *inputs[3];
for (uint32_t i = 0; i < key->query_resolve.num_subqueries; ++i) {
/* Inputs are always 64-bit */
inputs[i] = nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_uint64_t_type(), 0, 8), "input");
inputs[i]->data.binding = i;
}
/* By default the output aliases the first input (in-place case); otherwise a
 * dedicated output buffer is declared right after the inputs. */
nir_variable *output = inputs[0];
if (!key->query_resolve.is_resolve_in_place) {
output = nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(value_type, 0, bit_size / 8), "output");
output->data.binding = key->query_resolve.num_subqueries;
}
/* How many entries in each sub-query is passed via root constants */
nir_variable *state_var = nullptr;
nir_def *state_var_data = d3d12_get_state_var(&b, D3D12_STATE_VAR_TRANSFORM_GENERIC0, "state_var", glsl_uvec4_type(), &state_var);
/* For in-place resolves, we resolve each field of the query. Otherwise, resolve one field into the dest */
nir_variable *results[sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(UINT64)];
uint32_t num_result_values = 1;
/* NOTE(review): only PIPELINE_STATISTICS and SO_STATISTICS widen this, so an
 * in-place resolve of any other query type accumulates just the first 64-bit
 * value of each entry. Confirm that is intended for PRIMITIVES_GENERATED. */
if (key->query_resolve.is_resolve_in_place) {
if (key->query_resolve.pipe_query_type == PIPE_QUERY_PIPELINE_STATISTICS)
num_result_values = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(UINT64);
else if (key->query_resolve.pipe_query_type == PIPE_QUERY_SO_STATISTICS)
num_result_values = sizeof(D3D12_QUERY_DATA_SO_STATISTICS) / sizeof(UINT64);
}
/* Timestamp-style queries always accumulate in 64 bits; any narrowing to the
 * output width happens just before the final store. */
uint32_t var_bit_size = key->query_resolve.pipe_query_type == PIPE_QUERY_TIME_ELAPSED ||
key->query_resolve.pipe_query_type == PIPE_QUERY_TIMESTAMP ? 64 : bit_size;
for (uint32_t i = 0; i < num_result_values; ++i) {
results[i] = nir_local_variable_create(b.impl, glsl_uintN_t_type(var_bit_size), "result");
nir_store_var(&b, results[i], nir_imm_intN_t(&b, 0, var_bit_size), 1);
}
/* For each subquery... */
for (uint32_t i = 0; i < key->query_resolve.num_subqueries; ++i) {
nir_def *num_results = nir_channel(&b, state_var_data, i);
uint32_t subquery_index = key->query_resolve.num_subqueries == 1 ?
key->query_resolve.single_subquery_index : i;
/* stride: number of 64-bit values per recorded entry of this subquery.
 * base_offset: index of the field of interest within an entry. */
uint32_t base_offset = 0;
uint32_t stride = 0;
switch (key->query_resolve.pipe_query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_TIMESTAMP:
stride = 1;
break;
case PIPE_QUERY_TIME_ELAPSED:
stride = 2;
break;
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_PRIMITIVES_EMITTED:
stride = sizeof(D3D12_QUERY_DATA_SO_STATISTICS) / sizeof(UINT64);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Subquery 0 uses the SO-statistics layout, the others the
 * pipeline-statistics layout. */
if (subquery_index == 0)
stride = sizeof(D3D12_QUERY_DATA_SO_STATISTICS) / sizeof(UINT64);
else
stride = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(UINT64);
if (!key->query_resolve.is_resolve_in_place) {
if (subquery_index == 1)
base_offset = offsetof(D3D12_QUERY_DATA_PIPELINE_STATISTICS, GSPrimitives) / sizeof(UINT64);
else if (subquery_index == 2)
base_offset = offsetof(D3D12_QUERY_DATA_PIPELINE_STATISTICS, IAPrimitives) / sizeof(UINT64);
}
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
stride = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(UINT64);
break;
default:
unreachable("Unhandled query resolve");
}
/* A single-subquery resolve into a separate buffer reads the field chosen
 * by the key, overriding whatever the switch selected. */
if (!key->query_resolve.is_resolve_in_place && key->query_resolve.num_subqueries == 1)
base_offset = key->query_resolve.single_result_field_offset;
nir_def *base_array_index = nir_imm_int(&b, base_offset);
/* For each query result in this subquery... */
nir_variable *loop_counter = nir_local_variable_create(b.impl, glsl_uint_type(), "loop_counter");
nir_store_var(&b, loop_counter, nir_imm_int(&b, 0), 1);
nir_loop *loop = nir_push_loop(&b);
nir_def *loop_counter_value = nir_load_var(&b, loop_counter);
/* Leave the loop once every recorded result has been visited. */
nir_if *nif = nir_push_if(&b, nir_ieq(&b, loop_counter_value, num_results));
nir_jump(&b, nir_jump_break);
nir_pop_if(&b, nif);
/* For each field in the query result, accumulate */
nir_def *array_index = nir_iadd(&b, nir_imul_imm(&b, loop_counter_value, stride), base_array_index);
for (uint32_t j = 0; j < num_result_values; ++j) {
nir_def *new_value;
if (key->query_resolve.pipe_query_type == PIPE_QUERY_TIME_ELAPSED) {
assert(j == 0 && i == 0);
/* Each entry is two consecutive 64-bit values; accumulate
 * (second - first). */
nir_def *start = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, array_index, 8));
nir_def *end = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, nir_iadd_imm(&b, array_index, 1), 8));
new_value = nir_isub(&b, end, start);
} else {
/* Load the 64-bit field and convert it to the accumulator width. */
new_value = nir_u2uN(&b, nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, nir_iadd_imm(&b, array_index, j), 8)), var_bit_size);
}
nir_store_var(&b, results[j], nir_iadd(&b, nir_load_var(&b, results[j]), new_value), 1);
}
nir_store_var(&b, loop_counter, nir_iadd_imm(&b, loop_counter_value, 1), 1);
nir_pop_loop(&b, loop);
}
/* Results are accumulated, now store the final values */
nir_def *output_base_index = nir_channel(&b, state_var_data, 3);
for (uint32_t i = 0; i < num_result_values; ++i) {
/* When resolving in-place, resolve each field, otherwise just write the one result */
/* NOTE(review): for non-in-place resolves the store index is
 * output_base_index + single_result_field_offset, i.e. the field index also
 * shifts the destination. Confirm callers expect that for index > 0. */
uint32_t field_offset = key->query_resolve.is_resolve_in_place ?
i : key->query_resolve.single_result_field_offset;
/* When resolving time elapsed in-place, write [0, time], as the only special case */
if (key->query_resolve.is_resolve_in_place &&
key->query_resolve.pipe_query_type == PIPE_QUERY_TIME_ELAPSED) {
nir_store_ssbo(&b, nir_imm_int64(&b, 0), nir_imm_int(&b, output->data.binding),
nir_imul_imm(&b, output_base_index, bit_size / 8), 1, (gl_access_qualifier)0, bit_size / 8, 0);
field_offset++;
}
nir_def *result_val = nir_load_var(&b, results[i]);
if (!key->query_resolve.is_resolve_in_place &&
(key->query_resolve.pipe_query_type == PIPE_QUERY_TIME_ELAPSED ||
key->query_resolve.pipe_query_type == PIPE_QUERY_TIMESTAMP)) {
/* Scale by timestamp_multiplier using 64-bit float math, then convert
 * back to a 64-bit unsigned integer. */
result_val = nir_f2u64(&b, nir_fmul_imm(&b, nir_u2f64(&b, result_val), key->query_resolve.timestamp_multiplier));
if (!key->query_resolve.is_64bit) {
/* Narrow 64 -> 32 bits as signed or unsigned per the key. The trailing
 * `true` presumably requests saturation; confirm against the
 * nir_convert_alu_types signature. */
nir_alu_type rounding_type = key->query_resolve.is_signed ? nir_type_int : nir_type_uint;
nir_alu_type src_round = (nir_alu_type)(rounding_type | 64);
nir_alu_type dst_round = (nir_alu_type)(rounding_type | bit_size);
result_val = nir_convert_alu_types(&b, bit_size, result_val, src_round, dst_round, nir_rounding_mode_undef, true);
}
}
nir_store_ssbo(&b, result_val, nir_imm_int(&b, output->data.binding),
nir_imul_imm(&b, nir_iadd_imm(&b, output_base_index, field_offset), bit_size / 8),
1, (gl_access_qualifier)0, bit_size / 8, 0);
}
nir_validate_shader(b.shader, "creation");
/* One SSBO per subquery, plus the separate output buffer when not in place. */
b.shader->info.num_ssbos = key->query_resolve.num_subqueries + !key->query_resolve.is_resolve_in_place;
b.shader->info.num_ubos = 0;
NIR_PASS_V(b.shader, nir_lower_convert_alu_types, NULL);
return b.shader;
}
static struct nir_shader *
create_compute_transform(const nir_shader_compiler_options *options, const d3d12_compute_transform_key *key)
{
@ -224,6 +388,8 @@ create_compute_transform(const nir_shader_compiler_options *options, const d3d12
return get_fake_so_buffer_vertex_count(options);
case d3d12_compute_transform_type::draw_auto:
return get_draw_auto(options);
case d3d12_compute_transform_type::query_resolve:
return get_query_resolve(options, key);
default:
unreachable("Invalid transform");
}

View file

@ -45,6 +45,8 @@ enum class d3d12_compute_transform_type
fake_so_buffer_vertex_count,
/* Append a buffer filled size with (vertex count, 1, 0, 0) */
draw_auto,
/* Accumulate queries together and write a 32-bit or 64-bit result */
query_resolve,
max,
};
@ -67,6 +69,23 @@ struct d3d12_compute_transform_key
uint16_t size;
} ranges[PIPE_MAX_SO_OUTPUTS];
} fake_so_buffer_copy_back;
struct {
/* true means the accumulation should be done as uint64, else uint32. */
uint8_t is_64bit : 1;
/* true means output is written where input[0] was, else output is a separate buffer.
* true also means all fields are accumulated, else single_result_field_offset determines
* which field is resolved. Implies num_subqueries == 1. */
uint8_t is_resolve_in_place : 1;
/* Indicates how many subqueries to accumulate together into a final result. When
* set to 1, single_subquery_index determines where the data comes from. */
uint8_t num_subqueries : 2;
/* The PIPE_QUERY_* type being resolved; the resolve shader switches on it to
* pick the per-entry layout of the input data. */
uint8_t pipe_query_type : 4;
/* Which subquery the single input belongs to; only read when num_subqueries == 1. */
uint8_t single_subquery_index : 2;
/* For separate-buffer resolves: index, in 64-bit values, of the field within each
* entry that is accumulated when num_subqueries == 1. It is also added to the
* output index when the result is stored. */
uint8_t single_result_field_offset : 4;
/* Selects a signed rather than unsigned conversion when a timestamp or
* time-elapsed result is narrowed to 32 bits. */
uint8_t is_signed : 1;
/* Factor applied to timestamp and time-elapsed results when resolving into a
* separate buffer; not applied by in-place resolves. */
double timestamp_multiplier;
} query_resolve;
};
};
@ -83,7 +102,7 @@ struct d3d12_compute_transform_save_restore
{
struct d3d12_shader_selector *cs;
struct pipe_constant_buffer cbuf0;
struct pipe_shader_buffer ssbos[2];
struct pipe_shader_buffer ssbos[4];
};
void

View file

@ -98,6 +98,8 @@ d3d12_context_destroy(struct pipe_context *pctx)
for (unsigned i = 0; i < ARRAY_SIZE(ctx->batches); ++i)
d3d12_destroy_batch(ctx, &ctx->batches[i]);
ctx->cmdlist->Release();
if (ctx->cmdlist2)
ctx->cmdlist2->Release();
if (ctx->cmdlist8)
ctx->cmdlist8->Release();
d3d12_descriptor_pool_free(ctx->sampler_pool);

View file

@ -257,6 +257,7 @@ struct d3d12_context {
uint64_t submit_id;
ID3D12GraphicsCommandList *cmdlist;
ID3D12GraphicsCommandList2 *cmdlist2;
ID3D12GraphicsCommandList8 *cmdlist8;
ID3D12GraphicsCommandList *state_fixup_cmdlist;

View file

@ -23,6 +23,7 @@
#include "d3d12_query.h"
#include "d3d12_compiler.h"
#include "d3d12_compute_transforms.h"
#include "d3d12_context.h"
#include "d3d12_resource.h"
#include "d3d12_screen.h"
@ -184,9 +185,9 @@ d3d12_release_query(struct pipe_context *pctx,
}
static bool
accumulate_subresult(struct d3d12_context *ctx, struct d3d12_query *q_parent,
unsigned sub_query,
union pipe_query_result *result, bool write)
accumulate_subresult_cpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
unsigned sub_query,
union pipe_query_result *result)
{
struct pipe_transfer *transfer = NULL;
struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
@ -194,8 +195,6 @@ accumulate_subresult(struct d3d12_context *ctx, struct d3d12_query *q_parent,
unsigned access = PIPE_MAP_READ;
void *results;
if (write)
access |= PIPE_MAP_WRITE;
access |= PIPE_MAP_UNSYNCHRONIZED;
results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
@ -256,32 +255,6 @@ accumulate_subresult(struct d3d12_context *ctx, struct d3d12_query *q_parent,
}
}
if (write) {
if (q->d3d12qtype == D3D12_QUERY_TYPE_PIPELINE_STATISTICS) {
results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;
results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;
results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;
results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;
results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;
results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;
results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;
results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;
results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;
results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;
results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;
} else if (d3d12_query_heap_type(q_parent->type, sub_query) == D3D12_QUERY_HEAP_TYPE_SO_STATISTICS) {
results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;
results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;
} else {
if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {
results_u64[0] = 0;
results_u64[1] = result->u64;
} else {
results_u64[0] = result->u64;
}
}
}
pipe_buffer_unmap(&ctx->base, transfer);
if (q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)
@ -291,33 +264,33 @@ accumulate_subresult(struct d3d12_context *ctx, struct d3d12_query *q_parent,
}
static bool
accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,
union pipe_query_result *result, bool write)
accumulate_result_cpu(struct d3d12_context *ctx, struct d3d12_query *q,
union pipe_query_result *result)
{
union pipe_query_result local_result;
switch (q->type) {
case PIPE_QUERY_PRIMITIVES_GENERATED:
if (!accumulate_subresult(ctx, q, 0, &local_result, write))
if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
return false;
result->u64 = local_result.so_statistics.primitives_storage_needed;
if (!accumulate_subresult(ctx, q, 1, &local_result, write))
if (!accumulate_subresult_cpu(ctx, q, 1, &local_result))
return false;
result->u64 += local_result.pipeline_statistics.gs_primitives;
if (!accumulate_subresult(ctx, q, 2, &local_result, write))
if (!accumulate_subresult_cpu(ctx, q, 2, &local_result))
return false;
result->u64 += local_result.pipeline_statistics.ia_primitives;
return true;
case PIPE_QUERY_PRIMITIVES_EMITTED:
if (!accumulate_subresult(ctx, q, 0, &local_result, write))
if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
return false;
result->u64 = local_result.so_statistics.num_primitives_written;
return true;
default:
assert(num_sub_queries(q->type) == 1);
return accumulate_subresult(ctx, q, 0, result, write);
return accumulate_subresult_cpu(ctx, q, 0, result);
}
}
@ -362,21 +335,99 @@ query_ensure_ready(struct d3d12_screen* screen, struct d3d12_context* ctx, struc
return true;
}
static void
accumulate_subresult_gpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
unsigned sub_query)
{
d3d12_compute_transform_save_restore save;
d3d12_save_compute_transform_state(ctx, &save);
d3d12_compute_transform_key key;
memset(&key, 0, sizeof(key));
key.type = d3d12_compute_transform_type::query_resolve;
key.query_resolve.is_64bit = true;
key.query_resolve.is_resolve_in_place = true;
key.query_resolve.num_subqueries = 1;
key.query_resolve.pipe_query_type = q_parent->type;
key.query_resolve.single_subquery_index = sub_query;
key.query_resolve.is_signed = false;
key.query_resolve.timestamp_multiplier = 1.0;
ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
ctx->transform_state_vars[0] = q_parent->subqueries[sub_query].curr_query;
ctx->transform_state_vars[1] = 0;
ctx->transform_state_vars[2] = 0;
ctx->transform_state_vars[3] = 0;
pipe_shader_buffer new_cs_ssbos[1];
new_cs_ssbos[0].buffer = q_parent->subqueries[sub_query].buffer;
new_cs_ssbos[0].buffer_offset = q_parent->subqueries[sub_query].buffer_offset;
new_cs_ssbos[0].buffer_size = q_parent->subqueries[sub_query].query_size * q_parent->subqueries[sub_query].num_queries;
ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 1, new_cs_ssbos, 1);
pipe_grid_info grid = {};
grid.block[0] = grid.block[1] = grid.block[2] = 1;
grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
ctx->base.launch_grid(&ctx->base, &grid);
d3d12_restore_compute_transform_state(ctx, &save);
}
static void
accumulate_result_gpu(struct d3d12_context *ctx, struct d3d12_query *q,
struct pipe_resource *dst, uint32_t dst_offset,
int index, enum pipe_query_value_type result_type)
{
d3d12_compute_transform_save_restore save;
d3d12_save_compute_transform_state(ctx, &save);
d3d12_compute_transform_key key;
memset(&key, 0, sizeof(key));
key.type = d3d12_compute_transform_type::query_resolve;
key.query_resolve.is_64bit = result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64;
key.query_resolve.is_resolve_in_place = false;
key.query_resolve.num_subqueries = num_sub_queries(q->type);
key.query_resolve.pipe_query_type = q->type;
key.query_resolve.single_result_field_offset = index;
key.query_resolve.is_signed = result_type == PIPE_QUERY_TYPE_I32 || result_type == PIPE_QUERY_TYPE_I64;
key.query_resolve.timestamp_multiplier = d3d12_screen(ctx->base.screen)->timestamp_multiplier;
ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
pipe_shader_buffer new_cs_ssbos[4];
uint32_t num_ssbos = 0;
for (uint32_t i = 0; i < key.query_resolve.num_subqueries; ++i) {
ctx->transform_state_vars[i] = q->subqueries[i].curr_query;
new_cs_ssbos[num_ssbos].buffer = q->subqueries[i].buffer;
new_cs_ssbos[num_ssbos].buffer_offset = q->subqueries[i].buffer_offset;
new_cs_ssbos[num_ssbos].buffer_size = q->subqueries[i].query_size * q->subqueries[i].num_queries;
num_ssbos++;
}
assert(dst_offset % (key.query_resolve.is_64bit ? 8 : 4) == 0);
ctx->transform_state_vars[3] = dst_offset / (key.query_resolve.is_64bit ? 8 : 4);
new_cs_ssbos[num_ssbos].buffer = dst;
new_cs_ssbos[num_ssbos].buffer_offset = 0;
new_cs_ssbos[num_ssbos].buffer_size = dst->width0;
num_ssbos++;
ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, num_ssbos, new_cs_ssbos, 1 << (num_ssbos - 1));
pipe_grid_info grid = {};
grid.block[0] = grid.block[1] = grid.block[2] = 1;
grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
ctx->base.launch_grid(&ctx->base, &grid);
d3d12_restore_compute_transform_state(ctx, &save);
}
static void
begin_subquery(struct d3d12_context *ctx, struct d3d12_query *q_parent, unsigned sub_query)
{
struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
if (q->curr_query == q->num_queries) {
union pipe_query_result result;
query_ensure_ready(d3d12_screen(ctx->base.screen), ctx, q_parent, false);
d3d12_foreach_submitted_batch(ctx, old_batch) {
if (old_batch->fence && old_batch->fence->value <= q_parent->fence_value)
d3d12_reset_batch(ctx, old_batch, OS_TIMEOUT_INFINITE);
}
/* Accumulate current results and store in first slot */
accumulate_subresult(ctx, q_parent, sub_query, &result, true);
accumulate_subresult_gpu(ctx, q_parent, sub_query);
q->curr_query = 1;
}
@ -412,18 +463,9 @@ begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q_parent, bool
q->curr_query = 0;
query_index = 0;
} else if (query_index == q->num_queries) {
union pipe_query_result result;
/* Accumulate current results and store in first slot */
query_ensure_ready(d3d12_screen(ctx->base.screen), ctx, q_parent, false);
d3d12_foreach_submitted_batch(ctx, old_batch) {
if (old_batch->fence && old_batch->fence->value <= q_parent->fence_value)
d3d12_reset_batch(ctx, old_batch, OS_TIMEOUT_INFINITE);
}
accumulate_subresult(ctx, q_parent, 0, &result, true);
q->curr_query = 2;
accumulate_subresult_gpu(ctx, q_parent, 0);
q->curr_query = 1;
}
ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
@ -530,7 +572,39 @@ d3d12_get_query_result(struct pipe_context *pctx,
if (!query_ensure_ready(screen, ctx, query, wait))
return false;
return accumulate_result(ctx, query, result, false);
return accumulate_result_cpu(ctx, query, result);
}
/*
 * pipe_context::get_query_result_resource hook: write a query result, or its
 * availability flag, into `resource` at byte `offset` without a CPU readback.
 *
 * index == -1 requests the availability value. It is written directly from
 * the command list as the constant 1 (the upper dword of a 64-bit result is
 * written as 0). Any other index is resolved by the query-resolve compute
 * shader via accumulate_result_gpu().
 *
 * `flags` is not consulted.
 */
static void
d3d12_get_query_result_resource(struct pipe_context *pctx,
                                struct pipe_query *q,
                                enum pipe_query_flags flags,
                                enum pipe_query_value_type result_type,
                                int index,
                                struct pipe_resource *resource,
                                unsigned offset)
{
   struct d3d12_context *ctx = d3d12_context(pctx);

   if (index == -1) {
      /* Write the "available" bit, which is always true */
      struct d3d12_resource *res = d3d12_resource(resource);
      d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_NONE);
      d3d12_apply_resource_states(ctx, false);

      D3D12_GPU_VIRTUAL_ADDRESS gpuva_base = d3d12_resource_gpu_virtual_address(res) + offset;
      D3D12_WRITEBUFFERIMMEDIATE_PARAMETER params[2] = {
         { gpuva_base, 1 },
         { gpuva_base + sizeof(uint32_t), 0 },
      };
      D3D12_WRITEBUFFERIMMEDIATE_MODE modes[2] = { D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT, D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT };
      const bool is_64bit = result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64;

      /* WriteBufferImmediate is an ID3D12GraphicsCommandList2 method. Calling
       * it through cmdlist8 would dereference nullptr on runtimes that expose
       * CommandList2 but not CommandList8, because cmdlist8 is reset to
       * nullptr when its QueryInterface fails. */
      assert(ctx->cmdlist2);
      ctx->cmdlist2->WriteBufferImmediate(is_64bit ? 2 : 1, params, modes);
      return;
   }

   struct d3d12_query *query = (struct d3d12_query *)q;
   accumulate_result_gpu(ctx, query, resource, offset, index, result_type);
}
void
@ -598,28 +672,7 @@ d3d12_render_condition(struct pipe_context *pctx,
query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
if (mode == PIPE_RENDER_COND_WAIT) {
query_ensure_ready(d3d12_screen(ctx->base.screen), ctx, query, false);
d3d12_foreach_submitted_batch(ctx, old_batch) {
if (old_batch->fence && old_batch->fence->value <= query->fence_value)
d3d12_reset_batch(ctx, old_batch, OS_TIMEOUT_INFINITE);
}
union pipe_query_result result;
accumulate_result(ctx, (d3d12_query *)pquery, &result, true);
}
struct d3d12_resource *res = (struct d3d12_resource *)query->subqueries[0].buffer;
uint64_t source_offset = 0;
ID3D12Resource *source = d3d12_resource_underlying(res, &source_offset);
source_offset += query->subqueries[0].buffer_offset;
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS);
d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_NONE);
d3d12_apply_resource_states(ctx, false);
ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,
source, source_offset,
sizeof(uint64_t));
accumulate_result_gpu(ctx, query, &query->predicate->base.b, 0, 0, PIPE_QUERY_TYPE_U64);
d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_TRANSITION_FLAG_NONE);
d3d12_apply_resource_states(ctx, false);
@ -656,6 +709,7 @@ d3d12_context_query_init(struct pipe_context *pctx)
pctx->begin_query = d3d12_begin_query;
pctx->end_query = d3d12_end_query;
pctx->get_query_result = d3d12_get_query_result;
pctx->get_query_result_resource = d3d12_get_query_result_resource;
pctx->set_active_query_state = d3d12_set_active_query_state;
pctx->render_condition = d3d12_render_condition;
}

View file

@ -193,9 +193,9 @@ d3d12_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 430;
return 440;
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
return 430;
return 440;
case PIPE_CAP_ESSL_FEATURE_LEVEL:
return 310;
@ -335,8 +335,12 @@ d3d12_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_QUERY_TIME_ELAPSED:
return 1;
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return (screen->opts3.WriteBufferImmediateSupportFlags & D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT) != 0;
case PIPE_CAP_MAX_VERTEX_STREAMS:
return D3D12_SO_BUFFER_SLOT_COUNT;