ir3: lower 64b registers

After all int64/double lowerings, there might still be 64b registers
left which ir3 currently doesn't handle. This only happens in a small
number of Piglit tests where those registers (or the variables they come
from) did not get DCE'd.

This patch handles 64b registers in ir3 by adding a NIR pass that does
the following:
 - @decl_reg -> split in two 32b ones
 - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
 - @load_reg -> two separate loads and pack_64_2x32_split

After this pass, the 64b vecs used for the original loads/stores are
still present and are also not handled yet by ir3. This patch removes
them by running nir_lower_alu_to_scalar and nir_copy_prop.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26175>
This commit is contained in:
Job Noorman 2023-11-10 11:57:34 +01:00 committed by Marge Bot
parent 6e7a61df4c
commit 286caa5080
5 changed files with 113 additions and 12 deletions

View file

@ -106,16 +106,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
# ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
# Some 64b not getting lowered to 32b:
spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
spec@arb_texture_rectangle@1-1-linear-texture,Fail
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
# fails unrelated to GL_ARB_enhanced_layouts
spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
# fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
# maybe gen1 sqe doesn't handle the count==0 case?
spec@arb_indirect_parameters@tf-count-arrays,Fail

View file

@ -109,16 +109,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
# ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
# Some 64b not getting lowered to 32b:
spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
spec@arb_texture_rectangle@1-1-linear-texture,Fail
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
# fails unrelated to GL_ARB_enhanced_layouts
spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
# fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
# maybe gen1 sqe doesn't handle the count==0 case?
spec@arb_indirect_parameters@tf-count-arrays,Fail

View file

@ -91,6 +91,21 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
bool needs_late_alg = false;
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
if (progress) {
bool regs_progress = false;
/* Split 64b registers into two 32b ones. */
NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
if (regs_progress) {
/* After splitting registers, we might still have some 64b vecs. Run
* some passes to get rid of them.
*/
NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(ctx->s, nir_copy_prop);
}
}
/* we could need cleanup after lower_locals_to_regs */
while (progress) {
progress = false;

View file

@ -65,6 +65,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
bool ir3_nir_lower_64b_undef(nir_shader *shader);
bool ir3_nir_lower_64b_global(nir_shader *shader);
bool ir3_nir_lower_64b_regs(nir_shader *shader);
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_lower_io_to_temporaries(nir_shader *s);

View file

@ -299,3 +299,100 @@ ir3_nir_lower_64b_global(nir_shader *shader)
shader, lower_64b_global_filter,
lower_64b_global, NULL);
}
/*
 * Lowering for 64b registers:
 * - @decl_reg -> split in two 32b ones
 * - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
 * - @load_reg -> two separate loads and pack_64_2x32_split
 *
 * `reg` is the @decl_reg intrinsic of the 64b register to lower. Two new 32b
 * registers with the same number of components and array elements are
 * declared, every store to and load from `reg` is rewritten to target the
 * new pair, and finally `reg` itself is removed.
 */
static void
lower_64b_reg(nir_builder *b, nir_intrinsic_instr *reg)
{
   unsigned num_components = nir_intrinsic_num_components(reg);
   unsigned num_array_elems = nir_intrinsic_num_array_elems(reg);

   nir_def *reg_hi = nir_decl_reg(b, num_components, 32, num_array_elems);
   nir_def *reg_lo = nir_decl_reg(b, num_components, 32, num_array_elems);

   /* The _safe iterators are needed because each visited store/load is
    * removed while walking the register's uses.
    */
   nir_foreach_reg_store_safe (store_reg_src, reg) {
      nir_intrinsic_instr *store =
         nir_instr_as_intrinsic(nir_src_parent_instr(store_reg_src));
      b->cursor = nir_before_instr(&store->instr);

      nir_def *packed = store->src[0].ssa;
      nir_def *unpacked_lo = nir_unpack_64_2x32_split_x(b, packed);
      nir_def *unpacked_hi = nir_unpack_64_2x32_split_y(b, packed);
      int base = nir_intrinsic_base(store);

      /* Forward the original write mask explicitly so that a store which
       * only wrote some components keeps writing only those components in
       * both 32b halves, instead of getting the builder's default mask.
       */
      unsigned write_mask = nir_intrinsic_write_mask(store);

      if (store->intrinsic == nir_intrinsic_store_reg) {
         nir_build_store_reg(b, unpacked_lo, reg_lo, .base = base,
                             .write_mask = write_mask);
         nir_build_store_reg(b, unpacked_hi, reg_hi, .base = base,
                             .write_mask = write_mask);
      } else {
         assert(store->intrinsic == nir_intrinsic_store_reg_indirect);

         /* The same indirect offset is used for both halves. */
         nir_def *offset = store->src[2].ssa;
         nir_store_reg_indirect(b, unpacked_lo, reg_lo, offset, .base = base,
                                .write_mask = write_mask);
         nir_store_reg_indirect(b, unpacked_hi, reg_hi, offset, .base = base,
                                .write_mask = write_mask);
      }

      nir_instr_remove(&store->instr);
   }

   nir_foreach_reg_load_safe (load_reg_src, reg) {
      nir_intrinsic_instr *load =
         nir_instr_as_intrinsic(nir_src_parent_instr(load_reg_src));
      b->cursor = nir_before_instr(&load->instr);

      int base = nir_intrinsic_base(load);
      nir_def *load_lo, *load_hi;

      if (load->intrinsic == nir_intrinsic_load_reg) {
         load_lo =
            nir_build_load_reg(b, num_components, 32, reg_lo, .base = base);
         load_hi =
            nir_build_load_reg(b, num_components, 32, reg_hi, .base = base);
      } else {
         assert(load->intrinsic == nir_intrinsic_load_reg_indirect);

         nir_def *offset = load->src[1].ssa;
         load_lo = nir_load_reg_indirect(b, num_components, 32, reg_lo, offset,
                                         .base = base);
         load_hi = nir_load_reg_indirect(b, num_components, 32, reg_hi, offset,
                                         .base = base);
      }

      /* Re-assemble the 64b value so existing users of the load keep
       * seeing the same bit size.
       */
      nir_def *packed = nir_pack_64_2x32_split(b, load_lo, load_hi);
      nir_def_rewrite_uses(&load->def, packed);
      nir_instr_remove(&load->instr);
   }

   /* All stores and loads have been rewritten; drop the 64b declaration. */
   nir_instr_remove(&reg->instr);
}
/*
 * Lowers every 64b register declared in `shader` to a pair of 32b registers
 * (see lower_64b_reg()).
 *
 * Returns true if at least one register was lowered in any function impl.
 */
bool
ir3_nir_lower_64b_regs(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl (impl, shader) {
      bool impl_progress = false;
      nir_builder b = nir_builder_create(impl);

      /* _safe iteration: lower_64b_reg() removes the visited declaration. */
      nir_foreach_reg_decl_safe (reg, impl) {
         if (nir_intrinsic_bit_size(reg) == 64) {
            lower_64b_reg(&b, reg);
            impl_progress = true;
         }
      }

      if (impl_progress) {
         nir_metadata_preserve(
            impl, nir_metadata_block_index | nir_metadata_dominance);
         progress = true;
      } else {
         /* Nothing was touched in this impl, so all of its metadata is
          * still valid; say so explicitly, as NIR passes are expected to do.
          */
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}