ir3: lower 64b registers
After all int64/double lowerings, there might still be 64b registers left, which ir3 currently doesn't handle. This only happens in a small number of Piglit tests where those registers (or the variables they come from) did not get DCE'd.

This patch handles 64b registers in ir3 by adding a NIR pass that does the following:
- @decl_reg -> split into two 32b ones
- @store_reg -> unpack_64_2x32_split_x/y and two separate stores
- @load_reg -> two separate loads and pack_64_2x32_split

After this pass, the 64b vecs used for the original loads/stores are still present and are also not handled yet by ir3. This patch removes them by running nir_lower_alu_to_scalar and nir_copy_prop.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26175>
This commit is contained in:
parent
6e7a61df4c
commit
286caa5080
5 changed files with 113 additions and 12 deletions
|
|
@ -106,16 +106,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
|
|||
# ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
|
||||
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
|
||||
|
||||
# Some 64b not getting lowered to 32b:
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
|
||||
|
||||
spec@arb_texture_rectangle@1-1-linear-texture,Fail
|
||||
|
||||
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
|
||||
|
||||
# fails unrelated to GL_ARB_enhanced_layouts
|
||||
spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
|
||||
|
||||
# fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
|
||||
# maybe gen1 sqe doesn't handle the count==0 case?
|
||||
spec@arb_indirect_parameters@tf-count-arrays,Fail
|
||||
|
|
|
|||
|
|
@ -109,16 +109,10 @@ spec@arb_shader_image_load_store@qualifiers@r8/strict layout qualifiers/permissi
|
|||
# ir3_nir_lower_tess.c:251: lower_block_to_explicit_output: Assertion `util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)' failed.
|
||||
spec@arb_tessellation_shader@execution@tcs-input-read-mat,Crash
|
||||
|
||||
# Some 64b not getting lowered to 32b:
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@vs-output-array-dvec4-index-wr-before-tcs,Crash
|
||||
|
||||
spec@arb_texture_rectangle@1-1-linear-texture,Fail
|
||||
|
||||
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
|
||||
|
||||
# fails unrelated to GL_ARB_enhanced_layouts
|
||||
spec@arb_enhanced_layouts@execution@component-layout@vs-fs-array-dvec3,Crash
|
||||
|
||||
# fails on gen1 (a618/a630) with both fd and zink, but passes on gen4..
|
||||
# maybe gen1 sqe doesn't handle the count==0 case?
|
||||
spec@arb_indirect_parameters@tf-count-arrays,Fail
|
||||
|
|
|
|||
|
|
@ -91,6 +91,21 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
bool needs_late_alg = false;
|
||||
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
|
||||
|
||||
if (progress) {
|
||||
bool regs_progress = false;
|
||||
|
||||
/* Split 64b registers into two 32b ones. */
|
||||
NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
|
||||
|
||||
if (regs_progress) {
|
||||
/* After splitting registers, we might still have some 64b vecs. Run
|
||||
* some passes to get rid of them.
|
||||
*/
|
||||
NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS_V(ctx->s, nir_copy_prop);
|
||||
}
|
||||
}
|
||||
|
||||
/* we could need cleanup after lower_locals_to_regs */
|
||||
while (progress) {
|
||||
progress = false;
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
|
|||
bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
|
||||
bool ir3_nir_lower_64b_undef(nir_shader *shader);
|
||||
bool ir3_nir_lower_64b_global(nir_shader *shader);
|
||||
bool ir3_nir_lower_64b_regs(nir_shader *shader);
|
||||
|
||||
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
|
||||
|
|
|
|||
|
|
@ -299,3 +299,100 @@ ir3_nir_lower_64b_global(nir_shader *shader)
|
|||
shader, lower_64b_global_filter,
|
||||
lower_64b_global, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lowering for 64b registers:
|
||||
* - @decl_reg -> split in two 32b ones
|
||||
* - @store_reg -> unpack_64_2x32_split_x/y and two separate stores
|
||||
* - @load_reg -> two separate loads and pack_64_2x32_split
|
||||
*/
|
||||
|
||||
static void
|
||||
lower_64b_reg(nir_builder *b, nir_intrinsic_instr *reg)
|
||||
{
|
||||
unsigned num_components = nir_intrinsic_num_components(reg);
|
||||
unsigned num_array_elems = nir_intrinsic_num_array_elems(reg);
|
||||
|
||||
nir_def *reg_hi = nir_decl_reg(b, num_components, 32, num_array_elems);
|
||||
nir_def *reg_lo = nir_decl_reg(b, num_components, 32, num_array_elems);
|
||||
|
||||
nir_foreach_reg_store_safe (store_reg_src, reg) {
|
||||
nir_intrinsic_instr *store =
|
||||
nir_instr_as_intrinsic(nir_src_parent_instr(store_reg_src));
|
||||
b->cursor = nir_before_instr(&store->instr);
|
||||
|
||||
nir_def *packed = store->src[0].ssa;
|
||||
nir_def *unpacked_lo = nir_unpack_64_2x32_split_x(b, packed);
|
||||
nir_def *unpacked_hi = nir_unpack_64_2x32_split_y(b, packed);
|
||||
int base = nir_intrinsic_base(store);
|
||||
|
||||
if (store->intrinsic == nir_intrinsic_store_reg) {
|
||||
nir_build_store_reg(b, unpacked_lo, reg_lo, .base = base);
|
||||
nir_build_store_reg(b, unpacked_hi, reg_hi, .base = base);
|
||||
} else {
|
||||
assert(store->intrinsic == nir_intrinsic_store_reg_indirect);
|
||||
|
||||
nir_def *offset = store->src[2].ssa;
|
||||
nir_store_reg_indirect(b, unpacked_lo, reg_lo, offset, .base = base);
|
||||
nir_store_reg_indirect(b, unpacked_hi, reg_hi, offset, .base = base);
|
||||
}
|
||||
|
||||
nir_instr_remove(&store->instr);
|
||||
}
|
||||
|
||||
nir_foreach_reg_load_safe (load_reg_src, reg) {
|
||||
nir_intrinsic_instr *load =
|
||||
nir_instr_as_intrinsic(nir_src_parent_instr(load_reg_src));
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
||||
int base = nir_intrinsic_base(load);
|
||||
nir_def *load_lo, *load_hi;
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_reg) {
|
||||
load_lo =
|
||||
nir_build_load_reg(b, num_components, 32, reg_lo, .base = base);
|
||||
load_hi =
|
||||
nir_build_load_reg(b, num_components, 32, reg_hi, .base = base);
|
||||
} else {
|
||||
assert(load->intrinsic == nir_intrinsic_load_reg_indirect);
|
||||
|
||||
nir_def *offset = load->src[1].ssa;
|
||||
load_lo = nir_load_reg_indirect(b, num_components, 32, reg_lo, offset,
|
||||
.base = base);
|
||||
load_hi = nir_load_reg_indirect(b, num_components, 32, reg_hi, offset,
|
||||
.base = base);
|
||||
}
|
||||
|
||||
nir_def *packed = nir_pack_64_2x32_split(b, load_lo, load_hi);
|
||||
nir_def_rewrite_uses(&load->def, packed);
|
||||
nir_instr_remove(&load->instr);
|
||||
}
|
||||
|
||||
nir_instr_remove(®->instr);
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_nir_lower_64b_regs(nir_shader *shader)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function_impl (impl, shader) {
|
||||
bool impl_progress = false;
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
nir_foreach_reg_decl_safe (reg, impl) {
|
||||
if (nir_intrinsic_bit_size(reg) == 64) {
|
||||
lower_64b_reg(&b, reg);
|
||||
impl_progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (impl_progress) {
|
||||
nir_metadata_preserve(
|
||||
impl, nir_metadata_block_index | nir_metadata_dominance);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue