From 6738a7b5b4ae7a8f14fda0d39f760db4e29db186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Tue, 15 Nov 2022 16:43:25 +0100 Subject: [PATCH] r300: be more careful with presubtract and non-native swizzles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problematic scenario is when we have the same source used by both normal and presubtract argument. We check that case currently and count the source only once. However if one of the arguments uses a non-native swizzle, we have to rewrite it later and the source changes. Therefore we end with too many sources and fail later during pair translation. Example: ADD temp[21].xy, temp[20].xy__, temp[17].xy__; MAD temp[22].xy, temp[17].zw__, temp[11].xy__, temp[21].xy__; will get converted to MAD temp[22].xy, temp[17].zw__, temp[11].xy__, (temp[17] + temp[20]).xy__; however after dataflow swizzles pass we end with MOV temp[3].x, temp[17].z___; MOV temp[3].y, temp[17]._w__; MAD temp[22].xy, temp[3].xy__, temp[11].xy__, (temp[17] + temp[20]).xy__; Just skip the "don't count the same source twice" optimization when a non-native swizzle is used to fix 2 dEQP atan2 tests. Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Tested-by: Filip Gawin Part-of: --- .../drivers/r300/ci/r300-r480-fails.txt | 2 -- .../r300/compiler/radeon_compiler_util.c | 20 ++++++++++++++----- .../r300/compiler/radeon_compiler_util.h | 1 + .../drivers/r300/compiler/radeon_dataflow.c | 1 + .../drivers/r300/compiler/radeon_dataflow.h | 2 ++ .../drivers/r300/compiler/radeon_optimize.c | 7 +++++-- .../tests/radeon_compiler_util_tests.c | 5 ++++- 7 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r300/ci/r300-r480-fails.txt b/src/gallium/drivers/r300/ci/r300-r480-fails.txt index ac863a4729d..e891ed5ebe4 100644 --- a/src/gallium/drivers/r300/ci/r300-r480-fails.txt +++ b/src/gallium/drivers/r300/ci/r300-r480-fails.txt @@ -622,8 +622,6 @@ dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.asin.highp_vec2_fr dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.asin.mediump_vec2_fragment,Fail dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan.highp_vec2_fragment,Fail dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan.mediump_vec2_fragment,Fail -dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec2_fragment,Fail -dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec2_fragment,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.1,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c index a4bc60fa49d..c2a2b396293 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -33,6 +33,7 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" +#include "r300_fragprog_swizzle.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -383,6 +384,7 @@ struct src_select { rc_register_file File; int Index; unsigned int SrcType; + unsigned int Swizzle; }; struct can_use_presub_data { @@ -396,14 +398,15 @@ static void can_use_presub_data_add_select( struct can_use_presub_data * data, rc_register_file file, unsigned int index, - unsigned int src_type) + unsigned int swizzle) { struct src_select * select; select = &data->Selects[data->SelectCount++]; select->File = file; select->Index = index; - select->SrcType = src_type; + select->SrcType = rc_source_type_swz(swizzle); + select->Swizzle = swizzle; } /** @@ -426,10 +429,11 @@ static void can_use_presub_read_cb( return; can_use_presub_data_add_select(d, src->File, src->Index, - rc_source_type_swz(src->Swizzle)); + src->Swizzle); } unsigned int rc_inst_can_use_presub( + struct radeon_compiler * c, struct rc_instruction * inst, rc_presubtract_op presub_op, unsigned int presub_writemask, @@ -473,14 +477,14 @@ unsigned int rc_inst_can_use_presub( can_use_presub_data_add_select(&d, presub_src0->File, presub_src0->Index, - src_type0); + presub_src0->Swizzle); if (num_presub_srcs > 1) { src_type1 = rc_source_type_swz(presub_src1->Swizzle); can_use_presub_data_add_select(&d, presub_src1->File, presub_src1->Index, - src_type1); + presub_src1->Swizzle); /* Even if both of the presub sources read from the same * register, we still need to use 2 different source selects @@ -504,6 +508,12 @@ unsigned int rc_inst_can_use_presub( unsigned int j; unsigned int src_type = d.Selects[i].SrcType; for (j = i + 1; j < d.SelectCount; j++) { + /* Even if the sources are the same now, they will not be the + * same later, if we have to rewrite some non-native swizzle. */ + if(!c->is_r500 && ( + !r300_swizzle_is_native_basic(d.Selects[i].Swizzle) || + !r300_swizzle_is_native_basic(d.Selects[j].Swizzle))) + continue; if (d.Selects[i].File == d.Selects[j].File && d.Selects[i].Index == d.Selects[j].Index) { src_type &= ~d.Selects[j].SrcType; diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h index 49b3e661eb0..7c1d6bbc961 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -87,6 +87,7 @@ unsigned int rc_source_type_swz(unsigned int swizzle); unsigned int rc_source_type_mask(unsigned int mask); unsigned int rc_inst_can_use_presub( + struct radeon_compiler * c, struct rc_instruction * inst, rc_presubtract_op presub_op, unsigned int presub_writemask, diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c index d761ae9a7b0..94ee92131ce 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -849,6 +849,7 @@ static void init_get_readers_callback_data( rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { + reader_data->C = c; reader_data->Abort = 0; reader_data->ReaderCount = 0; reader_data->ReadersReserved = 0; diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h index bb8d48206e2..0c7bf8adf94 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -86,6 +86,8 @@ struct rc_reader { }; struct rc_reader_data { + struct radeon_compiler * C; + unsigned int Abort; unsigned int AbortOnRead; unsigned int AbortOnWrite; diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index b603b2cd479..33350b7fa41 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -71,7 +71,8 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - if(!rc_inst_can_use_presub(inst, + if(!rc_inst_can_use_presub(reader_data->C, + inst, reader_data->Writer->U.I.PreSub.Opcode, rc_swizzle_to_writemask(src->Swizzle), src, @@ -455,7 +456,9 @@ static void presub_scan_read( struct rc_reader_data * reader_data = data; rc_presubtract_op * presub_opcode = reader_data->CbData; - if (!rc_inst_can_use_presub(inst, *presub_opcode, + if (!rc_inst_can_use_presub(reader_data->C, + inst, + *presub_opcode, reader_data->Writer->U.I.DstReg.WriteMask, src, &reader_data->Writer->U.I.SrcReg[0], diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c index 3e9759409b1..bcdbbfc6f00 100644 --- a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c @@ -45,11 +45,14 @@ static void test_rc_inst_can_use_presub( struct rc_instruction add_inst, replace_inst; int ret; + struct r300_fragment_program_compiler c = {}; + init_compiler(&c.Base, RC_FRAGMENT_PROGRAM, 0, 0); + test_begin(result); init_rc_normal_instruction(&add_inst, add_str); init_rc_normal_instruction(&replace_inst, replace_str); - ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + ret = rc_inst_can_use_presub(&c.Base, &replace_inst, RC_PRESUB_ADD, 0, &replace_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);