diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 5923fea9589..156227a345c 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -318,6 +318,8 @@ can_use_DPP(const aco_ptr& instr, bool pre_ra) return false; if (instr->format == Format::VOP3) return false; + if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr)) + return false; } /* there are more cases but those all take 64-bit inputs */ diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 31a229f99e9..f33de4caf76 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -954,13 +954,14 @@ BEGIN_TEST(optimize.denorm_propagation) END_TEST BEGIN_TEST(optimizer.dpp) - //>> v1: %a, v1: %b, s2: %c = p_startpgm - if (!setup_cs("v1 v1 s2", GFX10_3)) + //>> v1: %a, v1: %b, s2: %c, s1: %d = p_startpgm + if (!setup_cs("v1 v1 s2 s1", GFX10_3)) return; Operand a(inputs[0]); Operand b(inputs[1]); Operand c(inputs[2]); + Operand d(inputs[3]); /* basic optimization */ //! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1 @@ -1028,6 +1029,21 @@ BEGIN_TEST(optimizer.dpp) Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c); writeout(8, res8); + /* sgprs */ + //! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res9 = v_add_f32 %tmp9, %d + //! p_unit_test 9, %res9 + Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d); + writeout(9, res9); + + //! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res10 = v_add_f32 %d, %tmp10 + //! p_unit_test 10, %res10 + Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), d, tmp10); + writeout(10, res10); + finish_opt_test(); END_TEST