diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 866d6de71f7..63573876f2e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2896,16 +2896,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); } } else if (dst.regClass() == v1) { - if (ctx->block->fp_mode.denorm32 == fp_denorm_flush) { - /* If denormals are flushed, then v_mul_legacy_f32(2.0, src) can become omod. */ - src = - bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), Operand::c32(0x40000000), src); - } else { - src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src); - } - src = - bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u)); - bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); + /* Legacy multiply with +Inf means +-0.0 becomes +0.0 and all other numbers + * the correctly signed Inf. After that, we only need to clamp between -1.0 and +1.0. + */ + Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000)); + src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, src); + bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src, + Operand::c32(0xbf800000)); } else if (dst.regClass() == v2) { Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src); Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));