radeonsi: fix BFE/BFI lowering for GLSL semantics

Fixes spec/arb_gpu_shader5/execution/built-in-functions/*-bitfield{Extract,Insert}

Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Nicolai Hähnle 2016-10-27 10:59:37 +02:00
parent 6526977306
commit 5aef14932a

View file

@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfi_args[3];
LLVMValueRef bfi_sm5;
LLVMValueRef cond;
// Calculate the bitmask: (((1 << src3) - 1) << src2
bfi_args[0] = LLVMBuildShl(builder,
@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
emit_data->output[emit_data->chan] =
bfi_sm5 =
LLVMBuildXor(builder, bfi_args[2],
LLVMBuildAnd(builder, bfi_args[0],
LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
""), ""), "");
/* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
* uses the convenient V_BFI lowering for the above, which follows SM5
* and disagrees with GLSL semantics when bits (src3) is 32.
*/
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
lp_build_const_int32(gallivm, 32), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
}
static void emit_bfe(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfe_sm5;
LLVMValueRef cond;
bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
emit_data->dst_type, emit_data->args,
emit_data->arg_count, LLVMReadNoneAttribute);
/* Correct for GLSL semantics. */
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
lp_build_const_int32(gallivm, 32), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
}
/* this is ffs in C */
@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;