i965/fs: Simplify and fix buggy stride/offset calculations using subscript().
These were bashing the 'offset' and 'stride' values of several registers without taking the previous value into account. That probably didn't matter in practice for optimize_frontfacing_ternary(), because the 'tmp' register already had a known region, but it would have given the wrong region as a result in the other cases in lower_integer_multiplication().

subscript(..., i) is a more straightforward way to take the i-th field of a given type from each channel of a register, and it should give the right answer regardless of the original 'offset' and 'stride' parameters of the register region.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
3b7b908787
commit
62aaef6c83
2 changed files with 17 additions and 59 deletions
|
|
@@ -3468,62 +3468,27 @@ fs_visitor::lower_integer_multiplication()
|
|||
inst->dst.type);
|
||||
|
||||
if (devinfo->gen >= 7) {
|
||||
fs_reg src1_0_w = inst->src[1];
|
||||
fs_reg src1_1_w = inst->src[1];
|
||||
|
||||
if (inst->src[1].file == IMM) {
|
||||
src1_0_w.ud &= 0xffff;
|
||||
src1_1_w.ud >>= 16;
|
||||
ibld.MUL(low, inst->src[0],
|
||||
brw_imm_uw(inst->src[1].ud & 0xffff));
|
||||
ibld.MUL(high, inst->src[0],
|
||||
brw_imm_uw(inst->src[1].ud >> 16));
|
||||
} else {
|
||||
src1_0_w.type = BRW_REGISTER_TYPE_UW;
|
||||
if (src1_0_w.stride != 0) {
|
||||
assert(src1_0_w.stride == 1);
|
||||
src1_0_w.stride = 2;
|
||||
}
|
||||
|
||||
src1_1_w.type = BRW_REGISTER_TYPE_UW;
|
||||
if (src1_1_w.stride != 0) {
|
||||
assert(src1_1_w.stride == 1);
|
||||
src1_1_w.stride = 2;
|
||||
}
|
||||
src1_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
|
||||
ibld.MUL(low, inst->src[0],
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 0));
|
||||
ibld.MUL(high, inst->src[0],
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
||||
}
|
||||
ibld.MUL(low, inst->src[0], src1_0_w);
|
||||
ibld.MUL(high, inst->src[0], src1_1_w);
|
||||
} else {
|
||||
fs_reg src0_0_w = inst->src[0];
|
||||
fs_reg src0_1_w = inst->src[0];
|
||||
|
||||
src0_0_w.type = BRW_REGISTER_TYPE_UW;
|
||||
if (src0_0_w.stride != 0) {
|
||||
assert(src0_0_w.stride == 1);
|
||||
src0_0_w.stride = 2;
|
||||
}
|
||||
|
||||
src0_1_w.type = BRW_REGISTER_TYPE_UW;
|
||||
if (src0_1_w.stride != 0) {
|
||||
assert(src0_1_w.stride == 1);
|
||||
src0_1_w.stride = 2;
|
||||
}
|
||||
src0_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
|
||||
|
||||
ibld.MUL(low, src0_0_w, inst->src[1]);
|
||||
ibld.MUL(high, src0_1_w, inst->src[1]);
|
||||
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
|
||||
inst->src[1]);
|
||||
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
|
||||
inst->src[1]);
|
||||
}
|
||||
|
||||
fs_reg dst = inst->dst;
|
||||
dst.type = BRW_REGISTER_TYPE_UW;
|
||||
dst.offset = ROUND_DOWN_TO(dst.offset, REG_SIZE) + 2;
|
||||
dst.stride = 2;
|
||||
|
||||
high.type = BRW_REGISTER_TYPE_UW;
|
||||
high.stride = 2;
|
||||
|
||||
low.type = BRW_REGISTER_TYPE_UW;
|
||||
low.offset = ROUND_DOWN_TO(low.offset, REG_SIZE) + 2;
|
||||
low.stride = 2;
|
||||
|
||||
ibld.ADD(dst, low, high);
|
||||
ibld.ADD(subscript(inst->dst, BRW_REGISTER_TYPE_UW, 1),
|
||||
subscript(low, BRW_REGISTER_TYPE_UW, 1),
|
||||
subscript(high, BRW_REGISTER_TYPE_UW, 0));
|
||||
|
||||
if (inst->conditional_mod || orig_dst.file == MRF) {
|
||||
set_condmod(inst->conditional_mod,
|
||||
|
|
|
|||
|
|
@@ -508,15 +508,8 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
|
|||
g0.negate = true;
|
||||
}
|
||||
|
||||
tmp.type = BRW_REGISTER_TYPE_W;
|
||||
tmp.offset = ROUND_DOWN_TO(tmp.offset, REG_SIZE) + 2;
|
||||
tmp.stride = 2;
|
||||
|
||||
bld.OR(tmp, g0, brw_imm_uw(0x3f80));
|
||||
|
||||
tmp.type = BRW_REGISTER_TYPE_D;
|
||||
tmp.offset = ROUND_DOWN_TO(tmp.offset, REG_SIZE);
|
||||
tmp.stride = 1;
|
||||
bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
|
||||
g0, brw_imm_uw(0x3f80));
|
||||
} else {
|
||||
/* Bit 31 of g1.6 is 0 if the polygon is front facing. */
|
||||
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue