i965/fs: Simplify and fix buggy stride/offset calculations using subscript().

The previous code was overwriting the 'offset' and 'stride' values of
several registers without taking their previous values into account.
That probably didn't matter in practice for
optimize_frontfacing_ternary(), because the 'tmp' register already had
a known region, but it would have yielded the wrong region in the
other cases, in lower_integer_multiplication().  subscript(..., i) is a
more straightforward way to take the i-th field of a given type from
each channel of a register, and it should give the right answer
regardless of the original 'offset' and 'stride' parameters of the
register region.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
Francisco Jerez 2016-09-01 19:16:01 -07:00
parent 3b7b908787
commit 62aaef6c83
2 changed files with 17 additions and 59 deletions

View file

@ -3468,62 +3468,27 @@ fs_visitor::lower_integer_multiplication()
inst->dst.type);
if (devinfo->gen >= 7) {
fs_reg src1_0_w = inst->src[1];
fs_reg src1_1_w = inst->src[1];
if (inst->src[1].file == IMM) {
src1_0_w.ud &= 0xffff;
src1_1_w.ud >>= 16;
ibld.MUL(low, inst->src[0],
brw_imm_uw(inst->src[1].ud & 0xffff));
ibld.MUL(high, inst->src[0],
brw_imm_uw(inst->src[1].ud >> 16));
} else {
src1_0_w.type = BRW_REGISTER_TYPE_UW;
if (src1_0_w.stride != 0) {
assert(src1_0_w.stride == 1);
src1_0_w.stride = 2;
}
src1_1_w.type = BRW_REGISTER_TYPE_UW;
if (src1_1_w.stride != 0) {
assert(src1_1_w.stride == 1);
src1_1_w.stride = 2;
}
src1_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
ibld.MUL(low, inst->src[0],
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 0));
ibld.MUL(high, inst->src[0],
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
}
ibld.MUL(low, inst->src[0], src1_0_w);
ibld.MUL(high, inst->src[0], src1_1_w);
} else {
fs_reg src0_0_w = inst->src[0];
fs_reg src0_1_w = inst->src[0];
src0_0_w.type = BRW_REGISTER_TYPE_UW;
if (src0_0_w.stride != 0) {
assert(src0_0_w.stride == 1);
src0_0_w.stride = 2;
}
src0_1_w.type = BRW_REGISTER_TYPE_UW;
if (src0_1_w.stride != 0) {
assert(src0_1_w.stride == 1);
src0_1_w.stride = 2;
}
src0_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
ibld.MUL(low, src0_0_w, inst->src[1]);
ibld.MUL(high, src0_1_w, inst->src[1]);
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
inst->src[1]);
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
inst->src[1]);
}
fs_reg dst = inst->dst;
dst.type = BRW_REGISTER_TYPE_UW;
dst.offset = ROUND_DOWN_TO(dst.offset, REG_SIZE) + 2;
dst.stride = 2;
high.type = BRW_REGISTER_TYPE_UW;
high.stride = 2;
low.type = BRW_REGISTER_TYPE_UW;
low.offset = ROUND_DOWN_TO(low.offset, REG_SIZE) + 2;
low.stride = 2;
ibld.ADD(dst, low, high);
ibld.ADD(subscript(inst->dst, BRW_REGISTER_TYPE_UW, 1),
subscript(low, BRW_REGISTER_TYPE_UW, 1),
subscript(high, BRW_REGISTER_TYPE_UW, 0));
if (inst->conditional_mod || orig_dst.file == MRF) {
set_condmod(inst->conditional_mod,

View file

@ -508,15 +508,8 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
g0.negate = true;
}
tmp.type = BRW_REGISTER_TYPE_W;
tmp.offset = ROUND_DOWN_TO(tmp.offset, REG_SIZE) + 2;
tmp.stride = 2;
bld.OR(tmp, g0, brw_imm_uw(0x3f80));
tmp.type = BRW_REGISTER_TYPE_D;
tmp.offset = ROUND_DOWN_TO(tmp.offset, REG_SIZE);
tmp.stride = 1;
bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
g0, brw_imm_uw(0x3f80));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. */
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));