vc4: Allow QIR registers to be non-SSA.
Now that we have NIR, most of the optimization we still need to do is peephole work on instruction selection rather than general dataflow operations. This means we want QIR to be a lot closer to the actual QPU instructions, just with virtual registers. Allowing multiple instructions to write the same register opens up a lot of possibilities.
This commit is contained in:
parent
ceb1a31842
commit
0bba4fa070
4 changed files with 10 additions and 4 deletions
|
|
@@ -207,6 +207,7 @@ qir_opt_algebraic(struct vc4_compile *c)
|
|||
|
||||
/* FADD(a, FSUB(0, b)) -> FSUB(a, b) */
|
||||
if (inst->src[1].file == QFILE_TEMP &&
|
||||
c->defs[inst->src[1].index] &&
|
||||
c->defs[inst->src[1].index]->op == QOP_FSUB) {
|
||||
struct qinst *fsub = c->defs[inst->src[1].index];
|
||||
if (is_zero(c, fsub->src[0])) {
|
||||
|
|
@@ -221,6 +222,7 @@ qir_opt_algebraic(struct vc4_compile *c)
|
|||
|
||||
/* FADD(FSUB(0, b), a) -> FSUB(a, b) */
|
||||
if (inst->src[0].file == QFILE_TEMP &&
|
||||
c->defs[inst->src[0].index] &&
|
||||
c->defs[inst->src[0].index]->op == QOP_FSUB) {
|
||||
struct qinst *fsub = c->defs[inst->src[0].index];
|
||||
if (is_zero(c, fsub->src[0])) {
|
||||
|
|
|
|||
|
|
@@ -68,7 +68,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
|
|||
continue;
|
||||
|
||||
struct qinst *inst = c->defs[temp];
|
||||
if (qir_is_multi_instruction(inst))
|
||||
if (!inst || qir_is_multi_instruction(inst))
|
||||
continue;
|
||||
|
||||
if (qir_depends_on_flags(inst) || inst->sf)
|
||||
|
|
|
|||
|
|
@@ -389,8 +389,11 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
|
|||
struct qreg
|
||||
qir_follow_movs(struct vc4_compile *c, struct qreg reg)
|
||||
{
|
||||
while (reg.file == QFILE_TEMP && c->defs[reg.index]->op == QOP_MOV)
|
||||
while (reg.file == QFILE_TEMP &&
|
||||
c->defs[reg.index] &&
|
||||
c->defs[reg.index]->op == QOP_MOV) {
|
||||
reg = c->defs[reg.index]->src[0];
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -177,7 +177,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
|
|||
uint8_t class_bits[c->num_temps];
|
||||
struct qpu_reg *temp_registers = calloc(c->num_temps,
|
||||
sizeof(*temp_registers));
|
||||
memset(def, 0, sizeof(def));
|
||||
for (int i = 0; i < ARRAY_SIZE(def); i++)
|
||||
def[i] = ~0;
|
||||
memset(use, 0, sizeof(use));
|
||||
|
||||
/* If things aren't ever written (undefined values), just read from
|
||||
|
|
@@ -196,7 +197,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
|
|||
uint32_t ip = 0;
|
||||
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
|
||||
if (inst->dst.file == QFILE_TEMP) {
|
||||
def[inst->dst.index] = ip;
|
||||
def[inst->dst.index] = MIN2(ip, def[inst->dst.index]);
|
||||
use[inst->dst.index] = ip;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue