r600/sfn: Factor out fragement shader class for EG

Pre-EG hardware handles the FS inputs differently, so we
need to prepare a different code path.

v2: Make m_interolators_used private (Filip)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17822>
This commit is contained in:
Gert Wollny 2022-07-27 16:17:12 +02:00 committed by Marge Bot
parent f9703ac34d
commit 8f96cd4db0
4 changed files with 377 additions and 317 deletions

View file

@ -411,7 +411,7 @@ Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_inf
switch (nir->info.stage) {
case MESA_SHADER_FRAGMENT:
shader = new FragmentShader(key);
shader = new FragmentShaderEG(key);
break;
case MESA_SHADER_VERTEX:
shader = new VertexShader(so_info, gs_shader, key);

View file

@ -79,10 +79,10 @@ void FragmentShader::do_get_shader_info(r600_shader *sh_info)
bool FragmentShader::load_input(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
AluInstr *ir = nullptr;
auto location = nir_intrinsic_io_semantics(intr).location;
if (location == VARYING_SLOT_POS) {
AluInstr *ir = nullptr;
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
ir = new AluInstr(op1_mov,
vf.dest(intr->dest, i, pin_none),
@ -96,41 +96,17 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
}
if (location == VARYING_SLOT_FACE) {
ir = new AluInstr(op2_setge_dx10,
vf.dest(intr->dest, 0, pin_none),
m_face_input,
vf.inline_const(ALU_SRC_0, 0),
AluInstr::last_write);
auto ir = new AluInstr(op2_setge_dx10,
vf.dest(intr->dest, 0, pin_none),
m_face_input,
vf.inline_const(ALU_SRC_0, 0),
AluInstr::last_write);
set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
emit_instruction(ir);
return true;
}
auto io = input(nir_intrinsic_base(intr));
auto comp = nir_intrinsic_component(intr);
bool need_temp = comp > 0 || !intr->dest.is_ssa;
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
if (need_temp) {
auto tmp = vf.temp_register(comp + i);
ir = new AluInstr(op1_interp_load_p0,
tmp,
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
AluInstr::last_write);
emit_instruction(ir);
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
} else {
ir = new AluInstr(op1_interp_load_p0,
vf.dest(intr->dest, i, pin_chan),
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
AluInstr::write);
emit_instruction(ir);
}
}
ir->set_alu_flag(alu_last_instr);
return true;
return load_input_hw(intr);
}
bool FragmentShader::store_output(nir_intrinsic_instr *intr)
@ -193,22 +169,10 @@ barycentric_ij_index(nir_intrinsic_instr *intr)
bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
switch (intr->intrinsic) {
case nir_intrinsic_load_barycentric_centroid:
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_sample: {
unsigned ij = barycentric_ij_index(intr);
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
if (process_stage_intrinsic_hw(intr))
return true;
}
case nir_intrinsic_load_input:
return load_input(intr);
case nir_intrinsic_load_barycentric_at_offset:
return load_barycentric_at_offset(intr);
case nir_intrinsic_load_barycentric_at_sample:
return load_barycentric_at_sample(intr);
switch (intr->intrinsic) {
case nir_intrinsic_load_interpolated_input:
return load_interpolated_input(intr);
case nir_intrinsic_discard_if:
@ -250,176 +214,21 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
case VARYING_SLOT_POS:
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
vf.inject_value(intr->dest, i, m_pos_input[i]);
return true;
return true;
case VARYING_SLOT_FACE:
return false;
default:
;
}
auto param = nir_src_as_const_value(intr->src[1]);
assert(param && "Indirect PS inputs not (yet) supported");
int dest_num_comp = nir_dest_num_components(intr->dest);
int start_comp = nir_intrinsic_component(intr);
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
InterpolateParams params;
params.i = vf.src(intr->src[0], 0);
params.j = vf.src(intr->src[0], 1);
params.base = input(nir_intrinsic_base(intr)).lds_pos();
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
return false;
if (need_temp) {
AluInstr *ir = nullptr;
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
auto real_dst = vf.dest(intr->dest, i, pin_chan);
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
emit_instruction(ir);
}
assert(ir);
ir->set_alu_flag(alu_last_instr);
}
return true;
return load_interpolated_input_hw(intr);
}
bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
int num_dest_comp, int start_comp)
{
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
if (num_dest_comp == 1) {
switch (start_comp) {
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3);
default:
assert(0);
}
}
if (num_dest_comp == 2) {
switch (start_comp) {
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
default:
assert(0);
}
}
if (num_dest_comp == 3 && start_comp == 0)
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
load_interpolated_one_comp(dest, params, op2_interp_z);
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
return success;
}
bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
for (unsigned i = 0; i < 2 && success; ++i) {
int chan = i;
if (op == op2_interp_z)
chan += 2;
ir = new AluInstr(op, dest[chan],
i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
i == 0 ? AluInstr::write : AluInstr::last);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
assert(params.j);
assert(params.i);
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
UNUSED int start, int comp)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
for (int i = 0; i < 4 ; ++i) {
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
i == comp ? AluInstr::write : AluInstr::empty);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
int FragmentShader::do_allocate_reserved_registers()
{
for (unsigned i = 0; i < s_max_interpolators; ++i) {
if (m_interpolators_used.test(i)) {
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
m_interpolator[i].enabled = true;
}
}
int num_baryc = 0;
for (int i = 0; i < 6; ++i) {
if (m_interpolator[i].enabled) {
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
unsigned sel = num_baryc / 2;
unsigned chan = 2 * (num_baryc % 2);
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
m_interpolator[i].i->pin_live_range(true, false);
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
m_interpolator[i].j->pin_live_range(true, false);
m_interpolator[i].ij_index = num_baryc++;
}
}
int num_baryc = allocate_register_inputs();
int next_register = (num_baryc + 1) >> 1;
@ -658,90 +467,6 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
}
}
bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr)
{
auto& vf = value_factory();
RegisterVec4 slope = vf.temp_vec4(pin_group);
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
fetch->set_fetch_flag(FetchInstr::srf_mode);
emit_instruction(fetch);
auto grad = vf.temp_vec4(pin_group);
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
assert(interpolator.enabled);
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
tex->set_tex_flag(TexInstr::grad_fine);
tex->set_tex_flag(TexInstr::x_unnormalized);
tex->set_tex_flag(TexInstr::y_unnormalized);
tex->set_tex_flag(TexInstr::z_unnormalized);
tex->set_tex_flag(TexInstr::w_unnormalized);
emit_instruction(tex);
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
tex->set_tex_flag(TexInstr::x_unnormalized);
tex->set_tex_flag(TexInstr::y_unnormalized);
tex->set_tex_flag(TexInstr::z_unnormalized);
tex->set_tex_flag(TexInstr::w_unnormalized);
tex->set_tex_flag(TexInstr::grad_fine);
emit_instruction(tex);
auto tmp0 = vf.temp_register();
auto tmp1 = vf.temp_register();
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
return true;
}
bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr)
{
auto& vf = value_factory();
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
auto help = vf.temp_vec4(pin_group);
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
getgradh->set_tex_flag(TexInstr::x_unnormalized);
getgradh->set_tex_flag(TexInstr::y_unnormalized);
getgradh->set_tex_flag(TexInstr::z_unnormalized);
getgradh->set_tex_flag(TexInstr::w_unnormalized);
getgradh->set_tex_flag(TexInstr::grad_fine);
emit_instruction(getgradh);
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
getgradv->set_tex_flag(TexInstr::x_unnormalized);
getgradv->set_tex_flag(TexInstr::y_unnormalized);
getgradv->set_tex_flag(TexInstr::z_unnormalized);
getgradv->set_tex_flag(TexInstr::w_unnormalized);
getgradv->set_tex_flag(TexInstr::grad_fine);
emit_instruction(getgradv);
auto ofs_x = vf.src(instr->src[0], 0);
auto ofs_y = vf.src(instr->src[0], 1);
auto tmp0 = vf.temp_register();
auto tmp1 = vf.temp_register();
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
return true;
}
bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
{
RegisterVec4::Swizzle swizzle;
@ -890,7 +615,319 @@ void FragmentShader::do_print_properties(std::ostream& os) const
}
FragmentShader::Interpolator::Interpolator():
bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
auto io = input(nir_intrinsic_base(intr));
auto comp = nir_intrinsic_component(intr);
bool need_temp = comp > 0 || !intr->dest.is_ssa;
AluInstr *ir = nullptr;
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
if (need_temp) {
auto tmp = vf.temp_register(comp + i);
ir = new AluInstr(op1_interp_load_p0,
tmp,
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
AluInstr::last_write);
emit_instruction(ir);
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
} else {
ir = new AluInstr(op1_interp_load_p0,
vf.dest(intr->dest, i, pin_chan),
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
AluInstr::write);
emit_instruction(ir);
}
}
ir->set_alu_flag(alu_last_instr);
return true;
}
bool FragmentShaderEG::allocate_register_inputs()
{
for (unsigned i = 0; i < s_max_interpolators; ++i) {
if (interpolators_used(i)) {
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
m_interpolator[i].enabled = true;
}
}
int num_baryc = 0;
for (int i = 0; i < 6; ++i) {
if (m_interpolator[i].enabled) {
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
unsigned sel = num_baryc / 2;
unsigned chan = 2 * (num_baryc % 2);
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
m_interpolator[i].i->pin_live_range(true, false);
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
m_interpolator[i].j->pin_live_range(true, false);
m_interpolator[i].ij_index = num_baryc++;
}
}
return num_baryc;
}
bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
switch (intr->intrinsic) {
case nir_intrinsic_load_barycentric_centroid:
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_sample: {
unsigned ij = barycentric_ij_index(intr);
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
return true;
}
case nir_intrinsic_load_input:
return load_input(intr);
case nir_intrinsic_load_barycentric_at_offset:
return load_barycentric_at_offset(intr);
case nir_intrinsic_load_barycentric_at_sample:
return load_barycentric_at_sample(intr);
default:
return false;
}
}
bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
{
auto& vf = value_factory();
auto param = nir_src_as_const_value(intr->src[1]);
assert(param && "Indirect PS inputs not (yet) supported");
int dest_num_comp = nir_dest_num_components(intr->dest);
int start_comp = nir_intrinsic_component(intr);
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
InterpolateParams params;
params.i = vf.src(intr->src[0], 0);
params.j = vf.src(intr->src[0], 1);
params.base = input(nir_intrinsic_base(intr)).lds_pos();
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
return false;
if (need_temp) {
AluInstr *ir = nullptr;
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
auto real_dst = vf.dest(intr->dest, i, pin_chan);
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
emit_instruction(ir);
}
assert(ir);
ir->set_alu_flag(alu_last_instr);
}
return true;
}
bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
int num_dest_comp, int start_comp)
{
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
if (num_dest_comp == 1) {
switch (start_comp) {
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
default:
assert(0);
}
}
if (num_dest_comp == 2) {
switch (start_comp) {
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
default:
assert(0);
}
}
if (num_dest_comp == 3 && start_comp == 0)
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
load_interpolated_one_comp(dest, params, op2_interp_z);
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
return success;
}
bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr)
{
auto& vf = value_factory();
RegisterVec4 slope = vf.temp_vec4(pin_group);
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
fetch->set_fetch_flag(FetchInstr::srf_mode);
emit_instruction(fetch);
auto grad = vf.temp_vec4(pin_group);
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
assert(interpolator.enabled);
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
tex->set_tex_flag(TexInstr::grad_fine);
tex->set_tex_flag(TexInstr::x_unnormalized);
tex->set_tex_flag(TexInstr::y_unnormalized);
tex->set_tex_flag(TexInstr::z_unnormalized);
tex->set_tex_flag(TexInstr::w_unnormalized);
emit_instruction(tex);
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
tex->set_tex_flag(TexInstr::x_unnormalized);
tex->set_tex_flag(TexInstr::y_unnormalized);
tex->set_tex_flag(TexInstr::z_unnormalized);
tex->set_tex_flag(TexInstr::w_unnormalized);
tex->set_tex_flag(TexInstr::grad_fine);
emit_instruction(tex);
auto tmp0 = vf.temp_register();
auto tmp1 = vf.temp_register();
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
return true;
}
bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr)
{
auto& vf = value_factory();
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
auto help = vf.temp_vec4(pin_group);
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
getgradh->set_tex_flag(TexInstr::x_unnormalized);
getgradh->set_tex_flag(TexInstr::y_unnormalized);
getgradh->set_tex_flag(TexInstr::z_unnormalized);
getgradh->set_tex_flag(TexInstr::w_unnormalized);
getgradh->set_tex_flag(TexInstr::grad_fine);
emit_instruction(getgradh);
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
getgradv->set_tex_flag(TexInstr::x_unnormalized);
getgradv->set_tex_flag(TexInstr::y_unnormalized);
getgradv->set_tex_flag(TexInstr::z_unnormalized);
getgradv->set_tex_flag(TexInstr::w_unnormalized);
getgradv->set_tex_flag(TexInstr::grad_fine);
emit_instruction(getgradv);
auto ofs_x = vf.src(instr->src[0], 0);
auto ofs_y = vf.src(instr->src[0], 1);
auto tmp0 = vf.temp_register();
auto tmp1 = vf.temp_register();
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
return true;
}
bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
const InterpolateParams& params,
EAluOp op)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
for (unsigned i = 0; i < 2 && success; ++i) {
int chan = i;
if (op == op2_interp_z)
chan += 2;
ir = new AluInstr(op, dest[chan],
i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
i == 0 ? AluInstr::write : AluInstr::last);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
const InterpolateParams& params,
EAluOp op, int writemask)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
assert(params.j);
assert(params.i);
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
const InterpolateParams& params, EAluOp op,
int comp)
{
auto group = new AluGroup();
bool success = true;
AluInstr *ir = nullptr;
for (int i = 0; i < 4 ; ++i) {
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
i == comp ? AluInstr::write : AluInstr::empty);
ir->set_bank_swizzle(alu_vec_210);
success = group->add_instruction(ir);
}
ir->set_alu_flag(alu_last_instr);
if (success)
emit_instruction(group);
return success;
}
FragmentShaderEG::Interpolator::Interpolator():
enabled(false)
{
}

View file

@ -39,22 +39,18 @@ public:
bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
private:
class Interpolator {
public:
Interpolator();
bool enabled : 1;
unsigned ij_index : 4;
PRegister i;
PRegister j;
};
struct InterpolateParams {
PVirtualValue i,j;
int base;
};
protected:
static const int s_max_interpolators = 6;
bool interpolators_used(int i) const {return m_interpolators_used.test(i);}
private:
bool load_interpolated_input(nir_intrinsic_instr *intr);
virtual bool allocate_register_inputs() = 0;
virtual bool load_input_hw(nir_intrinsic_instr *intr) = 0;
virtual bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) = 0;
virtual bool load_interpolated_input_hw(nir_intrinsic_instr *intr) = 0;
bool do_scan_instruction(nir_instr *instr) override;
int do_allocate_reserved_registers() override;
@ -63,17 +59,6 @@ private:
bool scan_input(nir_intrinsic_instr *instr, int index_src_id);
bool load_barycentric_pixel(nir_intrinsic_instr *intr);
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
bool load_interpolated_input(nir_intrinsic_instr *intr);
bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
int num_dest_comp, int start_comp);
bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
int start, int dest_slot);
bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs);
bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
@ -94,7 +79,6 @@ private:
ExportInstr *m_last_pixel_export;
std::bitset<s_max_interpolators> m_interpolators_used;
std::array<Interpolator, s_max_interpolators> m_interpolator;
RegisterVec4 m_pos_input;
Register *m_face_input{nullptr};
bool m_fs_write_all;
@ -109,6 +93,45 @@ private:
int m_rat_base{0};
};
class FragmentShaderEG : public FragmentShader {
public:
using FragmentShader::FragmentShader;
private:
class Interpolator {
public:
Interpolator();
bool enabled : 1;
unsigned ij_index : 4;
PRegister i;
PRegister j;
};
struct InterpolateParams {
PVirtualValue i,j;
int base;
};
bool allocate_register_inputs() override;
bool load_input_hw(nir_intrinsic_instr *intr) override;
bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override;
bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override;
bool load_barycentric_pixel(nir_intrinsic_instr *intr);
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
int num_dest_comp, int start_comp);
bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
int dest_slot);
std::array<Interpolator, s_max_interpolators> m_interpolator;
};
}
#endif

View file

@ -3073,7 +3073,7 @@ Shader *TestShader::from_string(const std::string& s)
Shader *shader = nullptr;
if (line.substr(0,2) == "FS")
shader = new FragmentShader(key);
shader = new FragmentShaderEG(key);
else if (line.substr(0,2) == "VS")
shader = new VertexShader(nullptr, nullptr, key);
else if (line.substr(0,2) == "GS")