r600/sfn: Factor out fragement shader class for EG
Pre-EG hardware handles the FS inputs differently, so we need to prepare a different code path. v2: Make m_interolators_used private (Filip) Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17822>
This commit is contained in:
parent
f9703ac34d
commit
8f96cd4db0
4 changed files with 377 additions and 317 deletions
|
|
@ -411,7 +411,7 @@ Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_inf
|
|||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
shader = new FragmentShader(key);
|
||||
shader = new FragmentShaderEG(key);
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
shader = new VertexShader(so_info, gs_shader, key);
|
||||
|
|
|
|||
|
|
@ -79,10 +79,10 @@ void FragmentShader::do_get_shader_info(r600_shader *sh_info)
|
|||
bool FragmentShader::load_input(nir_intrinsic_instr *intr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
AluInstr *ir = nullptr;
|
||||
|
||||
auto location = nir_intrinsic_io_semantics(intr).location;
|
||||
if (location == VARYING_SLOT_POS) {
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
||||
ir = new AluInstr(op1_mov,
|
||||
vf.dest(intr->dest, i, pin_none),
|
||||
|
|
@ -96,41 +96,17 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
if (location == VARYING_SLOT_FACE) {
|
||||
ir = new AluInstr(op2_setge_dx10,
|
||||
vf.dest(intr->dest, 0, pin_none),
|
||||
m_face_input,
|
||||
vf.inline_const(ALU_SRC_0, 0),
|
||||
AluInstr::last_write);
|
||||
auto ir = new AluInstr(op2_setge_dx10,
|
||||
vf.dest(intr->dest, 0, pin_none),
|
||||
m_face_input,
|
||||
vf.inline_const(ALU_SRC_0, 0),
|
||||
AluInstr::last_write);
|
||||
set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
|
||||
|
||||
emit_instruction(ir);
|
||||
return true;
|
||||
}
|
||||
|
||||
auto io = input(nir_intrinsic_base(intr));
|
||||
auto comp = nir_intrinsic_component(intr);
|
||||
bool need_temp = comp > 0 || !intr->dest.is_ssa;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
||||
if (need_temp) {
|
||||
auto tmp = vf.temp_register(comp + i);
|
||||
ir = new AluInstr(op1_interp_load_p0,
|
||||
tmp,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
|
||||
AluInstr::last_write);
|
||||
emit_instruction(ir);
|
||||
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
|
||||
} else {
|
||||
|
||||
ir = new AluInstr(op1_interp_load_p0,
|
||||
vf.dest(intr->dest, i, pin_chan),
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
|
||||
AluInstr::write);
|
||||
emit_instruction(ir);
|
||||
}
|
||||
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
return true;
|
||||
return load_input_hw(intr);
|
||||
}
|
||||
|
||||
bool FragmentShader::store_output(nir_intrinsic_instr *intr)
|
||||
|
|
@ -193,22 +169,10 @@ barycentric_ij_index(nir_intrinsic_instr *intr)
|
|||
|
||||
bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
case nir_intrinsic_load_barycentric_sample: {
|
||||
unsigned ij = barycentric_ij_index(intr);
|
||||
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
|
||||
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
|
||||
if (process_stage_intrinsic_hw(intr))
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_input:
|
||||
return load_input(intr);
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
return load_barycentric_at_offset(intr);
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
return load_barycentric_at_sample(intr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
return load_interpolated_input(intr);
|
||||
case nir_intrinsic_discard_if:
|
||||
|
|
@ -250,176 +214,21 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
|
|||
case VARYING_SLOT_POS:
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
|
||||
vf.inject_value(intr->dest, i, m_pos_input[i]);
|
||||
return true;
|
||||
return true;
|
||||
case VARYING_SLOT_FACE:
|
||||
return false;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
auto param = nir_src_as_const_value(intr->src[1]);
|
||||
assert(param && "Indirect PS inputs not (yet) supported");
|
||||
|
||||
int dest_num_comp = nir_dest_num_components(intr->dest);
|
||||
int start_comp = nir_intrinsic_component(intr);
|
||||
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
|
||||
|
||||
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
|
||||
|
||||
InterpolateParams params;
|
||||
|
||||
params.i = vf.src(intr->src[0], 0);
|
||||
params.j = vf.src(intr->src[0], 1);
|
||||
params.base = input(nir_intrinsic_base(intr)).lds_pos();
|
||||
|
||||
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
|
||||
return false;
|
||||
|
||||
if (need_temp) {
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
|
||||
auto real_dst = vf.dest(intr->dest, i, pin_chan);
|
||||
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
|
||||
emit_instruction(ir);
|
||||
}
|
||||
assert(ir);
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
}
|
||||
|
||||
return true;
|
||||
return load_interpolated_input_hw(intr);
|
||||
}
|
||||
|
||||
bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
||||
int num_dest_comp, int start_comp)
|
||||
{
|
||||
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
|
||||
|
||||
if (num_dest_comp == 1) {
|
||||
switch (start_comp) {
|
||||
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
|
||||
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
|
||||
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
|
||||
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_dest_comp == 2) {
|
||||
switch (start_comp) {
|
||||
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
|
||||
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
|
||||
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
|
||||
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_dest_comp == 3 && start_comp == 0)
|
||||
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
|
||||
load_interpolated_one_comp(dest, params, op2_interp_z);
|
||||
|
||||
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
|
||||
|
||||
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
|
||||
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < 2 && success; ++i) {
|
||||
int chan = i;
|
||||
if (op == op2_interp_z)
|
||||
chan += 2;
|
||||
|
||||
|
||||
ir = new AluInstr(op, dest[chan],
|
||||
i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
|
||||
i == 0 ? AluInstr::write : AluInstr::last);
|
||||
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
|
||||
AluInstr *ir = nullptr;
|
||||
assert(params.j);
|
||||
assert(params.i);
|
||||
for (unsigned i = 0; i < 4 ; ++i) {
|
||||
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
||||
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
|
||||
UNUSED int start, int comp)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
AluInstr *ir = nullptr;
|
||||
|
||||
for (int i = 0; i < 4 ; ++i) {
|
||||
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
||||
i == comp ? AluInstr::write : AluInstr::empty);
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
int FragmentShader::do_allocate_reserved_registers()
|
||||
{
|
||||
for (unsigned i = 0; i < s_max_interpolators; ++i) {
|
||||
if (m_interpolators_used.test(i)) {
|
||||
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
|
||||
m_interpolator[i].enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
int num_baryc = 0;
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
if (m_interpolator[i].enabled) {
|
||||
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
|
||||
unsigned sel = num_baryc / 2;
|
||||
unsigned chan = 2 * (num_baryc % 2);
|
||||
|
||||
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
|
||||
m_interpolator[i].i->pin_live_range(true, false);
|
||||
|
||||
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
|
||||
m_interpolator[i].j->pin_live_range(true, false);
|
||||
|
||||
m_interpolator[i].ij_index = num_baryc++;
|
||||
}
|
||||
}
|
||||
int num_baryc = allocate_register_inputs();
|
||||
|
||||
int next_register = (num_baryc + 1) >> 1;
|
||||
|
||||
|
|
@ -658,90 +467,6 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
|
|||
}
|
||||
}
|
||||
|
||||
bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
RegisterVec4 slope = vf.temp_vec4(pin_group);
|
||||
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
|
||||
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
|
||||
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
|
||||
|
||||
fetch->set_fetch_flag(FetchInstr::srf_mode);
|
||||
emit_instruction(fetch);
|
||||
|
||||
auto grad = vf.temp_vec4(pin_group);
|
||||
|
||||
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
||||
assert(interpolator.enabled);
|
||||
|
||||
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
||||
|
||||
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
|
||||
tex->set_tex_flag(TexInstr::grad_fine);
|
||||
tex->set_tex_flag(TexInstr::x_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::y_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::z_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::w_unnormalized);
|
||||
emit_instruction(tex);
|
||||
|
||||
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
|
||||
tex->set_tex_flag(TexInstr::x_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::y_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::z_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::w_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(tex);
|
||||
|
||||
auto tmp0 = vf.temp_register();
|
||||
auto tmp1 = vf.temp_register();
|
||||
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
|
||||
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
||||
|
||||
auto help = vf.temp_vec4(pin_group);
|
||||
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
||||
|
||||
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
|
||||
getgradh->set_tex_flag(TexInstr::x_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::y_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::z_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::w_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(getgradh);
|
||||
|
||||
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
|
||||
getgradv->set_tex_flag(TexInstr::x_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::y_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::z_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::w_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(getgradv);
|
||||
|
||||
auto ofs_x = vf.src(instr->src[0], 0);
|
||||
auto ofs_y = vf.src(instr->src[0], 1);
|
||||
auto tmp0 = vf.temp_register();
|
||||
auto tmp1 = vf.temp_register();
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
|
||||
{
|
||||
RegisterVec4::Swizzle swizzle;
|
||||
|
|
@ -890,7 +615,319 @@ void FragmentShader::do_print_properties(std::ostream& os) const
|
|||
}
|
||||
|
||||
|
||||
FragmentShader::Interpolator::Interpolator():
|
||||
bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
auto io = input(nir_intrinsic_base(intr));
|
||||
auto comp = nir_intrinsic_component(intr);
|
||||
|
||||
bool need_temp = comp > 0 || !intr->dest.is_ssa;
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
||||
if (need_temp) {
|
||||
auto tmp = vf.temp_register(comp + i);
|
||||
ir = new AluInstr(op1_interp_load_p0,
|
||||
tmp,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
|
||||
AluInstr::last_write);
|
||||
emit_instruction(ir);
|
||||
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
|
||||
} else {
|
||||
|
||||
ir = new AluInstr(op1_interp_load_p0,
|
||||
vf.dest(intr->dest, i, pin_chan),
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
|
||||
AluInstr::write);
|
||||
emit_instruction(ir);
|
||||
}
|
||||
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::allocate_register_inputs()
|
||||
{
|
||||
for (unsigned i = 0; i < s_max_interpolators; ++i) {
|
||||
if (interpolators_used(i)) {
|
||||
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
|
||||
m_interpolator[i].enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
int num_baryc = 0;
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
if (m_interpolator[i].enabled) {
|
||||
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
|
||||
unsigned sel = num_baryc / 2;
|
||||
unsigned chan = 2 * (num_baryc % 2);
|
||||
|
||||
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
|
||||
m_interpolator[i].i->pin_live_range(true, false);
|
||||
|
||||
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
|
||||
m_interpolator[i].j->pin_live_range(true, false);
|
||||
|
||||
m_interpolator[i].ij_index = num_baryc++;
|
||||
}
|
||||
}
|
||||
return num_baryc;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
case nir_intrinsic_load_barycentric_sample: {
|
||||
unsigned ij = barycentric_ij_index(intr);
|
||||
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
|
||||
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_input:
|
||||
return load_input(intr);
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
return load_barycentric_at_offset(intr);
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
return load_barycentric_at_sample(intr);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
auto param = nir_src_as_const_value(intr->src[1]);
|
||||
assert(param && "Indirect PS inputs not (yet) supported");
|
||||
|
||||
int dest_num_comp = nir_dest_num_components(intr->dest);
|
||||
int start_comp = nir_intrinsic_component(intr);
|
||||
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
|
||||
|
||||
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
|
||||
|
||||
InterpolateParams params;
|
||||
|
||||
params.i = vf.src(intr->src[0], 0);
|
||||
params.j = vf.src(intr->src[0], 1);
|
||||
params.base = input(nir_intrinsic_base(intr)).lds_pos();
|
||||
|
||||
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
|
||||
return false;
|
||||
|
||||
if (need_temp) {
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
|
||||
auto real_dst = vf.dest(intr->dest, i, pin_chan);
|
||||
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
|
||||
emit_instruction(ir);
|
||||
}
|
||||
assert(ir);
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
||||
int num_dest_comp, int start_comp)
|
||||
{
|
||||
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
|
||||
|
||||
if (num_dest_comp == 1) {
|
||||
switch (start_comp) {
|
||||
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
|
||||
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
|
||||
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
|
||||
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_dest_comp == 2) {
|
||||
switch (start_comp) {
|
||||
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
|
||||
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
|
||||
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
|
||||
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_dest_comp == 3 && start_comp == 0)
|
||||
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
|
||||
load_interpolated_one_comp(dest, params, op2_interp_z);
|
||||
|
||||
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
|
||||
|
||||
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
|
||||
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
RegisterVec4 slope = vf.temp_vec4(pin_group);
|
||||
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
|
||||
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
|
||||
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
|
||||
|
||||
fetch->set_fetch_flag(FetchInstr::srf_mode);
|
||||
emit_instruction(fetch);
|
||||
|
||||
auto grad = vf.temp_vec4(pin_group);
|
||||
|
||||
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
||||
assert(interpolator.enabled);
|
||||
|
||||
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
||||
|
||||
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
|
||||
tex->set_tex_flag(TexInstr::grad_fine);
|
||||
tex->set_tex_flag(TexInstr::x_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::y_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::z_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::w_unnormalized);
|
||||
emit_instruction(tex);
|
||||
|
||||
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
|
||||
tex->set_tex_flag(TexInstr::x_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::y_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::z_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::w_unnormalized);
|
||||
tex->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(tex);
|
||||
|
||||
auto tmp0 = vf.temp_register();
|
||||
auto tmp1 = vf.temp_register();
|
||||
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
|
||||
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr)
|
||||
{
|
||||
auto& vf = value_factory();
|
||||
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
||||
|
||||
auto help = vf.temp_vec4(pin_group);
|
||||
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
||||
|
||||
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
|
||||
getgradh->set_tex_flag(TexInstr::x_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::y_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::z_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::w_unnormalized);
|
||||
getgradh->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(getgradh);
|
||||
|
||||
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
|
||||
getgradv->set_tex_flag(TexInstr::x_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::y_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::z_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::w_unnormalized);
|
||||
getgradv->set_tex_flag(TexInstr::grad_fine);
|
||||
emit_instruction(getgradv);
|
||||
|
||||
auto ofs_x = vf.src(instr->src[0], 0);
|
||||
auto ofs_y = vf.src(instr->src[0], 1);
|
||||
auto tmp0 = vf.temp_register();
|
||||
auto tmp1 = vf.temp_register();
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
|
||||
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
|
||||
const InterpolateParams& params,
|
||||
EAluOp op)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
|
||||
AluInstr *ir = nullptr;
|
||||
for (unsigned i = 0; i < 2 && success; ++i) {
|
||||
int chan = i;
|
||||
if (op == op2_interp_z)
|
||||
chan += 2;
|
||||
|
||||
|
||||
ir = new AluInstr(op, dest[chan],
|
||||
i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
|
||||
i == 0 ? AluInstr::write : AluInstr::last);
|
||||
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
|
||||
const InterpolateParams& params,
|
||||
EAluOp op, int writemask)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
|
||||
AluInstr *ir = nullptr;
|
||||
assert(params.j);
|
||||
assert(params.i);
|
||||
for (unsigned i = 0; i < 4 ; ++i) {
|
||||
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
||||
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
|
||||
const InterpolateParams& params, EAluOp op,
|
||||
int comp)
|
||||
{
|
||||
auto group = new AluGroup();
|
||||
bool success = true;
|
||||
AluInstr *ir = nullptr;
|
||||
|
||||
for (int i = 0; i < 4 ; ++i) {
|
||||
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
||||
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
||||
i == comp ? AluInstr::write : AluInstr::empty);
|
||||
ir->set_bank_swizzle(alu_vec_210);
|
||||
success = group->add_instruction(ir);
|
||||
}
|
||||
ir->set_alu_flag(alu_last_instr);
|
||||
if (success)
|
||||
emit_instruction(group);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
FragmentShaderEG::Interpolator::Interpolator():
|
||||
enabled(false)
|
||||
{
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,22 +39,18 @@ public:
|
|||
|
||||
bool process_stage_intrinsic(nir_intrinsic_instr *intr) override;
|
||||
|
||||
private:
|
||||
class Interpolator {
|
||||
public:
|
||||
Interpolator();
|
||||
bool enabled : 1;
|
||||
unsigned ij_index : 4;
|
||||
PRegister i;
|
||||
PRegister j;
|
||||
};
|
||||
|
||||
struct InterpolateParams {
|
||||
PVirtualValue i,j;
|
||||
int base;
|
||||
};
|
||||
protected:
|
||||
|
||||
static const int s_max_interpolators = 6;
|
||||
bool interpolators_used(int i) const {return m_interpolators_used.test(i);}
|
||||
private:
|
||||
|
||||
bool load_interpolated_input(nir_intrinsic_instr *intr);
|
||||
|
||||
virtual bool allocate_register_inputs() = 0;
|
||||
virtual bool load_input_hw(nir_intrinsic_instr *intr) = 0;
|
||||
virtual bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) = 0;
|
||||
virtual bool load_interpolated_input_hw(nir_intrinsic_instr *intr) = 0;
|
||||
|
||||
bool do_scan_instruction(nir_instr *instr) override;
|
||||
int do_allocate_reserved_registers() override;
|
||||
|
|
@ -63,17 +59,6 @@ private:
|
|||
|
||||
bool scan_input(nir_intrinsic_instr *instr, int index_src_id);
|
||||
|
||||
bool load_barycentric_pixel(nir_intrinsic_instr *intr);
|
||||
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
|
||||
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
|
||||
bool load_interpolated_input(nir_intrinsic_instr *intr);
|
||||
bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
||||
int num_dest_comp, int start_comp);
|
||||
|
||||
bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
|
||||
bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
|
||||
bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
|
||||
int start, int dest_slot);
|
||||
|
||||
bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs);
|
||||
bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
|
||||
|
|
@ -94,7 +79,6 @@ private:
|
|||
ExportInstr *m_last_pixel_export;
|
||||
|
||||
std::bitset<s_max_interpolators> m_interpolators_used;
|
||||
std::array<Interpolator, s_max_interpolators> m_interpolator;
|
||||
RegisterVec4 m_pos_input;
|
||||
Register *m_face_input{nullptr};
|
||||
bool m_fs_write_all;
|
||||
|
|
@ -109,6 +93,45 @@ private:
|
|||
int m_rat_base{0};
|
||||
};
|
||||
|
||||
class FragmentShaderEG : public FragmentShader {
|
||||
public:
|
||||
using FragmentShader::FragmentShader;
|
||||
|
||||
private:
|
||||
class Interpolator {
|
||||
public:
|
||||
Interpolator();
|
||||
bool enabled : 1;
|
||||
unsigned ij_index : 4;
|
||||
PRegister i;
|
||||
PRegister j;
|
||||
};
|
||||
|
||||
struct InterpolateParams {
|
||||
PVirtualValue i,j;
|
||||
int base;
|
||||
};
|
||||
|
||||
bool allocate_register_inputs() override;
|
||||
bool load_input_hw(nir_intrinsic_instr *intr) override;
|
||||
bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override;
|
||||
bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override;
|
||||
|
||||
bool load_barycentric_pixel(nir_intrinsic_instr *intr);
|
||||
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
|
||||
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
|
||||
bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
||||
int num_dest_comp, int start_comp);
|
||||
|
||||
bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op);
|
||||
bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask);
|
||||
bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
|
||||
int dest_slot);
|
||||
|
||||
std::array<Interpolator, s_max_interpolators> m_interpolator;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -3073,7 +3073,7 @@ Shader *TestShader::from_string(const std::string& s)
|
|||
Shader *shader = nullptr;
|
||||
|
||||
if (line.substr(0,2) == "FS")
|
||||
shader = new FragmentShader(key);
|
||||
shader = new FragmentShaderEG(key);
|
||||
else if (line.substr(0,2) == "VS")
|
||||
shader = new VertexShader(nullptr, nullptr, key);
|
||||
else if (line.substr(0,2) == "GS")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue