diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 61fd0849eb7..e2e33980226 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -411,7 +411,7 @@ Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_inf switch (nir->info.stage) { case MESA_SHADER_FRAGMENT: - shader = new FragmentShader(key); + shader = new FragmentShaderEG(key); break; case MESA_SHADER_VERTEX: shader = new VertexShader(so_info, gs_shader, key); diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp index 8a75c920797..c9dd9a51ce5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.cpp @@ -79,10 +79,10 @@ void FragmentShader::do_get_shader_info(r600_shader *sh_info) bool FragmentShader::load_input(nir_intrinsic_instr *intr) { auto& vf = value_factory(); - AluInstr *ir = nullptr; auto location = nir_intrinsic_io_semantics(intr).location; if (location == VARYING_SLOT_POS) { + AluInstr *ir = nullptr; for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) { ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none), @@ -96,41 +96,17 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr) } if (location == VARYING_SLOT_FACE) { - ir = new AluInstr(op2_setge_dx10, - vf.dest(intr->dest, 0, pin_none), - m_face_input, - vf.inline_const(ALU_SRC_0, 0), - AluInstr::last_write); + auto ir = new AluInstr(op2_setge_dx10, + vf.dest(intr->dest, 0, pin_none), + m_face_input, + vf.inline_const(ALU_SRC_0, 0), + AluInstr::last_write); set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel()); - emit_instruction(ir); return true; } - auto io = input(nir_intrinsic_base(intr)); - auto comp = nir_intrinsic_component(intr); - bool need_temp = comp > 0 || !intr->dest.is_ssa; - for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) { - if (need_temp) { - auto tmp = vf.temp_register(comp + i); - ir = new AluInstr(op1_interp_load_p0, - tmp, - new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp), - AluInstr::last_write); - emit_instruction(ir); - emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write)); - } else { - - ir = new AluInstr(op1_interp_load_p0, - vf.dest(intr->dest, i, pin_chan), - new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i), - AluInstr::write); - emit_instruction(ir); - } - - } - ir->set_alu_flag(alu_last_instr); - return true; + return load_input_hw(intr); } bool FragmentShader::store_output(nir_intrinsic_instr *intr) @@ -193,22 +169,10 @@ barycentric_ij_index(nir_intrinsic_instr *intr) bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr) { - auto& vf = value_factory(); - switch (intr->intrinsic) { - case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_pixel: - case nir_intrinsic_load_barycentric_sample: { - unsigned ij = barycentric_ij_index(intr); - vf.inject_value(intr->dest, 0, m_interpolator[ij].i); - vf.inject_value(intr->dest, 1, m_interpolator[ij].j); + if (process_stage_intrinsic_hw(intr)) return true; - } - case nir_intrinsic_load_input: - return load_input(intr); - case nir_intrinsic_load_barycentric_at_offset: - return load_barycentric_at_offset(intr); - case nir_intrinsic_load_barycentric_at_sample: - return load_barycentric_at_sample(intr); + + switch (intr->intrinsic) { case nir_intrinsic_load_interpolated_input: return load_interpolated_input(intr); case nir_intrinsic_discard_if: @@ -250,176 +214,21 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr) case VARYING_SLOT_POS: for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) vf.inject_value(intr->dest, i, m_pos_input[i]); - return true; + return true; case VARYING_SLOT_FACE: return false; default: ; } - auto param = nir_src_as_const_value(intr->src[1]); - assert(param && "Indirect PS inputs not (yet) supported"); - - int dest_num_comp = nir_dest_num_components(intr->dest); - int start_comp = nir_intrinsic_component(intr); - bool need_temp = start_comp > 0 || !intr->dest.is_ssa; - - auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan); - - InterpolateParams params; - - params.i = vf.src(intr->src[0], 0); - params.j = vf.src(intr->src[0], 1); - params.base = input(nir_intrinsic_base(intr)).lds_pos(); - - if (!load_interpolated(dst, params, dest_num_comp, start_comp)) - return false; - - if (need_temp) { - AluInstr *ir = nullptr; - for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { - auto real_dst = vf.dest(intr->dest, i, pin_chan); - ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write); - emit_instruction(ir); - } - assert(ir); - ir->set_alu_flag(alu_last_instr); - } - - return true; + return load_interpolated_input_hw(intr); } -bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params, - int num_dest_comp, int start_comp) -{ - sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n"; - - if (num_dest_comp == 1) { - switch (start_comp) { - case 0: return load_interpolated_one_comp(dest, params, op2_interp_x); - case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1); - case 2: return load_interpolated_one_comp(dest, params, op2_interp_z); - case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3); - default: - assert(0); - } - } - - if (num_dest_comp == 2) { - switch (start_comp) { - case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3); - case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc); - case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) && - load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1); - default: - assert(0); - } - } - - if (num_dest_comp == 3 && start_comp == 0) - return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) && - load_interpolated_one_comp(dest, params, op2_interp_z); - - int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp; - - bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc); - success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3); - return success; -} - -bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op) -{ - auto group = new AluGroup(); - bool success = true; - - AluInstr *ir = nullptr; - for (unsigned i = 0; i < 2 && success; ++i) { - int chan = i; - if (op == op2_interp_z) - chan += 2; - - - ir = new AluInstr(op, dest[chan], - i & 1 ? params.j : params.i, - new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan), - i == 0 ? AluInstr::write : AluInstr::last); - - ir->set_bank_swizzle(alu_vec_210); - success = group->add_instruction(ir); - } - ir->set_alu_flag(alu_last_instr); - if (success) - emit_instruction(group); - return success; -} - -bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask) -{ - auto group = new AluGroup(); - bool success = true; - - AluInstr *ir = nullptr; - assert(params.j); - assert(params.i); - for (unsigned i = 0; i < 4 ; ++i) { - ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, - new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), - (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty); - ir->set_bank_swizzle(alu_vec_210); - success = group->add_instruction(ir); - } - ir->set_alu_flag(alu_last_instr); - if (success) - emit_instruction(group); - return success; -} - -bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, - UNUSED int start, int comp) -{ - auto group = new AluGroup(); - bool success = true; - AluInstr *ir = nullptr; - - for (int i = 0; i < 4 ; ++i) { - ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, - new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), - i == comp ? AluInstr::write : AluInstr::empty); - ir->set_bank_swizzle(alu_vec_210); - success = group->add_instruction(ir); - } - ir->set_alu_flag(alu_last_instr); - if (success) - emit_instruction(group); - - return success; -} int FragmentShader::do_allocate_reserved_registers() { - for (unsigned i = 0; i < s_max_interpolators; ++i) { - if (m_interpolators_used.test(i)) { - sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; - m_interpolator[i].enabled = true; - } - } - int num_baryc = 0; - for (int i = 0; i < 6; ++i) { - if (m_interpolator[i].enabled) { - sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; - unsigned sel = num_baryc / 2; - unsigned chan = 2 * (num_baryc % 2); - - m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1); - m_interpolator[i].i->pin_live_range(true, false); - - m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan); - m_interpolator[i].j->pin_live_range(true, false); - - m_interpolator[i].ij_index = num_baryc++; - } - } + int num_baryc = allocate_register_inputs(); int next_register = (num_baryc + 1) >> 1; @@ -658,90 +467,6 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id) } } -bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr) -{ - auto& vf = value_factory(); - RegisterVec4 slope = vf.temp_vec4(pin_group); - auto src = emit_load_to_register(vf.src(instr->src[0], 0)); - auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0, - R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float); - - fetch->set_fetch_flag(FetchInstr::srf_mode); - emit_instruction(fetch); - - auto grad = vf.temp_vec4(pin_group); - - auto interpolator = m_interpolator[barycentric_ij_index(instr)]; - assert(interpolator.enabled); - - RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); - - auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0); - tex->set_tex_flag(TexInstr::grad_fine); - tex->set_tex_flag(TexInstr::x_unnormalized); - tex->set_tex_flag(TexInstr::y_unnormalized); - tex->set_tex_flag(TexInstr::z_unnormalized); - tex->set_tex_flag(TexInstr::w_unnormalized); - emit_instruction(tex); - - tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0); - tex->set_tex_flag(TexInstr::x_unnormalized); - tex->set_tex_flag(TexInstr::y_unnormalized); - tex->set_tex_flag(TexInstr::z_unnormalized); - tex->set_tex_flag(TexInstr::w_unnormalized); - tex->set_tex_flag(TexInstr::grad_fine); - emit_instruction(tex); - - auto tmp0 = vf.temp_register(); - auto tmp1 = vf.temp_register(); - - emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write})); - emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); - - emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write})); - emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr})); - - return true; -} - -bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr) -{ - auto& vf = value_factory(); - auto interpolator = m_interpolator[barycentric_ij_index(instr)]; - - auto help = vf.temp_vec4(pin_group); - RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); - - auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0); - getgradh->set_tex_flag(TexInstr::x_unnormalized); - getgradh->set_tex_flag(TexInstr::y_unnormalized); - getgradh->set_tex_flag(TexInstr::z_unnormalized); - getgradh->set_tex_flag(TexInstr::w_unnormalized); - getgradh->set_tex_flag(TexInstr::grad_fine); - emit_instruction(getgradh); - - auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0); - getgradv->set_tex_flag(TexInstr::x_unnormalized); - getgradv->set_tex_flag(TexInstr::y_unnormalized); - getgradv->set_tex_flag(TexInstr::z_unnormalized); - getgradv->set_tex_flag(TexInstr::w_unnormalized); - getgradv->set_tex_flag(TexInstr::grad_fine); - emit_instruction(getgradv); - - auto ofs_x = vf.src(instr->src[0], 0); - auto ofs_y = vf.src(instr->src[0], 1); - auto tmp0 = vf.temp_register(); - auto tmp1 = vf.temp_register(); - emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); - emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); - emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); - emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr})); - - return true; -} - - - bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs) { RegisterVec4::Swizzle swizzle; @@ -890,7 +615,319 @@ void FragmentShader::do_print_properties(std::ostream& os) const } -FragmentShader::Interpolator::Interpolator(): +bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + auto io = input(nir_intrinsic_base(intr)); + auto comp = nir_intrinsic_component(intr); + + bool need_temp = comp > 0 || !intr->dest.is_ssa; + AluInstr *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) { + if (need_temp) { + auto tmp = vf.temp_register(comp + i); + ir = new AluInstr(op1_interp_load_p0, + tmp, + new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp), + AluInstr::last_write); + emit_instruction(ir); + emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write)); + } else { + + ir = new AluInstr(op1_interp_load_p0, + vf.dest(intr->dest, i, pin_chan), + new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i), + AluInstr::write); + emit_instruction(ir); + } + + } + ir->set_alu_flag(alu_last_instr); + return true; +} + +bool FragmentShaderEG::allocate_register_inputs() +{ + for (unsigned i = 0; i < s_max_interpolators; ++i) { + if (interpolators_used(i)) { + sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; + m_interpolator[i].enabled = true; + } + } + + int num_baryc = 0; + for (int i = 0; i < 6; ++i) { + if (m_interpolator[i].enabled) { + sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; + unsigned sel = num_baryc / 2; + unsigned chan = 2 * (num_baryc % 2); + + m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1); + m_interpolator[i].i->pin_live_range(true, false); + + m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan); + m_interpolator[i].j->pin_live_range(true, false); + + m_interpolator[i].ij_index = num_baryc++; + } + } + return num_baryc; +} + +bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + switch (intr->intrinsic) { + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_sample: { + unsigned ij = barycentric_ij_index(intr); + vf.inject_value(intr->dest, 0, m_interpolator[ij].i); + vf.inject_value(intr->dest, 1, m_interpolator[ij].j); + return true; + } + case nir_intrinsic_load_input: + return load_input(intr); + case nir_intrinsic_load_barycentric_at_offset: + return load_barycentric_at_offset(intr); + case nir_intrinsic_load_barycentric_at_sample: + return load_barycentric_at_sample(intr); + default: + return false; + } +} + +bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr) +{ + auto& vf = value_factory(); + auto param = nir_src_as_const_value(intr->src[1]); + assert(param && "Indirect PS inputs not (yet) supported"); + + int dest_num_comp = nir_dest_num_components(intr->dest); + int start_comp = nir_intrinsic_component(intr); + bool need_temp = start_comp > 0 || !intr->dest.is_ssa; + + auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan); + + InterpolateParams params; + + params.i = vf.src(intr->src[0], 0); + params.j = vf.src(intr->src[0], 1); + params.base = input(nir_intrinsic_base(intr)).lds_pos(); + + if (!load_interpolated(dst, params, dest_num_comp, start_comp)) + return false; + + if (need_temp) { + AluInstr *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + auto real_dst = vf.dest(intr->dest, i, pin_chan); + ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write); + emit_instruction(ir); + } + assert(ir); + ir->set_alu_flag(alu_last_instr); + } + + return true; +} + +bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params, + int num_dest_comp, int start_comp) +{ + sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n"; + + if (num_dest_comp == 1) { + switch (start_comp) { + case 0: return load_interpolated_one_comp(dest, params, op2_interp_x); + case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1); + case 2: return load_interpolated_one_comp(dest, params, op2_interp_z); + case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3); + default: + assert(0); + } + } + + if (num_dest_comp == 2) { + switch (start_comp) { + case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3); + case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc); + case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) && + load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1); + default: + assert(0); + } + } + + if (num_dest_comp == 3 && start_comp == 0) + return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) && + load_interpolated_one_comp(dest, params, op2_interp_z); + + int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp; + + bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc); + success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3); + return success; +} + + +bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + RegisterVec4 slope = vf.temp_vec4(pin_group); + auto src = emit_load_to_register(vf.src(instr->src[0], 0)); + auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0, + R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float); + + fetch->set_fetch_flag(FetchInstr::srf_mode); + emit_instruction(fetch); + + auto grad = vf.temp_vec4(pin_group); + + auto interpolator = m_interpolator[barycentric_ij_index(instr)]; + assert(interpolator.enabled); + + RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); + + auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0); + tex->set_tex_flag(TexInstr::grad_fine); + tex->set_tex_flag(TexInstr::x_unnormalized); + tex->set_tex_flag(TexInstr::y_unnormalized); + tex->set_tex_flag(TexInstr::z_unnormalized); + tex->set_tex_flag(TexInstr::w_unnormalized); + emit_instruction(tex); + + tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0); + tex->set_tex_flag(TexInstr::x_unnormalized); + tex->set_tex_flag(TexInstr::y_unnormalized); + tex->set_tex_flag(TexInstr::z_unnormalized); + tex->set_tex_flag(TexInstr::w_unnormalized); + tex->set_tex_flag(TexInstr::grad_fine); + emit_instruction(tex); + + auto tmp0 = vf.temp_register(); + auto tmp1 = vf.temp_register(); + + emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr})); + + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr})); + + return true; +} + +bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr) +{ + auto& vf = value_factory(); + auto interpolator = m_interpolator[barycentric_ij_index(instr)]; + + auto help = vf.temp_vec4(pin_group); + RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group); + + auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0); + getgradh->set_tex_flag(TexInstr::x_unnormalized); + getgradh->set_tex_flag(TexInstr::y_unnormalized); + getgradh->set_tex_flag(TexInstr::z_unnormalized); + getgradh->set_tex_flag(TexInstr::w_unnormalized); + getgradh->set_tex_flag(TexInstr::grad_fine); + emit_instruction(getgradh); + + auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0); + getgradv->set_tex_flag(TexInstr::x_unnormalized); + getgradv->set_tex_flag(TexInstr::y_unnormalized); + getgradv->set_tex_flag(TexInstr::z_unnormalized); + getgradv->set_tex_flag(TexInstr::w_unnormalized); + getgradv->set_tex_flag(TexInstr::grad_fine); + emit_instruction(getgradv); + + auto ofs_x = vf.src(instr->src[0], 0); + auto ofs_y = vf.src(instr->src[0], 1); + auto tmp0 = vf.temp_register(); + auto tmp1 = vf.temp_register(); + emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write})); + emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr})); + + return true; +} + +bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest, + const InterpolateParams& params, + EAluOp op) +{ + auto group = new AluGroup(); + bool success = true; + + AluInstr *ir = nullptr; + for (unsigned i = 0; i < 2 && success; ++i) { + int chan = i; + if (op == op2_interp_z) + chan += 2; + + + ir = new AluInstr(op, dest[chan], + i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan), + i == 0 ? AluInstr::write : AluInstr::last); + + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + return success; +} + +bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest, + const InterpolateParams& params, + EAluOp op, int writemask) +{ + auto group = new AluGroup(); + bool success = true; + + AluInstr *ir = nullptr; + assert(params.j); + assert(params.i); + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), + (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty); + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + return success; +} + +bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest, + const InterpolateParams& params, EAluOp op, + int comp) +{ + auto group = new AluGroup(); + bool success = true; + AluInstr *ir = nullptr; + + for (int i = 0; i < 4 ; ++i) { + ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i, + new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i), + i == comp ? AluInstr::write : AluInstr::empty); + ir->set_bank_swizzle(alu_vec_210); + success = group->add_instruction(ir); + } + ir->set_alu_flag(alu_last_instr); + if (success) + emit_instruction(group); + + return success; +} + + +FragmentShaderEG::Interpolator::Interpolator(): enabled(false) { } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fs.h b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h index 913e1e0cf58..23ca27b7955 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fs.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fs.h @@ -39,22 +39,18 @@ public: bool process_stage_intrinsic(nir_intrinsic_instr *intr) override; -private: - class Interpolator { - public: - Interpolator(); - bool enabled : 1; - unsigned ij_index : 4; - PRegister i; - PRegister j; - }; - - struct InterpolateParams { - PVirtualValue i,j; - int base; - }; +protected: static const int s_max_interpolators = 6; + bool interpolators_used(int i) const {return m_interpolators_used.test(i);} +private: + + bool load_interpolated_input(nir_intrinsic_instr *intr); + + virtual bool allocate_register_inputs() = 0; + virtual bool load_input_hw(nir_intrinsic_instr *intr) = 0; + virtual bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) = 0; + virtual bool load_interpolated_input_hw(nir_intrinsic_instr *intr) = 0; bool do_scan_instruction(nir_instr *instr) override; int do_allocate_reserved_registers() override; @@ -63,17 +59,6 @@ private: bool scan_input(nir_intrinsic_instr *instr, int index_src_id); - bool load_barycentric_pixel(nir_intrinsic_instr *intr); - bool load_barycentric_at_sample(nir_intrinsic_instr* instr); - bool load_barycentric_at_offset(nir_intrinsic_instr* instr); - bool load_interpolated_input(nir_intrinsic_instr *intr); - bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params, - int num_dest_comp, int start_comp); - - bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op); - bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask); - bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, - int start, int dest_slot); bool emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs); bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); @@ -94,7 +79,6 @@ private: ExportInstr *m_last_pixel_export; std::bitset m_interpolators_used; - std::array m_interpolator; RegisterVec4 m_pos_input; Register *m_face_input{nullptr}; bool m_fs_write_all; @@ -109,6 +93,45 @@ private: int m_rat_base{0}; }; +class FragmentShaderEG : public FragmentShader { +public: + using FragmentShader::FragmentShader; + +private: + class Interpolator { + public: + Interpolator(); + bool enabled : 1; + unsigned ij_index : 4; + PRegister i; + PRegister j; + }; + + struct InterpolateParams { + PVirtualValue i,j; + int base; + }; + + bool allocate_register_inputs() override; + bool load_input_hw(nir_intrinsic_instr *intr) override; + bool process_stage_intrinsic_hw(nir_intrinsic_instr *intr) override; + bool load_interpolated_input_hw(nir_intrinsic_instr *intr) override; + + bool load_barycentric_pixel(nir_intrinsic_instr *intr); + bool load_barycentric_at_sample(nir_intrinsic_instr* instr); + bool load_barycentric_at_offset(nir_intrinsic_instr* instr); + bool load_interpolated(RegisterVec4& dest, const InterpolateParams& params, + int num_dest_comp, int start_comp); + + bool load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op); + bool load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask); + bool load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, + int dest_slot); + + std::array m_interpolator; + +}; + } #endif diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 1dde8ce1e3c..5073028f78f 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -3073,7 +3073,7 @@ Shader *TestShader::from_string(const std::string& s) Shader *shader = nullptr; if (line.substr(0,2) == "FS") - shader = new FragmentShader(key); + shader = new FragmentShaderEG(key); else if (line.substr(0,2) == "VS") shader = new VertexShader(nullptr, nullptr, key); else if (line.substr(0,2) == "GS")