mesa-24.0.2
-----BEGIN PGP SIGNATURE-----
iQFGBAABCAAwFiEEV1Ud4VuWj2NBwkj2jY4xr8MkKKYFAmXffd4SHGVyaWNAZW5n
ZXN0cm9tLmNoAAoJEI2OMa/DJCime7oH/3U7BdJqYW0o12sWEk0vHgi1eGYRzAK9
CZMMMNzNeBBNfBoDBMlHh+r6jnwzmMnOLYcQQIY3jn+QA/md9vdM9GyHxJgSyQcJ
Up7/9dJeWr9lOvxKiJR1c0Wz6y8cr+aLYJMjVihnHCTFU51cLjh+W1hfOtRhQtVJ
o8yDtNBXCLyQgyXdPWm/ANYDtYWyuEkaONHq8tL2KaGXM7txjeTn1j8E4/nQe1QQ
6jmQKGKm148ftoplssNBYyLMWg2f46Fbp3c4s6pJ3fHwCQel8BTV9Rq3mfjtDeVF
P3rTvBAUZ8hV1Rh51/ZxbFIG6M3FFnm/4ryKH5zPuPQ6WsTthTM9SKk=
=P30T
-----END PGP SIGNATURE-----
gpgsig -----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEEzgD2pEY1++nF3Ggr3Ztj+AXPXAMFAmXkohwACgkQ3Ztj+AXP
XANvdA//aNtR+ZJh3mqwJ1iFvnlzl1B9hqw5fOczjiQs33qq3AD62XyDV+ZmP9Rg
fevI8VMoDaFTBL+LRSdi5NIwEqSg7g9m2eU1I85YJB6f1yMeJlPOaHoKDhGHF5vK
pJEgFc1bG6RyS6nmR2PJ1TZ93pSBWPDNSqkXAXm6BbY7+tcAmXSYBz1SgpyaBVC5
kZslxAkmN5OkITc0M8E2/1ph69Xo30673zGtQ92hZuYzlZpKNB8n+aQpo21nHWpj
TkI36jjYkW0optGv13Xp2LAixmUvc1F+5uXPudGyw6VbpotutWfQR8cxslhvwUXq
VAY3fV7wN71+L52dfIJvJGQ74brbJm+gxXZGjcdXjCO1Ux//TC8rAdtIv62DbHeQ
Lt8eRL+mXCMpRNg/WoIjtYkKNsn/Hunxlvukcs+iFmPirkkEbYsr+82Hdut667Yc
SX5PzayYCfqnmLx1BONmOX8kl1tA9hGyykrFXI6SawEclQ30+S0k6irfYzQmBlm/
jVaQCLvR8Y6A2Uj/Pp9sEbw4aR3JhmLPDRiaZMHkTPi0u9OY+YW1OHMBHMEmcACU
COqu0Ks15WZt4c1mOJSJeocJ4A/OVfGHaJimL8vDXCsVovGQ5o6DcYgvIXm6k58p
g2MYKZf2q7QwxEgkLN8qAVBcZmJGW4312VZhgI8Z0geDWkmK9lc=
=Tln2
-----END PGP SIGNATURE-----

Merge tag 'mesa-24.0.2' into 24/neroreflex

mesa-24.0.2
commit 097a0be263
87 changed files with 8813 additions and 500 deletions
@@ -316,6 +316,7 @@ fedora/x86_64_build:
 .kernel+rootfs:
+  timeout: 2h # 24.0-only change
   extends:
     - .container+build-rules
     - .debian-container
@@ -43,7 +43,7 @@ rustfmt:
     - rustfmt --verbose src/**/lib.rs
     - rustfmt --verbose src/**/main.rs

-clang-format:
+.clang-format:
   extends:
     - .formatting-check
     - .lint-clang-format-rules
.pick_status.json: 7756 changed lines (file diff suppressed because it is too large)
VERSION: 2 changed lines
@@ -1 +1 @@
-24.0.1
+24.0.2
@@ -1359,7 +1359,7 @@ RADV driver environment variables
    ``video_decode``
       enable experimental video decoding support
    ``gsfastlaunch2``
-      use GS_FAST_LAUNCH=2 for Mesh shaders (GFX11+)
+      use GS_FAST_LAUNCH=2 for Mesh shaders (GFX11+ dGPUs only)

 .. envvar:: RADV_TEX_ANISO
@@ -3,6 +3,7 @@ Release Notes
 The release notes summarize what's new or changed in each Mesa release.

+- :doc:`24.0.2 release notes <relnotes/24.0.2>`
 - :doc:`24.0.1 release notes <relnotes/24.0.1>`
 - :doc:`24.0.0 release notes <relnotes/24.0.0>`
 - :doc:`23.3.3 release notes <relnotes/23.3.3>`
@@ -409,6 +410,7 @@ The release notes summarize what's new or changed in each Mesa release.
    :maxdepth: 1
    :hidden:

+   24.0.2 <relnotes/24.0.2>
    24.0.1 <relnotes/24.0.1>
    24.0.0 <relnotes/24.0.0>
    23.3.3 <relnotes/23.3.3>
@@ -19,7 +19,7 @@ SHA256 checksum
 ::

-    TBD.
+    f387192b08c471c545590dd12230a2a343244804b5fe866fec6aea02eab57613 mesa-24.0.1.tar.xz


 New features
docs/relnotes/24.0.2.rst: new file, 230 lines
@@ -0,0 +1,230 @@
Mesa 24.0.2 Release Notes / 2024-02-28
======================================

Mesa 24.0.2 is a bug fix release which fixes bugs found since the 24.0.1 release.

Mesa 24.0.2 implements the OpenGL 4.6 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.6. OpenGL
4.6 is **only** available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

Mesa 24.0.2 implements the Vulkan 1.3 API, but the version reported by
the apiVersion property of the VkPhysicalDeviceProperties struct
depends on the particular driver being used.
SHA256 checksum
---------------

::

    TBD.


New features
------------

- None


Bug fixes
---------
- KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage fails on MTL
- GTF-GL46.gtf42.GL3Tests.texture_storage.texture_storage_texture_as_framebuffer_attachment fails on MTL
- [intel][anv][build][regression] - genX_grl.h:27:10: fatal error: grl/grl_cl_kernel.h: No such file or directory
- RX 6600 VDPAU not recognizing HEVC_MAIN_10 correctly
- Running an app on another AMD GPU (offload, DRI_PRIME) produces corrupted frames on Wayland.
- VDPAU declares a texture as "immutable" without also setting its ImmutableLevels attribute.
- RX6600 hardware HEVC video decode fails for VDPAU but works for VA-API. (Can lock up GPU!)
- Rusticl panics when getting program build logs using opencl.hpp
- ue5 game issues lighting Rog Ally 7080u (z1e)
- Missing textures in RoboCop: Rogue City with mesh shaders enabled
- radv: Multiview PSO forgets to export layer in some cases.
- zink: flickering artifacts in Selaco

Changes
-------

Boyuan Zhang (1):

- radeonsi/vcn: only use multi slices reflist when available

Chia-I Wu (1):

- radv: fix pipeline stats mask

Chris Rankin (2):

- vdpau: Declare texture object as immutable using helper function.
- vdpau: Refactor query for video surface formats.

Connor Abbott (1):

- tu: Follow pipeline compatibility rules for dynamic descriptors

Daniel Schürmann (1):

- spirv: Fix SpvOpExpectKHR

Daniel Stone (2):

- egl/wayland: Add opaque-equivalent FourCCs
- egl/wayland: Fix EGL_EXT_present_opaque

Dave Airlie (2):

- nouveau/winsys: fix bda heap leak.
- nvk: fix dri options leak.

David Rosca (1):

- frontends/va: Only set VP9 segmentation fields when segmentation is enabled

Eric Engestrom (10):

- docs: add sha256sum for 24.0.1
- [24.0-only change] ci: increase the kernel+rootfs builds timeout to 2h
- .pick_status.json: Update to c6e855b64b9015235462959b2b7f3e9fc34b2f1f
- .pick_status.json: Update to dce20690542c84ac00509a6db7902dcfc90b25bb
- .pick_status.json: Update to c12300844d3f084ca011a3f54f0cbaa9807418f0
- .pick_status.json: Mark 3b927567ac927316eb11901f50ee1573ead44fd2 as denominated
- .pick_status.json: Update to 423add61e2d5b6ab6b5505d1feec01b93609f8fc
- .pick_status.json: Update to 4071c399a27932ea9253eb8a65d5725504bac6f3
- .pick_status.json: Update to 82ff9204abab5267f82a9ce73f9dca1541ef5ee6
- [24.0 only] disable clang-format

Erik Faye-Lund (1):

- mesa/main: allow GL_BGRA for FBOs

Faith Ekstrand (1):

- nvk: Invalidate the texture cache before MSAA resolves

Hans-Kristian Arntzen (1):

- radv: export multiview in VS/TES/GS for depth-only rendering

Iago Toral Quiroga (1):

- v3d,v3dv: fix BO allocation for shared vars

Ian Romanick (1):

- nir: Mark nir_intrinsic_load_global_block_intel as divergent

Jesse Natalie (1):

- dzn: Don't set view instancing mask until after the PSO

Jordan Justen (1):

- intel/dev: Add 2 additional ADL-N PCI ids

Juston Li (1):

- venus: fix image reqs cache store locking

Karol Herbst (3):

- zink: lower unaligned memory accesses
- rusticl/program: fix CL_PROGRAM_BINARIES for devs with no builds
- meson: do not pull in clc for clover

Konstantin Seurer (5):

- zink: Always set mfence->submit_count to the fence submit_count
- Revert "zink: always force flushes when originating from api frontend"
- llvmpipe: Use full subgroups when possible
- gallivm: Consider the initial mask when terminating loops
- ci: Update llvmpipe trace checksums

Lionel Landwerlin (8):

- vulkan/runtime: add helper to query attachment layout
- anv: fixup push descriptor shader analysis
- anv: reenable ANV_ALWAYS_BINDLESS
- anv: fix Wa_16013994831 macros
- anv: disable Wa_16013994831
- intel/nir: only consider ray query variables in lowering
- anv: limit depth flush on dynamic render pass suspend
- anv: add missing generated file dep

Martin Roukala (né Peres) (1):

- radv/ci: switch vkcts-polaris10 from mupuf to KWS' farm

Michel Dänzer (1):

- egl/wayland: Flush after blitting to linear copy

Mike Blumenkrantz (25):

- zink: prune dmabuf export tracking when adding resource binds
- zink: fix sparse bo placement
- zink: zero allocate resident_defs array in ntv
- zink: move sparse lowering up in file
- zink: run sparse lowering after all optimization passes
- zink: adjust swizzled deref loads by the variable component offset
- zink: clamp zink_gfx_lib_cache::stages_present for generated tcs
- zink: promote gpl libs freeing during shader destroy out of prog loop
- zink: don't add VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT for sparse textures
- zink: delete maxDescriptorBufferBindings checks
- zink: avoid infinite recursion on (very) small BAR systems in bo alloc
- zink: add checks/compat for low-spec descriptor buffer implementations
- zink: add a second fence disambiguation case
- zink: force host-visible allocations for MAP_COHERENT resources
- zink: handle stencil_fallback in zink_clear_depth_stencil
- zink: don't destroy the current batch state on context destroy
- mesa: check driver format support for certain GetInternalformativ queries
- vk/wsi/x11/sw: use swapchain depth for putimage
- zink: only scan active batch states for free states if > 1 exist
- zink: fix longstanding issue with active batch state recycling
- zink: assert that batch_id is valid in zink_screen_check_last_finished()
- zink: clamp in_rp clears to fb size
- zink: fix (dynamic rendering) execution of scissored clears during flush
- zink: lock buffer age when chundering swapchain for readback
- zink: flag acquired swapchain image as readback target on acquire, not present

Patrick Lerda (3):

- r300: fix vertex_buffer related refcnt imbalance
- r300: fix r300_destroy_context() related memory leaks
- r300: fix memory leaks when register allocation fails

Pavel Ondračka (1):

- r300: add explicit flrp lowering

Rhys Perry (2):

- aco/ra: don't initialize assigned in initializer list
- aco/ra: fix GFX9- writelane

Sagar Ghuge (1):

- nir: Allow nir_texop_tg4 in implicit derivative

Samuel Pitoiset (4):

- radv: fix RGP barrier reason for RP barriers inserted by the runtime
- radv: enable GS_FAST_LAUNCH=2 by default for RDNA3 APUs (Phoenix)
- spirv: only consider IO variables when adjusting patch locations for TES
- radv: fix indirect dispatches on compute queue with conditional rendering on GFX7

Tapani Pälli (2):

- intel/blorp: disable use of REP16 independent of format
- iris: make sure DS and TE are sent in pairs on >= gfx125

Yiwei Zhang (2):

- venus: force async pipeline create on threads creating descriptor pools
- venus: fix the cmd stride used for qfb recording

thfrwn (1):

- mesa: fix off-by-one for newblock allocation in dlist_alloc
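The release-notes paragraphs above stress that the version reported at runtime depends on the driver, not on the Mesa release. A minimal C sketch of the queries they mention, assuming a current GL 3.x+ context and an already-enumerated VkPhysicalDevice (context/instance setup and error handling omitted):

   #include <stdio.h>
   #include <GL/gl.h>
   #include <vulkan/vulkan.h>

   /* Sketch only: assumes a current GL context and a valid physical device. */
   static void print_reported_versions(VkPhysicalDevice phys)
   {
      GLint major = 0, minor = 0;
      glGetIntegerv(GL_MAJOR_VERSION, &major);   /* driver-dependent */
      glGetIntegerv(GL_MINOR_VERSION, &minor);
      printf("GL: %d.%d (%s)\n", major, minor,
             (const char *)glGetString(GL_VERSION));

      VkPhysicalDeviceProperties props;
      vkGetPhysicalDeviceProperties(phys, &props);
      printf("Vulkan apiVersion: %u.%u.%u\n",
             VK_API_VERSION_MAJOR(props.apiVersion),
             VK_API_VERSION_MINOR(props.apiVersion),
             VK_API_VERSION_PATCH(props.apiVersion));
   }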
@@ -187,6 +187,8 @@ CHIPSET(0x46c3, adl_gt2, "ADL GT2", "Intel(R) Graphics")
 CHIPSET(0x46d0, adl_n, "ADL-N", "Intel(R) Graphics")
 CHIPSET(0x46d1, adl_n, "ADL-N", "Intel(R) Graphics")
 CHIPSET(0x46d2, adl_n, "ADL-N", "Intel(R) Graphics")
+CHIPSET(0x46d3, adl_n, "ADL-N", "Intel(R) Graphics")
+CHIPSET(0x46d4, adl_n, "ADL-N", "Intel(R) Graphics")

 CHIPSET(0x9a40, tgl_gt2, "TGL GT2", "Intel(R) Xe Graphics")
 CHIPSET(0x9a49, tgl_gt2, "TGL GT2", "Intel(R) Xe Graphics")
@@ -813,7 +813,6 @@ if _opencl != 'disabled'
     error('The Clover OpenCL state tracker requires rtti')
   endif

-  with_clc = true
   with_gallium_opencl = true
   with_opencl_icd = _opencl == 'icd'
 else
@@ -838,7 +837,7 @@ if with_gallium_rusticl
 endif

 dep_clc = null_dep
-if with_clc
+if with_gallium_opencl or with_clc
   dep_clc = dependency('libclc')
 endif
@@ -163,7 +163,7 @@ radeonsi-raven-va-full:x86_64:
 vkcts-polaris10-valve:
   extends:
     - .vkcts-test-valve
-    - .polaris10-test-valve-mupuf
+    - .polaris10-test-valve-kws
     - .radv-valve-manual-rules
   timeout: 1h 15m
   variables:
@@ -62,7 +62,7 @@ struct assignment {
    };
    uint32_t affinity = 0;
    assignment() = default;
-   assignment(PhysReg reg_, RegClass rc_) : reg(reg_), rc(rc_), assigned(-1) {}
+   assignment(PhysReg reg_, RegClass rc_) : reg(reg_), rc(rc_) { assigned = true; }
    void set(const Definition& def)
    {
       assigned = true;
@@ -1936,19 +1936,6 @@ bool
 operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
                     RegClass rc)
 {
-   bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
-                       instr->opcode == aco_opcode::v_writelane_b32_e64;
-   if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
-      /* v_writelane_b32 can take two sgprs but only if one is m0. */
-      bool is_other_sgpr =
-         instr->operands[!idx].isTemp() &&
-         (!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0);
-      if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) {
-         instr->operands[idx].setFixed(m0);
-         return reg == m0;
-      }
-   }
-
    if (reg.byte()) {
       unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc);
       if (reg.byte() % stride)
@@ -2844,6 +2831,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
             operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
       }

+      bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
+                          instr->opcode == aco_opcode::v_writelane_b32_e64;
+      if (program->gfx_level <= GFX9 && is_writelane && instr->operands[0].isTemp() &&
+          instr->operands[1].isTemp()) {
+         /* v_writelane_b32 can take two sgprs but only if one is m0. */
+         if (ctx.assignments[instr->operands[0].tempId()].reg != m0 &&
+             ctx.assignments[instr->operands[1].tempId()].reg != m0) {
+            instr->operands[0].setFixed(m0);
+            fixed = true;
+         }
+      }
+
       if (fixed)
          handle_fixed_operands(ctx, register_file, parallelcopy, instr);
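The rule the two hunks above move into the main allocation loop can be stated as a standalone predicate. This is an illustrative sketch, not ACO code:

   #include <stdbool.h>

   /* Sketch of the GFX9-and-older rule the hunks encode: v_writelane_b32 may
    * read two scalar sources, but only if one of them is the m0 register.
    * When neither temporary was assigned m0, the allocator now pins source 0
    * to m0 and lets a parallelcopy shuffle the old values aside. */
   static bool must_pin_src0_to_m0(bool gfx9_or_older,
                                   bool src0_in_m0, bool src1_in_m0)
   {
      return gfx9_or_older && !src0_in_m0 && !src1_in_m0;
   }

The new regalloc.writelane test below exercises exactly this case: neither scalar input starts in m0, so a parallelcopy moves %in1 into m0 before the v_writelane_b32_e64 is emitted.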
@@ -410,3 +410,21 @@ BEGIN_TEST(regalloc.vinterp_fp16)

    finish_ra_test(ra_test_policy());
 END_TEST
+
+BEGIN_TEST(regalloc.writelane)
+   //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm
+   if (!setup_cs("v1 s1 s1 s1", GFX8))
+      return;
+
+   //! s1: %tmp:m0 = p_parallelcopy %in3:s[2]
+   Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]);
+
+   //! s1: %in1_2:m0, s1: %tmp_2:s[0] = p_parallelcopy %in1:s[0], %tmp:m0
+   //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0]
+   Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]);
+
+   //! p_unit_test %tmp_2:s[0], %tmp2:v[0]
+   bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2);
+
+   finish_ra_test(ra_test_policy());
+END_TEST
@@ -9665,12 +9665,16 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv

    if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
       uint64_t indirect_va = info->va;
+      const bool needs_align32_workaround =
+         cmd_buffer->device->physical_device->rad_info.has_async_compute_align32_bug &&
+         cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32);
+      const unsigned ace_predication_size =
+         4 /* DISPATCH_INDIRECT */ + (needs_align32_workaround ? 6 * 3 /* 3x COPY_DATA */ : 0);

       radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
-                                       &cmd_buffer->mec_inv_pred_emitted, 4 /* DISPATCH_INDIRECT size */);
+                                       &cmd_buffer->mec_inv_pred_emitted, ace_predication_size);

-      if (cmd_buffer->device->physical_device->rad_info.has_async_compute_align32_bug &&
-          cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32)) {
+      if (needs_align32_workaround) {
          const uint64_t unaligned_va = indirect_va;
          UNUSED void *ptr;
          uint32_t offset;
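The sizing arithmetic in this fix can be restated in one line; a hedged sketch, with the dword counts taken from the comments in the diff:

   /* Sketch: the predicated packet range must cover the 4-dword
    * DISPATCH_INDIRECT plus, when the align32 workaround fires, three
    * 6-dword COPY_DATA packets used to realign the indirect VA. */
   static unsigned ace_predication_size_dw(_Bool needs_align32_workaround)
   {
      return 4 + (needs_align32_workaround ? 6 * 3 : 0); /* 4 or 22 dwords */
   }

Previously the predication covered only the 4 dwords of the dispatch packet, so the COPY_DATA packets escaped conditional-rendering predication on GFX7 compute queues.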
@@ -10642,8 +10646,15 @@ VKAPI_ATTR void VKAPI_CALL
 radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   enum rgp_barrier_reason barrier_reason;

-   radv_barrier(cmd_buffer, pDependencyInfo, RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER);
+   if (cmd_buffer->vk.runtime_rp_barrier) {
+      barrier_reason = RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC;
+   } else {
+      barrier_reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
+   }
+
+   radv_barrier(cmd_buffer, pDependencyInfo, barrier_reason);
 }

 static void
@@ -1178,11 +1178,12 @@ get_vs_output_info(const struct radv_graphics_pipeline *pipeline)
 static bool
 radv_should_export_multiview(const struct radv_shader_stage *stage, const struct radv_pipeline_key *pipeline_key)
 {
-   /* Export the layer in the last VGT stage if multiview is used. When the next stage is unknown
-    * (with graphics pipeline library), the layer is exported unconditionally.
+   /* Export the layer in the last VGT stage if multiview is used.
+    * Also checks for NONE stage, which happens when we have depth-only rendering.
+    * When the next stage is unknown (with graphics pipeline library), the layer is exported unconditionally.
     */
    return pipeline_key->has_multiview_view_index &&
-          (stage->info.next_stage == MESA_SHADER_FRAGMENT ||
+          (stage->info.next_stage == MESA_SHADER_FRAGMENT || stage->info.next_stage == MESA_SHADER_NONE ||
           !(pipeline_key->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) &&
          !(stage->nir->info.outputs_written & VARYING_BIT_LAYER);
 }
@@ -1155,7 +1155,7 @@ radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, stru
                                .range = VK_WHOLE_SIZE}}});

    /* Encode the number of elements for easy access by the shader. */
-   pipeline_stats_mask &= (1 << radv_get_pipelinestat_query_size(device)) - 1;
+   pipeline_stats_mask &= (1 << (radv_get_pipelinestat_query_size(device) / 8)) - 1;
    pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;

    avail_offset -= src_offset;
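The one-character-looking fix above hides a units bug: the query size is in bytes while the mask wants one bit per statistic, and each pipeline statistic is a 64-bit counter (8 bytes). A hedged sketch of the packing, where pack_stats_mask and query_size_bytes are illustrative names standing in for the driver's helpers:

   #include <stdint.h>

   /* Sketch, not driver code: keep one valid bit per 8-byte statistics
    * counter, then stash the number of set bits in bits 16 and up so the
    * query-copy shader can read both with a single load. */
   static uint32_t pack_stats_mask(uint32_t mask, uint32_t query_size_bytes)
   {
      uint32_t num_stats = query_size_bytes / 8;           /* 8 bytes per counter */
      mask &= (1u << num_stats) - 1;                       /* keep only valid bits */
      mask |= (uint32_t)__builtin_popcount(mask) << 16;    /* element count */
      return mask;
   }

Without the division, shifting by the byte size instead of the counter count produced an oversized (or undefined) mask.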
@@ -4327,7 +4327,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
    if (cs_variant->prog_data.cs->shared_size > 0) {
       job->csd.shared_memory =
          v3dv_bo_alloc(cmd_buffer->device,
-                       cs_variant->prog_data.cs->shared_size * wgs_per_sg,
+                       cs_variant->prog_data.cs->shared_size * num_wgs,
                        "shared_vars", true);
       if (!job->csd.shared_memory) {
          v3dv_flag_oom(cmd_buffer, NULL);
@@ -79,7 +79,7 @@ subdir('nir')

 subdir('spirv')

-if with_opencl_spirv
+if with_clc
   subdir('clc')
 endif
 if with_gallium
@@ -3133,6 +3133,8 @@ nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
    case nir_texop_txb:
    case nir_texop_lod:
       return true;
+   case nir_texop_tg4:
+      return instr->is_gather_implicit_lod;
    default:
       return false;
    }
@@ -189,7 +189,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
    case nir_intrinsic_load_resume_shader_address_amd:
    case nir_intrinsic_load_global_const_block_intel:
    case nir_intrinsic_load_reloc_const_intel:
-   case nir_intrinsic_load_global_block_intel:
    case nir_intrinsic_load_btd_global_arg_addr_intel:
    case nir_intrinsic_load_btd_local_arg_addr_intel:
    case nir_intrinsic_load_mesh_inline_data_intel:
@@ -219,6 +218,13 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
       is_divergent = false;
       break;

+   /* This is divergent because it specifically loads sequential values into
+    * successive SIMD lanes.
+    */
+   case nir_intrinsic_load_global_block_intel:
+      is_divergent = true;
+      break;
+
    case nir_intrinsic_decl_reg:
       is_divergent = nir_intrinsic_divergent(instr);
       break;
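The comment in the hunk is worth picturing concretely. A hypothetical scalar model of a block load (the function and names are illustrative, not NIR or Intel API):

   #include <stdint.h>

   /* Hypothetical model of a "block" load: one uniform base address, but
    * lane i of the subgroup receives element i. The loaded value therefore
    * differs per lane, which is the definition of a divergent result, even
    * though every source operand is uniform. */
   static void block_load_model(const uint32_t *base, uint32_t lane_count,
                                uint32_t *per_lane_result)
   {
      for (uint32_t lane = 0; lane < lane_count; lane++)
         per_lane_result[lane] = base[lane]; /* sequential values, successive lanes */
   }

Classifying the intrinsic as uniform, as the removed hunk did, would let later passes scalarize a value that is actually different in every lane.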
@@ -4383,6 +4383,7 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
       break;
    }
    case SpvOpCopyObject:
+   case SpvOpExpectKHR:
       vtn_copy_value(b, w[3], w[2]);
       return;
@@ -6458,18 +6459,18 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
       vtn_handle_integer_dot(b, opcode, w, count);
       break;

+   case SpvOpBitcast:
+      vtn_handle_bitcast(b, w, count);
+      break;
+
    /* TODO: One day, we should probably do something with this information
     * For now, though, it's safe to implement them as no-ops.
     * Needed for Rusticl sycl support.
     */
    case SpvOpAssumeTrueKHR:
       break;

-   case SpvOpExpectKHR:
-      break;
-
-   case SpvOpBitcast:
-      vtn_handle_bitcast(b, w, count);
-      break;
-
    case SpvOpVectorExtractDynamic:
    case SpvOpVectorInsertDynamic:
    case SpvOpVectorShuffle:
@@ -2024,7 +2024,9 @@ adjust_patch_locations(struct vtn_builder *b, struct vtn_variable *var)

    for (uint16_t i = 0; i < num_data; i++) {
       vtn_assert(data[i].location < VARYING_SLOT_PATCH0);
-      if (data[i].patch && data[i].location >= VARYING_SLOT_VAR0)
+      if (data[i].patch &&
+          (data[i].mode == nir_var_shader_in || data[i].mode == nir_var_shader_out) &&
+          data[i].location >= VARYING_SLOT_VAR0)
          data[i].location += VARYING_SLOT_PATCH0 - VARYING_SLOT_VAR0;
    }
 }
@@ -73,6 +73,7 @@ static const struct dri2_wl_visual {
     */
    int alt_dri_image_format;
    int bpp;
+   int opaque_wl_drm_format;
    int rgba_shifts[4];
    unsigned int rgba_sizes[4];
 } dri2_wl_visuals[] = {
@@ -83,6 +84,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ABGR16161616F,
       0,
       64,
+      WL_DRM_FORMAT_XBGR16F,
       {0, 16, 32, 48},
       {16, 16, 16, 16},
    },
@@ -93,6 +95,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XBGR16161616F,
       0,
       64,
+      WL_DRM_FORMAT_XBGR16F,
       {0, 16, 32, -1},
       {16, 16, 16, 0},
    },
@@ -103,6 +106,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XRGB2101010,
       __DRI_IMAGE_FORMAT_XBGR2101010,
       32,
+      WL_DRM_FORMAT_XRGB2101010,
       {20, 10, 0, -1},
       {10, 10, 10, 0},
    },
@@ -113,6 +117,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ARGB2101010,
       __DRI_IMAGE_FORMAT_ABGR2101010,
       32,
+      WL_DRM_FORMAT_XRGB2101010,
       {20, 10, 0, 30},
       {10, 10, 10, 2},
    },
@@ -123,6 +128,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XBGR2101010,
       __DRI_IMAGE_FORMAT_XRGB2101010,
       32,
+      WL_DRM_FORMAT_XBGR2101010,
       {0, 10, 20, -1},
       {10, 10, 10, 0},
    },
@@ -133,6 +139,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ABGR2101010,
       __DRI_IMAGE_FORMAT_ARGB2101010,
       32,
+      WL_DRM_FORMAT_XBGR2101010,
       {0, 10, 20, 30},
       {10, 10, 10, 2},
    },
@@ -143,6 +150,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XRGB8888,
       __DRI_IMAGE_FORMAT_NONE,
       32,
+      WL_DRM_FORMAT_XRGB8888,
       {16, 8, 0, -1},
       {8, 8, 8, 0},
    },
@@ -153,6 +161,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ARGB8888,
       __DRI_IMAGE_FORMAT_NONE,
       32,
+      WL_DRM_FORMAT_XRGB8888,
       {16, 8, 0, 24},
       {8, 8, 8, 8},
    },
@@ -163,6 +172,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ABGR8888,
       __DRI_IMAGE_FORMAT_NONE,
       32,
+      WL_DRM_FORMAT_XBGR8888,
       {0, 8, 16, 24},
       {8, 8, 8, 8},
    },
@@ -173,6 +183,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XBGR8888,
       __DRI_IMAGE_FORMAT_NONE,
       32,
+      WL_DRM_FORMAT_XBGR8888,
       {0, 8, 16, -1},
       {8, 8, 8, 0},
    },
@@ -183,6 +194,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_RGB565,
       __DRI_IMAGE_FORMAT_NONE,
       16,
+      WL_DRM_FORMAT_RGB565,
       {11, 5, 0, -1},
       {5, 6, 5, 0},
    },
@@ -193,6 +205,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ARGB1555,
       __DRI_IMAGE_FORMAT_ABGR1555,
       16,
+      WL_DRM_FORMAT_XRGB1555,
       {10, 5, 0, 15},
       {5, 5, 5, 1},
    },
@@ -203,6 +216,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XRGB1555,
       __DRI_IMAGE_FORMAT_XBGR1555,
       16,
+      WL_DRM_FORMAT_XRGB1555,
       {10, 5, 0, -1},
       {5, 5, 5, 0},
    },
@@ -213,6 +227,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_ARGB4444,
       __DRI_IMAGE_FORMAT_XBGR4444,
       16,
+      WL_DRM_FORMAT_XRGB4444,
       {8, 4, 0, 12},
       {4, 4, 4, 4},
    },
@@ -223,6 +238,7 @@ static const struct dri2_wl_visual {
       __DRI_IMAGE_FORMAT_XRGB4444,
       __DRI_IMAGE_FORMAT_XBGR4444,
       16,
+      WL_DRM_FORMAT_XRGB4444,
       {8, 4, 0, -1},
       {4, 4, 4, 0},
    },
@@ -230,7 +246,7 @@ static const struct dri2_wl_visual {

 static int
 dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy,
-                               const __DRIconfig *config, bool force_opaque)
+                               const __DRIconfig *config)
 {
    int shifts[4];
    unsigned int sizes[4];
@@ -240,16 +256,13 @@ dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy,
    for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) {
       const struct dri2_wl_visual *wl_visual = &dri2_wl_visuals[i];

-      int cmp_rgb_shifts =
-         memcmp(shifts, wl_visual->rgba_shifts, 3 * sizeof(shifts[0]));
-      int cmp_rgb_sizes =
-         memcmp(sizes, wl_visual->rgba_sizes, 3 * sizeof(sizes[0]));
+      int cmp_rgba_shifts =
+         memcmp(shifts, wl_visual->rgba_shifts, 4 * sizeof(shifts[0]));
+      int cmp_rgba_sizes =
+         memcmp(sizes, wl_visual->rgba_sizes, 4 * sizeof(sizes[0]));

-      if (cmp_rgb_shifts == 0 && cmp_rgb_sizes == 0 &&
-          wl_visual->rgba_shifts[3] == (force_opaque ? -1 : shifts[3]) &&
-          wl_visual->rgba_sizes[3] == (force_opaque ? 0 : sizes[3])) {
+      if (cmp_rgba_shifts == 0 && cmp_rgba_sizes == 0)
          return i;
-      }
    }

    return -1;
@@ -302,7 +315,7 @@ dri2_wl_is_format_supported(void *user_data, uint32_t format)

    for (int i = 0; dri2_dpy->driver_configs[i]; i++)
       if (j == dri2_wl_visual_idx_from_config(
-                  dri2_dpy, dri2_dpy->driver_configs[i], false))
+                  dri2_dpy, dri2_dpy->driver_configs[i]))
          return true;

    return false;
@@ -710,43 +723,10 @@ dri2_wl_create_window_surface(_EGLDisplay *disp, _EGLConfig *conf,
    dri2_surf->base.Width = window->width;
    dri2_surf->base.Height = window->height;

-#ifndef NDEBUG
-   /* Enforce that every visual has an opaque variant (requirement to support
-    * EGL_EXT_present_opaque)
-    */
-   for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) {
-      const struct dri2_wl_visual *transparent_visual = &dri2_wl_visuals[i];
-      if (transparent_visual->rgba_sizes[3] == 0) {
-         continue;
-      }
-
-      bool found_opaque_equivalent = false;
-      for (unsigned int j = 0; j < ARRAY_SIZE(dri2_wl_visuals); j++) {
-         const struct dri2_wl_visual *opaque_visual = &dri2_wl_visuals[j];
-         if (opaque_visual->rgba_sizes[3] != 0) {
-            continue;
-         }
-
-         int cmp_rgb_shifts =
-            memcmp(transparent_visual->rgba_shifts, opaque_visual->rgba_shifts,
-                   3 * sizeof(opaque_visual->rgba_shifts[0]));
-         int cmp_rgb_sizes =
-            memcmp(transparent_visual->rgba_sizes, opaque_visual->rgba_sizes,
-                   3 * sizeof(opaque_visual->rgba_sizes[0]));
-
-         if (cmp_rgb_shifts == 0 && cmp_rgb_sizes == 0) {
-            found_opaque_equivalent = true;
-            break;
-         }
-      }
-
-      assert(found_opaque_equivalent);
-   }
-#endif
-
-   visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config,
-                                               dri2_surf->base.PresentOpaque);
+   visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config);
    assert(visual_idx != -1);
    assert(dri2_wl_visuals[visual_idx].dri_image_format !=
           __DRI_IMAGE_FORMAT_NONE);

    if (dri2_dpy->wl_dmabuf || dri2_dpy->wl_drm) {
       dri2_surf->format = dri2_wl_visuals[visual_idx].wl_drm_format;
@@ -1501,6 +1481,9 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
          close(fd);
       }

+   if (dri2_surf && dri2_surf->base.PresentOpaque)
+      fourcc = dri2_wl_visuals[visual_idx].opaque_wl_drm_format;
+
    ret = zwp_linux_buffer_params_v1_create_immed(params, width, height,
                                                  fourcc, 0);
    zwp_linux_buffer_params_v1_destroy(params);
@@ -1643,6 +1626,12 @@ dri2_wl_swap_buffers_with_damage(_EGLDisplay *disp, _EGLSurface *draw,
                              dri2_surf->current->dri_image, 0, 0, dri2_surf->base.Width,
                              dri2_surf->base.Height, 0, 0, dri2_surf->base.Width,
                              dri2_surf->base.Height, 0);
+
+      if (dri2_dpy->flush) {
+         __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(draw);
+
+         dri2_dpy->flush->flush(dri_drawable);
+      }
    }

    wl_surface_commit(dri2_surf->wl_surface_wrapper);
@@ -2078,7 +2067,7 @@ dri2_wl_add_configs_for_visuals(_EGLDisplay *disp)

       /* No match for config. Try if we can blitImage convert to a visual */
       c = dri2_wl_visual_idx_from_config(dri2_dpy,
-                                         dri2_dpy->driver_configs[i], false);
+                                         dri2_dpy->driver_configs[i]);

       if (c == -1)
          continue;
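The net effect of the EGL_EXT_present_opaque rework above is that opaqueness is no longer baked into config matching; instead every visual carries its opaque-equivalent FourCC and the swap is made once, at buffer creation. A hedged sketch of that decision (the struct and function names are illustrative, not the real dri2 types):

   #include <stdbool.h>

   /* Illustrative model of the new approach: the visual table stores both
    * FourCCs, and the opaque one is substituted only when the surface was
    * created with EGL_EXT_present_opaque. */
   struct visual_model { int wl_drm_format; int opaque_wl_drm_format; };

   static int pick_wl_format(const struct visual_model *v, bool present_opaque)
   {
      return present_opaque ? v->opaque_wl_drm_format : v->wl_drm_format;
   }

This removes the force_opaque parameter and the debug-build table scan, since the opaque equivalent is now recorded per entry rather than rediscovered by comparing shift/size triples.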
@@ -2093,7 +2093,7 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
       memset(&cmd_buffer->descriptors[i].push_set, 0, sizeof(cmd_buffer->descriptors[i].push_set));
       cmd_buffer->descriptors[i].push_set.base.type = VK_OBJECT_TYPE_DESCRIPTOR_SET;
       cmd_buffer->descriptors[i].max_sets_bound = 0;
-      cmd_buffer->descriptors[i].dynamic_bound = 0;
+      cmd_buffer->descriptors[i].max_dynamic_offset_size = 0;
    }

    u_trace_fini(&cmd_buffer->trace);
@@ -2385,12 +2385,12 @@ tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
       cmd->state.desc_sets =
          tu_cs_draw_state(&cmd->sub_cs, &state_cs,
                           4 + 4 * descriptors_state->max_sets_bound +
-                          (descriptors_state->dynamic_bound ? 6 : 0));
+                          (descriptors_state->max_dynamic_offset_size ? 6 : 0));
    } else {
       cmd->state.desc_sets =
          tu_cs_draw_state(&cmd->sub_cs, &state_cs,
                           3 + 2 * descriptors_state->max_sets_bound +
-                          (descriptors_state->dynamic_bound ? 3 : 0));
+                          (descriptors_state->max_dynamic_offset_size ? 3 : 0));
    }
    cs = &state_cs;
 } else {
@@ -2410,7 +2410,7 @@ tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
    }

    /* Dynamic descriptors get the reserved descriptor set. */
-   if (descriptors_state->dynamic_bound) {
+   if (descriptors_state->max_dynamic_offset_size) {
       int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
       assert(reserved_set_idx >= 0); /* reserved set must be bound */
@@ -2561,22 +2561,26 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    assert(dyn_idx == dynamicOffsetCount);

    if (dynamic_offset_offset) {
+      descriptors_state->max_dynamic_offset_size =
+         MAX2(descriptors_state->max_dynamic_offset_size, dynamic_offset_offset);
+
       /* allocate and fill out dynamic descriptor set */
       struct tu_cs_memory dynamic_desc_set;
       int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
-      VkResult result = tu_cs_alloc(&cmd->sub_cs,
-                                    dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS),
-                                    A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
+      VkResult result =
+         tu_cs_alloc(&cmd->sub_cs,
+                     descriptors_state->max_dynamic_offset_size /
+                        (4 * A6XX_TEX_CONST_DWORDS),
+                     A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
       if (result != VK_SUCCESS) {
          vk_command_buffer_set_error(&cmd->vk, result);
          return;
       }

       memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
-             dynamic_offset_offset);
+             descriptors_state->max_dynamic_offset_size);
       assert(reserved_set_idx >= 0); /* reserved set must be bound */
       descriptors_state->set_iova[reserved_set_idx] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B;
-      descriptors_state->dynamic_bound = true;
    }

    tu_dirty_desc_sets(cmd, pipelineBindPoint);
@@ -54,7 +54,7 @@ struct tu_descriptor_state
    uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
    uint64_t set_iova[MAX_SETS];
    uint32_t max_sets_bound;
-   bool dynamic_bound;
+   uint32_t max_dynamic_offset_size;
 };

 enum tu_cmd_dirty_bits
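The bool-to-byte-size change above is the heart of the fix: as the commit subject suggests, Vulkan's pipeline compatibility rules allow a later pipeline to keep using sets bound earlier, so the reserved dynamic-descriptor set must be sized for the largest layout seen, not just the last one. A hedged sketch of the sizing, where TEX_CONST_DWORDS is an illustrative stand-in for A6XX_TEX_CONST_DWORDS:

   #include <stdint.h>

   #define TEX_CONST_DWORDS 16   /* illustrative: 16 dwords, 64 bytes/descriptor */

   /* Sketch, not driver code: track a running maximum across binds, then
    * derive how many descriptor slots the reserved set allocation needs. */
   static uint32_t update_max_dynamic_size(uint32_t current_max, uint32_t bound_size)
   {
      return bound_size > current_max ? bound_size : current_max;
   }

   static uint32_t dynamic_descriptor_count(uint32_t max_dynamic_offset_size)
   {
      return max_dynamic_offset_size / (4 * TEX_CONST_DWORDS);
   }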
@@ -27,6 +27,7 @@
  **************************************************************************/

 #include "util/u_memory.h"
+#include "lp_bld_const.h"
 #include "lp_bld_type.h"
 #include "lp_bld_init.h"
 #include "lp_bld_flow.h"
@@ -271,18 +272,17 @@ void lp_exec_bgnloop(struct lp_exec_mask *mask, bool load)
 }

 void lp_exec_endloop(struct gallivm_state *gallivm,
-                     struct lp_exec_mask *mask)
+                     struct lp_exec_mask *exec_mask,
+                     struct lp_build_mask_context *mask)
 {
-   LLVMBuilderRef builder = mask->bld->gallivm->builder;
-   struct function_ctx *ctx = func_ctx(mask);
+   LLVMBuilderRef builder = exec_mask->bld->gallivm->builder;
+   struct function_ctx *ctx = func_ctx(exec_mask);
    LLVMBasicBlockRef endloop;
-   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
-   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
-                                               mask->bld->type.width *
-                                               mask->bld->type.length);
+   LLVMTypeRef int_type = LLVMInt32TypeInContext(exec_mask->bld->gallivm->context);
+   LLVMTypeRef mask_type = LLVMIntTypeInContext(exec_mask->bld->gallivm->context, exec_mask->bld->type.length);
    LLVMValueRef i1cond, i2cond, icond, limiter;

-   assert(mask->break_mask);
+   assert(exec_mask->break_mask);

    assert(ctx->loop_stack_size);
    if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
@@ -294,14 +294,14 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
    /*
     * Restore the cont_mask, but don't pop
     */
-   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
-   lp_exec_mask_update(mask);
+   exec_mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
+   lp_exec_mask_update(exec_mask);

    /*
     * Unlike the continue mask, the break_mask must be preserved across loop
     * iterations
     */
-   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
+   LLVMBuildStore(builder, exec_mask->break_mask, ctx->break_var);

    /* Decrement the loop limiter */
    limiter = LLVMBuildLoad2(builder, int_type, ctx->loop_limiter, "");
@@ -314,12 +314,18 @@ void lp_exec_endloop(struct gallivm_state *gallivm,

    LLVMBuildStore(builder, limiter, ctx->loop_limiter);

-   /* i1cond = (mask != 0) */
+   LLVMValueRef end_mask = exec_mask->exec_mask;
+   if (mask)
+      end_mask = LLVMBuildAnd(builder, exec_mask->exec_mask, lp_build_mask_value(mask), "");
+   end_mask = LLVMBuildICmp(builder, LLVMIntNE, end_mask, lp_build_zero(gallivm, exec_mask->bld->type), "");
+   end_mask = LLVMBuildBitCast(builder, end_mask, mask_type, "");
+
+   /* i1cond = (end_mask != 0) */
    i1cond = LLVMBuildICmp(
       builder,
       LLVMIntNE,
-      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
-      LLVMConstNull(reg_type), "i1cond");
+      end_mask,
+      LLVMConstNull(mask_type), "i1cond");

    /* i2cond = (looplimiter > 0) */
    i2cond = LLVMBuildICmp(
@@ -331,7 +337,7 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
    /* if( i1cond && i2cond ) */
    icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

-   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
+   endloop = lp_build_insert_new_block(exec_mask->bld->gallivm, "endloop");

    LLVMBuildCondBr(builder,
                    icond, ctx->loop_block, endloop);
@@ -341,14 +347,14 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
    assert(ctx->loop_stack_size);
    --ctx->loop_stack_size;
    --ctx->bgnloop_stack_size;
-   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
-   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
+   exec_mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
+   exec_mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
    ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
    ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                            ctx->switch_stack_size];

-   lp_exec_mask_update(mask);
+   lp_exec_mask_update(exec_mask);
 }

 void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
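The "Consider the initial mask when terminating loops" change above boils down to ANDing the per-lane execution mask with the function's outer mask before testing for loop exit. A hedged scalar model, treating the vector masks as plain bitmasks (illustrative names, not gallivm code):

   #include <stdbool.h>
   #include <stdint.h>

   /* Sketch of the new exit condition: lanes that were never active in the
    * enclosing mask cannot keep the loop spinning, and the loop limiter
    * still bounds the iteration count. */
   static bool keep_looping(uint64_t exec_mask, uint64_t initial_mask,
                            int loop_limiter)
   {
      return ((exec_mask & initial_mask) != 0) && loop_limiter > 0;
   }

Before the fix, only exec_mask was tested, so a loop whose active lanes were all disabled by the outer mask could iterate until the limiter ran out.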
@@ -101,6 +101,8 @@ struct lp_exec_mask {
    int function_stack_size;
 };

+struct lp_build_mask_context;
+
 void lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx);
 void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld);
 void lp_exec_mask_fini(struct lp_exec_mask *mask);
@@ -112,7 +114,8 @@ void lp_exec_mask_update(struct lp_exec_mask *mask);
 void lp_exec_bgnloop_post_phi(struct lp_exec_mask *mask);
 void lp_exec_bgnloop(struct lp_exec_mask *mask, bool load_mask);
 void lp_exec_endloop(struct gallivm_state *gallivm,
-                     struct lp_exec_mask *mask);
+                     struct lp_exec_mask *exec_mask,
+                     struct lp_build_mask_context *mask);
 void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                             LLVMValueRef val);
 void lp_exec_mask_cond_invert(struct lp_exec_mask *mask);
@@ -2024,7 +2024,7 @@ static void bgnloop(struct lp_build_nir_context *bld_base)
 static void endloop(struct lp_build_nir_context *bld_base)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
-   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
+   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
 }

 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
@@ -4268,7 +4268,7 @@ endloop_emit(
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

-   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
+   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
 }

 static void
@@ -118,21 +118,21 @@ vl_video_buffer_is_format_supported(struct pipe_screen *screen,
    vl_get_video_buffer_formats(screen, format, resource_formats);

    for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
-      enum pipe_format format = resource_formats[i];
+      enum pipe_format fmt = resource_formats[i];

-      if (format == PIPE_FORMAT_NONE)
+      if (fmt == PIPE_FORMAT_NONE)
          continue;

       /* we at least need to sample from it */
-      if (!screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_SAMPLER_VIEW))
-         return false;
+      if (!screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_SAMPLER_VIEW))
+         continue;

-      format = vl_video_buffer_surface_format(format);
-      if (!screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET))
-         return false;
+      fmt = vl_video_buffer_surface_format(fmt);
+      if (screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET))
+         return true;
    }

-   return true;
+   return false;
 }

 unsigned
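The refactor above also flips the predicate from "every candidate format must pass" to "any candidate format that is both sampleable and renderable makes the buffer usable" (this is part of the VDPAU surface-format rework on the RX 6600 HEVC bugs). A hedged restatement of the new logic, with hypothetical per-format flag arrays standing in for the screen callbacks:

   #include <stdbool.h>

   /* Sketch: the old code rejected the whole video buffer as soon as one
    * per-component format failed either test; the new code accepts it as
    * soon as one format passes both. */
   static bool any_usable(const bool *sampleable, const bool *renderable, int n)
   {
      for (int i = 0; i < n; i++)
         if (sampleable[i] && renderable[i])
            return true;
      return false;
   }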
@@ -7135,7 +7135,12 @@ iris_upload_dirty_render_state(struct iris_context *ice,
       }
    }

+#if GFX_VERx10 >= 125
+   /* This is only used on >= gfx125 for dynamic 3DSTATE_TE emission
+    * related workarounds.
+    */
+   bool program_needs_wa_14015055625 = false;
+#endif

 #if INTEL_WA_14015055625_GFX_VER
    /* Check if FS stage will use primitive ID overrides for Wa_14015055625. */
@@ -7239,16 +7244,14 @@ iris_upload_dirty_render_state(struct iris_context *ice,
                           GENX(3DSTATE_PS_length));
          iris_emit_merge(batch, shader_psx, psx_state,
                          GENX(3DSTATE_PS_EXTRA_length));
-      } else if (stage == MESA_SHADER_TESS_EVAL &&
-                 intel_needs_workaround(batch->screen->devinfo, 14015055625) &&
-                 !program_needs_wa_14015055625) {
-         /* This program doesn't require Wa_14015055625, so we can enable
-          * a Tessellation Distribution Mode.
-          */
+#if GFX_VERx10 >= 125
+      } else if (stage == MESA_SHADER_TESS_EVAL) {
          uint32_t te_state[GENX(3DSTATE_TE_length)] = { 0 };
         iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
-            if (intel_needs_workaround(batch->screen->devinfo, 22012699309))
+            if (intel_needs_workaround(screen->devinfo, 14015055625) &&
+                program_needs_wa_14015055625)
+               te.TessellationDistributionMode = TEDMODE_OFF;
+            else if (intel_needs_workaround(screen->devinfo, 22012699309))
                te.TessellationDistributionMode = TEDMODE_RR_STRICT;
            else
               te.TessellationDistributionMode = TEDMODE_RR_FREE;
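After this change ("iris: make sure DS and TE are sent in pairs on >= gfx125"), the distribution-mode choice is a simple priority chain. A hedged sketch of that decision, with an illustrative enum mirroring the TEDMODE_* names in the diff (not the real genxml definitions):

   /* Sketch of the 3DSTATE_TE distribution-mode choice, in priority order. */
   enum te_mode_model { TE_OFF, TE_RR_STRICT, TE_RR_FREE };

   static enum te_mode_model pick_te_mode(_Bool needs_wa_14015055625,
                                          _Bool program_needs_wa,
                                          _Bool needs_wa_22012699309)
   {
      if (needs_wa_14015055625 && program_needs_wa)
         return TE_OFF;        /* workaround active: disable distribution */
      if (needs_wa_22012699309)
         return TE_RR_STRICT;  /* strict round-robin */
      return TE_RR_FREE;       /* default: free round-robin */
   }

The key structural difference from the removed code is that 3DSTATE_TE is now always emitted for tessellation-evaluation updates on gfx125+, so it stays paired with 3DSTATE_DS instead of being skipped when the workaround applied.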
@@ -111,7 +111,7 @@ traces:
       checksum: 58a6a276abc0e28fcb2a8acea3342712
   gputest/pixmark-piano-v2.trace:
     gl-vmware-llvmpipe:
-      checksum: edc09da55fea262e76686d99548f2cfd
+      checksum: b0077264046fe6dd2cdec059d9e53bf5
   gputest/triangle-v2.trace:
     gl-vmware-llvmpipe:
       checksum: 7812de00011a3a059892e36cea19c696
@@ -95,7 +95,7 @@ enum {
    CS_ARG_VERTEX_DATA,
    CS_ARG_PER_THREAD_DATA,
    CS_ARG_OUTER_COUNT,
-   CS_ARG_CORO_X_LOOPS = CS_ARG_OUTER_COUNT,
+   CS_ARG_CORO_SUBGROUP_COUNT = CS_ARG_OUTER_COUNT,
    CS_ARG_CORO_PARTIALS,
    CS_ARG_CORO_BLOCK_X_SIZE,
    CS_ARG_CORO_BLOCK_Y_SIZE,
@@ -374,7 +374,7 @@ generate_compute(struct llvmpipe_context *lp,
    else
       arg_types[CS_ARG_VERTEX_DATA] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* mesh shaders only */
    arg_types[CS_ARG_PER_THREAD_DATA] = variant->jit_cs_thread_data_ptr_type; /* per thread data */
-   arg_types[CS_ARG_CORO_X_LOOPS] = int32_type; /* coro only - num X loops */
+   arg_types[CS_ARG_CORO_SUBGROUP_COUNT] = int32_type; /* coro only - subgroup count */
    arg_types[CS_ARG_CORO_PARTIALS] = int32_type; /* coro only - partials */
    arg_types[CS_ARG_CORO_BLOCK_X_SIZE] = int32_type; /* coro block_x_size */
    arg_types[CS_ARG_CORO_BLOCK_Y_SIZE] = int32_type; /* coro block_y_size */
@@ -560,23 +560,24 @@ generate_compute(struct llvmpipe_context *lp,
       output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs");
    }

-   struct lp_build_loop_state loop_state[4];
-   LLVMValueRef num_x_loop;
-   LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
-   num_x_loop = LLVMBuildAdd(gallivm->builder, block_x_size_arg, vec_length, "");
-   num_x_loop = LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), "");
-   num_x_loop = LLVMBuildUDiv(gallivm->builder, num_x_loop, vec_length, "");
-   LLVMValueRef partials = LLVMBuildURem(gallivm->builder, block_x_size_arg, vec_length, "");
+   struct lp_build_loop_state loop_state[2];

-   LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
-   coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");
+   LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
+
+   LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
+   invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");
+
+   LLVMValueRef partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");
+
+   LLVMValueRef num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
+   num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");

    /* build a ptr in memory to store all the frames in later. */
    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
    LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
    LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);

-   LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, coro_num_hdls, "coro_hdls");
+   LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls");

    unsigned end_coroutine = INT_MAX;
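This hunk is the core of "llvmpipe: Use full subgroups when possible": instead of one coroutine per vector-wide slice of the X dimension (times Y and Z), the workgroup is flattened and split into ceil(total_invocations / subgroup_size) subgroups. The count the new LLVM IR computes, as plain C:

   #include <stdint.h>

   /* Sketch of the new subgroup count: flatten the workgroup, then round up
    * to whole subgroups. vec_length in the diff plays the subgroup_size role. */
   static uint32_t num_subgroups(uint32_t bx, uint32_t by, uint32_t bz,
                                 uint32_t subgroup_size)
   {
      uint32_t invocations = bx * by * bz;
      return (invocations + subgroup_size - 1) / subgroup_size; /* ceil division */
   }

For an 8x4x1 workgroup and subgroup size 8 this yields 4 full subgroups; the old per-row scheme would have padded each 8-wide row separately and wasted lanes whenever bx was not a multiple of the vector length.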
@@ -585,22 +586,17 @@ generate_compute(struct llvmpipe_context *lp,
     * and calls the coroutine main entrypoint on the first pass, but in subsequent
     * passes it checks if the coroutine has completed and resumes it if not.
     */
-   /* take x_width - round up to type.length width */
-   lp_build_loop_begin(&loop_state[3], gallivm,
-                       lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
-   lp_build_loop_begin(&loop_state[2], gallivm,
-                       lp_build_const_int32(gallivm, 0)); /* z loop */
    lp_build_loop_begin(&loop_state[1], gallivm,
-                       lp_build_const_int32(gallivm, 0)); /* y loop */
+                       lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
    lp_build_loop_begin(&loop_state[0], gallivm,
-                       lp_build_const_int32(gallivm, 0)); /* x loop */
+                       lp_build_const_int32(gallivm, 0)); /* subgroup loop */
    {
       LLVMValueRef args[CS_ARG_MAX];
       args[CS_ARG_CONTEXT] = context_ptr;
       args[CS_ARG_RESOURCES] = resources_ptr;
-      args[CS_ARG_BLOCK_X_SIZE] = loop_state[0].counter;
-      args[CS_ARG_BLOCK_Y_SIZE] = loop_state[1].counter;
-      args[CS_ARG_BLOCK_Z_SIZE] = loop_state[2].counter;
+      args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type);
+      args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type);
+      args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type);
       args[CS_ARG_GRID_X] = grid_x_arg;
       args[CS_ARG_GRID_Y] = grid_y_arg;
       args[CS_ARG_GRID_Z] = grid_z_arg;
@@ -611,34 +607,25 @@ generate_compute(struct llvmpipe_context *lp,
       args[CS_ARG_DRAW_ID] = draw_id_arg;
       args[CS_ARG_VERTEX_DATA] = io_ptr;
       args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr;
-      args[CS_ARG_CORO_X_LOOPS] = num_x_loop;
+      args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop;
       args[CS_ARG_CORO_PARTIALS] = partials;
       args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg;
       args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg;
       args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg;

-      /* idx = (z * (size_x * size_y) + y * size_x + x */
-      LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
-                                               LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, ""), "");
-      coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
-                                  LLVMBuildMul(gallivm->builder, loop_state[1].counter,
-                                               num_x_loop, ""), "");
-      coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
-                                  loop_state[0].counter, "");
-
-      args[CS_ARG_CORO_IDX] = coro_hdl_idx;
+      args[CS_ARG_CORO_IDX] = loop_state[0].counter;

       args[CS_ARG_CORO_MEM] = coro_mem;

       if (is_mesh)
          args[CS_ARG_CORO_OUTPUTS] = output_array;

-      LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &coro_hdl_idx, 1, "");
+      LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");

       LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl");

       struct lp_build_if_state ifstate;
-      LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[3].counter,
+      LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter,
                                        lp_build_const_int32(gallivm, 0), "");
       /* first time here - call the coroutine function entry point */
       lp_build_if(&ifstate, gallivm, cmp);
@@ -651,24 +638,18 @@ generate_compute(struct llvmpipe_context *lp,
       lp_build_if(&ifstate2, gallivm, coro_done);
       /* if done destroy and force loop exit */
       lp_build_coro_destroy(gallivm, coro_hdl);
-      lp_build_loop_force_set_counter(&loop_state[3], lp_build_const_int32(gallivm, end_coroutine - 1));
+      lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
       lp_build_else(&ifstate2);
       /* otherwise resume the coroutine */
       lp_build_coro_resume(gallivm, coro_hdl);
       lp_build_endif(&ifstate2);
       lp_build_endif(&ifstate);
-      lp_build_loop_force_reload_counter(&loop_state[3]);
+      lp_build_loop_force_reload_counter(&loop_state[1]);
    }
    lp_build_loop_end_cond(&loop_state[0],
-                          num_x_loop,
+                          num_subgroup_loop,
                           NULL, LLVMIntUGE);
    lp_build_loop_end_cond(&loop_state[1],
-                          block_y_size_arg,
-                          NULL, LLVMIntUGE);
-   lp_build_loop_end_cond(&loop_state[2],
-                          block_z_size_arg,
-                          NULL, LLVMIntUGE);
-   lp_build_loop_end_cond(&loop_state[3],
                           lp_build_const_int32(gallivm, end_coroutine),
                           NULL, LLVMIntEQ);
@@ -680,12 +661,8 @@ generate_compute(struct llvmpipe_context *lp,
    LLVMBuildRetVoid(builder);

    /* This is stage (b) - generate the compute shader code inside the coroutine. */
-   LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
    context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT);
    resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES);
-   x_size_arg = LLVMGetParam(coro, CS_ARG_BLOCK_X_SIZE);
-   y_size_arg = LLVMGetParam(coro, CS_ARG_BLOCK_Y_SIZE);
-   z_size_arg = LLVMGetParam(coro, CS_ARG_BLOCK_Z_SIZE);
    grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X);
    grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y);
    grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z);
@@ -696,12 +673,12 @@ generate_compute(struct llvmpipe_context *lp,
    draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID);
    io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA);
    thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA);
-   num_x_loop = LLVMGetParam(coro, CS_ARG_CORO_X_LOOPS);
+   num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT);
    partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS);
    block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE);
    block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE);
    block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE);
-   LLVMValueRef coro_idx = LLVMGetParam(coro, CS_ARG_CORO_IDX);
+   LLVMValueRef subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX);
    coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM);
    if (is_mesh)
       output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS);
@@ -730,27 +707,32 @@ generate_compute(struct llvmpipe_context *lp,
                                   variant->jit_cs_thread_data_type,
                                   thread_data_ptr);

-   LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
-   coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");
-
    /* these are coroutine entrypoint necessities */
    LLVMValueRef coro_id = lp_build_coro_id(gallivm);
-   LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, coro_idx, coro_num_hdls);
+   LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop);
    LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context);
    LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, "");
    alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, "");
    LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
    LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
-   LLVMValueRef tids_x[LP_MAX_VECTOR_LENGTH], tids_y[LP_MAX_VECTOR_LENGTH], tids_z[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, x_size_arg, vec_length, "");
-   for (i = 0; i < cs_type.length; i++) {
-      tids_x[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
-      tids_y[i] = y_size_arg;
-      tids_z[i] = z_size_arg;
-   }
-   system_values.thread_id[0] = lp_build_gather_values(gallivm, tids_x, cs_type.length);
-   system_values.thread_id[1] = lp_build_gather_values(gallivm, tids_y, cs_type.length);
-   system_values.thread_id[2] = lp_build_gather_values(gallivm, tids_z, cs_type.length);
+
+   struct lp_build_context bld;
+   lp_build_context_init(&bld, gallivm, lp_uint_type(cs_type));
+
+   LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, subgroup_id, vec_length, "");
+   LLVMValueRef invocation_indices[LP_MAX_VECTOR_LENGTH];
+   for (i = 0; i < cs_type.length; i++)
+      invocation_indices[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
+   LLVMValueRef invocation_index = lp_build_gather_values(gallivm, invocation_indices, cs_type.length);
+
+   LLVMValueRef block_x_size_vec = lp_build_broadcast_scalar(&bld, block_x_size_arg);
+   LLVMValueRef block_y_size_vec = lp_build_broadcast_scalar(&bld, block_y_size_arg);
+
+   system_values.thread_id[0] = LLVMBuildURem(gallivm->builder, invocation_index, block_x_size_vec, "");
+   system_values.thread_id[1] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
+   system_values.thread_id[1] = LLVMBuildURem(gallivm->builder, system_values.thread_id[1], block_y_size_vec, "");
+   system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
+   system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, system_values.thread_id[2], block_y_size_vec, "");

    system_values.block_id[0] = grid_x_arg;
    system_values.block_id[1] = grid_y_arg;
@@ -763,38 +745,15 @@ generate_compute(struct llvmpipe_context *lp,
    system_values.work_dim = work_dim_arg;
    system_values.draw_id = draw_id_arg;

-   /* subgroup_id = ((z * block_size_x * block_size_y) + (y * block_size_x) + x) / subgroup_size
-    *
-    * this breaks if z or y is zero, so distribute the division to preserve ids
-    *
-    * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + (x / subgroup_size)
-    *
-    * except "x" is pre-divided here
-    *
-    * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + x
-    */
-   LLVMValueRef subgroup_id = LLVMBuildUDiv(builder,
-                                            LLVMBuildMul(gallivm->builder, z_size_arg, LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""), ""),
-                                            vec_length, "");
-   subgroup_id = LLVMBuildAdd(gallivm->builder,
-                              subgroup_id,
-                              LLVMBuildUDiv(builder, LLVMBuildMul(gallivm->builder, y_size_arg, block_x_size_arg, ""), vec_length, ""),
-                              "");
-   subgroup_id = LLVMBuildAdd(gallivm->builder, subgroup_id, x_size_arg, "");
    system_values.subgroup_id = subgroup_id;
-   LLVMValueRef num_subgroups = LLVMBuildUDiv(builder,
-                                              LLVMBuildMul(builder, block_x_size_arg,
-                                                           LLVMBuildMul(builder, block_y_size_arg, block_z_size_arg, ""), ""),
-                                              vec_length, "");
-   LLVMValueRef subgroup_cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, num_subgroups, lp_build_const_int32(gallivm, 0), "");
-   system_values.num_subgroups = LLVMBuildSelect(builder, subgroup_cmp, lp_build_const_int32(gallivm, 1), num_subgroups, "");
+   system_values.num_subgroups = num_subgroup_loop;

    system_values.block_size[0] = block_x_size_arg;
    system_values.block_size[1] = block_y_size_arg;
    system_values.block_size[2] = block_z_size_arg;

-   LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
-   LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
+   LLVMValueRef last_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id, LLVMBuildSub(gallivm->builder, num_subgroup_loop, lp_build_const_int32(gallivm, 1), ""), "");
+   LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_loop, has_partials, "");
    struct lp_build_if_state if_state;
    LLVMTypeRef mask_type = LLVMVectorType(int32_type, cs_type.length);
    LLVMValueRef mask_val = lp_build_alloca(gallivm, mask_type, "mask");
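A side note, not part of the commit itself: the hunks above replace per-axis subgroup arithmetic with a single linear invocation index that is divided back out into thread coordinates via URem/UDiv. A minimal C sketch of that same index math, with illustrative names that are not from the Mesa source:

/* Recover (x, y, z) local thread coordinates from a flat invocation
 * index, given block dimensions -- the same mapping the URem/UDiv
 * chain above emits as LLVM IR. Illustrative sketch only. */
#include <assert.h>

static void
thread_id_from_linear(unsigned linear, unsigned block_x, unsigned block_y,
                      unsigned *x, unsigned *y, unsigned *z)
{
   *x = linear % block_x;             /* URem by block_x_size_vec */
   *y = (linear / block_x) % block_y; /* UDiv then URem */
   *z = (linear / block_x) / block_y; /* two UDivs */
}

int main(void)
{
   unsigned x, y, z;
   /* invocation 13 in a 4x2xN block sits at x=1, y=1, z=1 */
   thread_id_from_linear(13, 4, 2, &x, &y, &z);
   assert(x == 1 && y == 1 && z == 1);
   return 0;
}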
@@ -866,7 +825,7 @@ generate_compute(struct llvmpipe_context *lp,
                                               lp_int_type(cs_type), 0);

       struct lp_build_if_state iter0state;
-      LLVMValueRef is_iter0 = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, coro_idx,
+      LLVMValueRef is_iter0 = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id,
                                             lp_build_const_int32(gallivm, 0), "");
       LLVMValueRef vertex_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.vertex_count, "");
       LLVMValueRef prim_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.prim_count, "");

@@ -80,6 +80,7 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
    NIR_PASS_V(s, nir_lower_vars_to_ssa);

    NIR_PASS(progress, s, nir_copy_prop);
+   NIR_PASS(progress, s, r300_nir_lower_flrp);
    NIR_PASS(progress, s, nir_opt_algebraic);
    if (s->info.stage == MESA_SHADER_VERTEX) {
       if (!is_r500)

@@ -689,6 +689,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)

    if (!ra_allocate(graph)) {
       rc_error(c, "Ran out of hardware temporaries\n");
+      ralloc_free(graph);
       return;
    }

@@ -357,6 +357,7 @@ static void do_advanced_regalloc(struct regalloc_state * s)

    if (!ra_allocate(graph)) {
       rc_error(s->C, "Ran out of hardware temporaries\n");
+      ralloc_free(graph);
       return;
    }

@@ -86,10 +86,15 @@ static void r300_destroy_context(struct pipe_context* context)
    if (r300->draw)
       draw_destroy(r300->draw);

+   for (unsigned i = 0; i < r300->nr_vertex_buffers; i++)
+      pipe_vertex_buffer_unreference(&r300->vertex_buffer[i]);
+
    if (r300->uploader)
       u_upload_destroy(r300->uploader);
    if (r300->context.stream_uploader)
       u_upload_destroy(r300->context.stream_uploader);
    if (r300->context.const_uploader)
       u_upload_destroy(r300->context.const_uploader);

    /* XXX: This function assumes r300->query_list was initialized */
    r300_release_referenced_objects(r300);
@@ -99,6 +104,7 @@ static void r300_destroy_context(struct pipe_context* context)
    r300->rws->ctx_destroy(r300->ctx);

    rc_destroy_regalloc_state(&r300->fs_regalloc_state);
+   rc_destroy_regalloc_state(&r300->vs_regalloc_state);

    /* XXX: No way to tell if this was initialized or not? */
    slab_destroy_child(&r300->pool_transfers);
@@ -125,6 +131,9 @@ static void r300_destroy_context(struct pipe_context* context)
          FREE(r300->vertex_stream_state.state);
       }
    }

+   FREE(r300->stencilref_fallback);
+
    FREE(r300);
 }

@@ -525,6 +525,7 @@ static void r300_translate_fragment_shader(
             abort();
          }

+         free(compiler.code->constants.Constants);
          rc_destroy(&compiler.Base);
          r300_dummy_fragment_shader(r300, shader);
          return;

@@ -307,15 +307,15 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
    result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
    if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)
       result.sps_info_flags |= 1 << 9;
-   if (pic->UseRefPicList == true)
+   if (pic->UseRefPicList == true) {
       result.sps_info_flags |= 1 << 10;
+      result.sps_info_flags |= 1 << 12;
+   }
    if (pic->UseStRpsBits == true && pic->pps->st_rps_bits != 0) {
       result.sps_info_flags |= 1 << 11;
       result.st_rps_bits = pic->pps->st_rps_bits;
    }

-   result.sps_info_flags |= 1 << 12;
-
    result.chroma_format = pic->pps->sps->chroma_format_idc;
    result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
    result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;

@@ -1390,7 +1390,7 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
                 v3d->compute_shared_memory =
                         v3d_bo_alloc(v3d->screen,
                                      v3d->prog.compute->prog_data.compute->shared_size *
-                                     wgs_per_sg,
+                                     num_wgs,
                                      "shared_vars");
         }

@@ -12,7 +12,7 @@ traces:
       checksum: 57ddd36b117adc9216c65c10d914a37e
   gputest/pixmark-piano-v2.trace:
     gl-virgl:
-      checksum: cbe50265c2d1a114fd75bf12407fbad9
+      checksum: 3b760606c18aebda1ad0eff6eb03203a
   gputest/triangle-v2.trace:
     gl-virgl:
       checksum: 7812de00011a3a059892e36cea19c696

@@ -4774,7 +4774,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *sinfo, uint32_
    /* this could be huge, so only alloc if needed since it's extremely unlikely to
     * ever be used by anything except cts
     */
-   ctx.resident_defs = ralloc_array_size(ctx.mem_ctx,
+   ctx.resident_defs = rzalloc_array_size(ctx.mem_ctx,
                                          sizeof(SpvId), entry->ssa_alloc);
    if (!ctx.resident_defs)
       goto fail;

@@ -458,10 +458,13 @@ get_batch_state(struct zink_context *ctx, struct zink_batch *batch)
       }
       simple_mtx_unlock(&screen->free_batch_states_lock);
    }
-   if (!bs && ctx->batch_states) {
-      /* states are stored sequentially, so if the first one doesn't work, none of them will */
-      if (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) ||
-          find_unused_state(ctx->batch_states)) {
+   /* states are stored sequentially, so if the first one doesn't work, none of them will */
+   if (!bs && ctx->batch_states && ctx->batch_states->next) {
+      /* only a submitted state can be reused */
+      if (p_atomic_read(&ctx->batch_states->fence.submitted) &&
+          /* a submitted state must have completed before it can be reused */
+          (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) ||
+           p_atomic_read(&ctx->batch_states->fence.completed))) {
         bs = ctx->batch_states;
         pop_batch_state(ctx);
      }

@@ -548,7 +548,7 @@ bo_sparse_create(struct zink_screen *screen, uint64_t size)
    bo->base.base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
    bo->base.base.size = size;
    bo->base.vtbl = &bo_sparse_vtbl;
-   unsigned placement = zink_mem_type_idx_from_bits(screen, ZINK_HEAP_DEVICE_LOCAL_SPARSE, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+   unsigned placement = zink_mem_type_idx_from_types(screen, ZINK_HEAP_DEVICE_LOCAL_SPARSE, UINT32_MAX);
    assert(placement != UINT32_MAX);
    bo->base.base.placement = placement;
    bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
@@ -622,6 +622,8 @@ zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, en
             low_bound *= 2; //nvidia has fat textures or something
+         unsigned vk_heap_idx = screen->info.mem_props.memoryTypes[mem_type_idx].heapIndex;
+         reclaim_all = screen->info.mem_props.memoryHeaps[vk_heap_idx].size <= low_bound;
          if (reclaim_all)
            reclaim_all = clean_up_buffer_managers(screen);
       }
       entry = pb_slab_alloc_reclaimed(slabs, alloc_size, mem_type_idx, reclaim_all);
       if (!entry) {

@@ -94,10 +94,10 @@ zink_heap_from_domain_flags(VkMemoryPropertyFlags domains, enum zink_alloc_flag
 }

 static ALWAYS_INLINE unsigned
-zink_mem_type_idx_from_bits(struct zink_screen *screen, enum zink_heap heap, uint32_t bits)
+zink_mem_type_idx_from_types(struct zink_screen *screen, enum zink_heap heap, uint32_t types)
 {
    for (unsigned i = 0; i < screen->heap_count[heap]; i++) {
-      if (bits & BITFIELD_BIT(screen->heap_map[heap][i])) {
+      if (types & BITFIELD_BIT(screen->heap_map[heap][i])) {
         return screen->heap_map[heap][i];
      }
   }

@@ -98,8 +98,8 @@ clear_in_rp(struct pipe_context *pctx,
         return;
      cr.rect.offset.x = scissor_state->minx;
      cr.rect.offset.y = scissor_state->miny;
-      cr.rect.extent.width = MIN2(fb->width, scissor_state->maxx - scissor_state->minx);
-      cr.rect.extent.height = MIN2(fb->height, scissor_state->maxy - scissor_state->miny);
+      cr.rect.extent.width = MIN2(fb->width - cr.rect.offset.x, scissor_state->maxx - scissor_state->minx);
+      cr.rect.extent.height = MIN2(fb->height - cr.rect.offset.y, scissor_state->maxy - scissor_state->miny);
   } else {
      cr.rect.extent.width = fb->width;
      cr.rect.extent.height = fb->height;
@@ -644,6 +644,8 @@ zink_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *dst,
                          bool render_condition_enabled)
 {
    struct zink_context *ctx = zink_context(pctx);
+   /* check for stencil fallback */
+   bool blitting = ctx->blitting;
    zink_flush_dgc_if_enabled(ctx);
    bool render_condition_active = ctx->render_condition_active;
    if (!render_condition_enabled && render_condition_active) {
@@ -656,14 +658,16 @@ zink_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *dst,
        dsty + height > ctx->fb_state.height)
       cur_attachment = false;
    if (!cur_attachment) {
-      util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
-      set_clear_fb(pctx, NULL, dst);
-      zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false);
-      ctx->blitting = true;
+      if (!blitting) {
+         util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
+         set_clear_fb(pctx, NULL, dst);
+         zink_blit_barriers(ctx, NULL, zink_resource(dst->texture), false);
+         ctx->blitting = true;
+      }
    }
    struct pipe_scissor_state scissor = {dstx, dsty, dstx + width, dsty + height};
    pctx->clear(pctx, clear_flags, &scissor, NULL, depth, stencil);
-   if (!cur_attachment) {
+   if (!cur_attachment && !blitting) {
       util_blitter_restore_fb_state(ctx->blitter);
       ctx->blitting = false;
    }

@@ -3543,6 +3543,88 @@ invert_point_coord(nir_shader *nir)
                                 nir_metadata_dominance, NULL);
 }

+static bool
+is_residency_code(nir_def *src)
+{
+   nir_instr *parent = src->parent_instr;
+   while (1) {
+      if (parent->type == nir_instr_type_intrinsic) {
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         return false;
+      }
+      if (parent->type == nir_instr_type_tex)
+         return true;
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      parent = alu->src[0].src.ssa->parent_instr;
+   }
+}
+
+static bool
+lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
+      b->cursor = nir_before_instr(&instr->instr);
+      nir_def *src0;
+      if (is_residency_code(instr->src[0].ssa))
+         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
+      else
+         src0 = instr->src[0].ssa;
+      nir_def *src1;
+      if (is_residency_code(instr->src[1].ssa))
+         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
+      else
+         src1 = instr->src[1].ssa;
+      nir_def *def = nir_iand(b, src0, src1);
+      nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
+      nir_instr_remove(&instr->instr);
+      return true;
+   }
+   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
+      return false;
+
+   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
+    * so just rewrite as the first component since ntv is going to use a different
+    * method for storing the residency value anyway
+    */
+   b->cursor = nir_before_instr(&instr->instr);
+   nir_instr *parent = instr->src[0].ssa->parent_instr;
+   if (is_residency_code(instr->src[0].ssa)) {
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
+      nir_instr_remove(parent);
+   } else {
+      nir_def *src;
+      if (parent->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         src = intr->src[0].ssa;
+      } else {
+         assert(parent->type == nir_instr_type_alu);
+         nir_alu_instr *alu = nir_instr_as_alu(parent);
+         src = alu->src[0].src.ssa;
+      }
+      if (instr->def.bit_size != 32) {
+         if (instr->def.bit_size == 1)
+            src = nir_ieq_imm(b, src, 1);
+         else
+            src = nir_u2uN(b, src, instr->def.bit_size);
+      }
+      nir_def_rewrite_uses(&instr->def, src);
+      nir_instr_remove(&instr->instr);
+   }
+   return true;
+}
+
+static bool
+lower_sparse(nir_shader *shader)
+{
+   return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
 static bool
 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
@@ -3679,7 +3761,7 @@ add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
          }
          /* filter needed components */
          if (intr->num_components < load->num_components)
-            load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << c);
+            load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
         nir_def_rewrite_uses(&intr->def, load);
      } else {
         nir_def *store = intr->src[0].ssa;
@@ -3936,6 +4018,7 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad
       zs->can_inline = false;
    } else if (need_optimize)
       optimize_nir(nir, zs, true);
+   NIR_PASS_V(nir, lower_sparse);

    struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
    ralloc_free(nir);
@@ -4570,88 +4653,6 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
       }
    }

-static bool
-is_residency_code(nir_def *src)
-{
-   nir_instr *parent = src->parent_instr;
-   while (1) {
-      if (parent->type == nir_instr_type_intrinsic) {
-         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         return false;
-      }
-      if (parent->type == nir_instr_type_tex)
-         return true;
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      parent = alu->src[0].src.ssa->parent_instr;
-   }
-}
-
-static bool
-lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
-{
-   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
-      b->cursor = nir_before_instr(&instr->instr);
-      nir_def *src0;
-      if (is_residency_code(instr->src[0].ssa))
-         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
-      else
-         src0 = instr->src[0].ssa;
-      nir_def *src1;
-      if (is_residency_code(instr->src[1].ssa))
-         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
-      else
-         src1 = instr->src[1].ssa;
-      nir_def *def = nir_iand(b, src0, src1);
-      nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
-      nir_instr_remove(&instr->instr);
-      return true;
-   }
-   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
-      return false;
-
-   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
-    * so just rewrite as the first component since ntv is going to use a different
-    * method for storing the residency value anyway
-    */
-   b->cursor = nir_before_instr(&instr->instr);
-   nir_instr *parent = instr->src[0].ssa->parent_instr;
-   if (is_residency_code(instr->src[0].ssa)) {
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
-      nir_instr_remove(parent);
-   } else {
-      nir_def *src;
-      if (parent->type == nir_instr_type_intrinsic) {
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         src = intr->src[0].ssa;
-      } else {
-         assert(parent->type == nir_instr_type_alu);
-         nir_alu_instr *alu = nir_instr_as_alu(parent);
-         src = alu->src[0].src.ssa;
-      }
-      if (instr->def.bit_size != 32) {
-         if (instr->def.bit_size == 1)
-            src = nir_ieq_imm(b, src, 1);
-         else
-            src = nir_u2uN(b, src, instr->def.bit_size);
-      }
-      nir_def_rewrite_uses(&instr->def, src);
-      nir_instr_remove(&instr->instr);
-   }
-   return true;
-}
-
-static bool
-lower_sparse(nir_shader *shader)
-{
-   return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
-                                     nir_metadata_dominance, NULL);
-}
-
 static bool
 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
 {
@@ -5301,11 +5302,20 @@ mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,

    assert(util_is_power_of_two_nonzero(align));

-   return (nir_mem_access_size_align){
-      .num_components = MIN2(bytes / (bit_size / 8), 4),
-      .bit_size = bit_size,
-      .align = bit_size / 8,
-   };
+   /* simply drop the bit_size for unaligned load/stores */
+   if (align < (bit_size / 8)) {
+      return (nir_mem_access_size_align){
+         .num_components = MIN2(bytes / align, 4),
+         .bit_size = align * 8,
+         .align = align,
+      };
+   } else {
+      return (nir_mem_access_size_align){
+         .num_components = MIN2(bytes / (bit_size / 8), 4),
+         .bit_size = bit_size,
+         .align = bit_size / 8,
+      };
+   }
 }

 static nir_mem_access_size_align
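Not part of the commit: the hunk above narrows unaligned accesses instead of emitting a wide load/store at a smaller alignment. A standalone C sketch of the same rule, under the assumption that align and bit_size are the power-of-two values the callback receives (names illustrative):

/* Pick an access shape: when the known alignment is smaller than the
 * requested element size, shrink the element to the alignment and take
 * more (up to 4) components instead. Mirrors the diff logic above. */
struct access { unsigned num_components, bit_size, align; };

static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

static struct access
pick_access(unsigned bytes, unsigned bit_size, unsigned align)
{
   if (align < bit_size / 8) {
      /* drop to align-sized elements for the unaligned case */
      struct access a = { min_u(bytes / align, 4), align * 8, align };
      return a;
   }
   struct access a = { min_u(bytes / (bit_size / 8), 4), bit_size, bit_size / 8 };
   return a;
}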
@@ -5468,7 +5478,6 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)

    NIR_PASS_V(nir, lower_basevertex);
    NIR_PASS_V(nir, lower_baseinstance);
-   NIR_PASS_V(nir, lower_sparse);
    NIR_PASS_V(nir, split_bitfields);
    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */

@@ -5744,20 +5753,6 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
          }

       }
-      while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
-         struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
-         //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
-         //stages_present_i implies libs->stages_present_i
-         if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
-            continue;
-         if (!libs->removed) {
-            libs->removed = true;
-            simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
-            _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
-            simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
-         }
-         zink_gfx_lib_cache_unref(screen, libs);
-      }
       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
          prog->shaders[stage] = NULL;
          prog->stages_remaining &= ~BITFIELD_BIT(stage);
@@ -5773,6 +5768,17 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
       }
       zink_gfx_program_reference(screen, &prog, NULL);
    }
+   while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
+      struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
+      if (!libs->removed) {
+         libs->removed = true;
+         unsigned idx = zink_program_cache_stages(libs->stages_present);
+         simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
+         _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
+         simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
+      }
+      zink_gfx_lib_cache_unref(screen, libs);
+   }
    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
        shader->non_fs.generated_tcs) {
       /* automatically destroy generated tcs shaders when tes is destroyed */

@@ -187,21 +187,31 @@ zink_context_destroy(struct pipe_context *pctx)
          screen->free_batch_states = ctx->batch_states;
          screen->last_free_batch_state = screen->free_batch_states;
       }
-      while (screen->last_free_batch_state->next)
-         screen->last_free_batch_state = screen->last_free_batch_state->next;
    }
+   while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+      screen->last_free_batch_state = screen->last_free_batch_state->next;
    if (ctx->free_batch_states) {
       if (screen->free_batch_states)
          screen->last_free_batch_state->next = ctx->free_batch_states;
-      else
+      else {
          screen->free_batch_states = ctx->free_batch_states;
-      screen->last_free_batch_state = ctx->last_free_batch_state;
+         screen->last_free_batch_state = ctx->last_free_batch_state;
+      }
    }
-   simple_mtx_unlock(&screen->free_batch_states_lock);
+   while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+      screen->last_free_batch_state = screen->last_free_batch_state->next;
    if (ctx->batch.state) {
       zink_clear_batch_state(ctx, ctx->batch.state);
       zink_batch_state_destroy(screen, ctx->batch.state);
+      if (screen->free_batch_states)
+         screen->last_free_batch_state->next = ctx->batch.state;
+      else {
+         screen->free_batch_states = ctx->batch.state;
+         screen->last_free_batch_state = screen->free_batch_states;
+      }
    }
+   while (screen->last_free_batch_state && screen->last_free_batch_state->next)
+      screen->last_free_batch_state = screen->last_free_batch_state->next;
+   simple_mtx_unlock(&screen->free_batch_states_lock);

    for (unsigned i = 0; i < 2; i++) {
       util_idalloc_fini(&ctx->di.bindless[i].tex_slots);
@@ -2837,6 +2847,29 @@ begin_rendering(struct zink_context *ctx)
                ctx->dynamic_fb.attachments[PIPE_MAX_COLOR_BUFS+1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
             }
          }
       }
+      if (changed_size || changed_layout)
+         ctx->rp_changed = true;
+      ctx->rp_loadop_changed = false;
+      ctx->rp_layout_changed = false;
+   }
+   /* always assemble clear_buffers mask:
+    * if a scissored clear must be triggered during glFlush,
+    * the renderpass metadata may be unchanged (e.g., LOAD from previous rp),
+    * but the buffer mask must still be returned
+    */
+   if (ctx->clears_enabled) {
+      for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+         /* these are no-ops */
+         if (!ctx->fb_state.cbufs[i] || !zink_fb_clear_enabled(ctx, i))
+            continue;
+         /* these need actual clear calls inside the rp */
+         if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i]))
+            clear_buffers |= (PIPE_CLEAR_COLOR0 << i);
+      }
+      if (ctx->fb_state.zsbuf && zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS)) {
+         struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS];
+         struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0);
+         if (zink_fb_clear_needs_explicit(fb_clear)) {
            for (int j = !zink_fb_clear_element_needs_explicit(clear);
                 (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && j < zink_fb_clear_count(fb_clear);
@@ -2844,10 +2877,6 @@ begin_rendering(struct zink_context *ctx)
               clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits;
         }
      }
-      if (changed_size || changed_layout)
-         ctx->rp_changed = true;
-      ctx->rp_loadop_changed = false;
-      ctx->rp_layout_changed = false;
-   }

    if (!ctx->rp_changed && ctx->batch.in_rp)
@@ -3803,7 +3832,6 @@ zink_flush(struct pipe_context *pctx,
    struct zink_batch *batch = &ctx->batch;
    struct zink_fence *fence = NULL;
    struct zink_screen *screen = zink_screen(ctx->base.screen);
-   unsigned submit_count = 0;
    VkSemaphore export_sem = VK_NULL_HANDLE;

    /* triggering clears will force has_work */
@@ -3864,8 +3892,7 @@ zink_flush(struct pipe_context *pctx,
       }
    }

-   /* TODO: if swapchains gain timeline semaphore semantics, `flags` can be eliminated and no-op fence can return timeline id */
-   if (!batch->has_work && flags) {
+   if (!batch->has_work) {
       if (pfence) {
          /* reuse last fence */
          fence = ctx->last_fence;
@@ -3882,7 +3909,6 @@ zink_flush(struct pipe_context *pctx,
          tc_driver_internal_flush_notify(ctx->tc);
    } else {
       fence = &batch->state->fence;
-      submit_count = batch->state->usage.submit_count;
       if (deferred && !(flags & PIPE_FLUSH_FENCE_FD) && pfence)
          deferred_fence = true;
       else
@@ -3906,7 +3932,7 @@ zink_flush(struct pipe_context *pctx,
       mfence->fence = fence;
       mfence->sem = export_sem;
       if (fence) {
-         mfence->submit_count = submit_count;
+         mfence->submit_count = zink_batch_state(fence)->usage.submit_count;
         util_dynarray_append(&fence->mfences, struct zink_tc_fence *, mfence);
      }
      if (export_sem) {

@@ -185,7 +185,12 @@ zink_fence_finish(struct zink_screen *screen, struct pipe_context *pctx, struct
    if (submit_diff > 1)
       return true;

-   if (fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id))
+   /* - if fence is submitted, batch_id is nonzero and can be checked
+    * - if fence is not submitted here, it must be reset; batch_id will be 0 and submitted is false
+    * in either case, the fence has finished
+    */
+   if ((fence->submitted && zink_screen_check_last_finished(screen, fence->batch_id)) ||
+       (!fence->submitted && submit_diff))
       return true;

    return fence_wait(screen, fence, timeout_ns);

@@ -561,6 +561,8 @@ kopper_acquire(struct zink_screen *screen, struct zink_resource *res, uint64_t t
    if (cdt->swapchain->images[res->obj->dt_idx].readback)
       zink_resource(cdt->swapchain->images[res->obj->dt_idx].readback)->valid = false;
    res->obj->image = cdt->swapchain->images[res->obj->dt_idx].image;
+   if (!cdt->age_locked)
+      zink_kopper_update_last_written(res);
    cdt->swapchain->images[res->obj->dt_idx].acquired = false;
    if (!cdt->swapchain->images[res->obj->dt_idx].init) {
       /* swapchain images are initially in the UNDEFINED layout */
@@ -792,7 +794,7 @@ zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res)
    cpi->res = res;
    cpi->swapchain = cdt->swapchain;
    cpi->indefinite_acquire = res->obj->indefinite_acquire;
-   res->obj->last_dt_idx = cpi->image = res->obj->dt_idx;
+   cpi->image = res->obj->dt_idx;
    cpi->info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
    cpi->info.pNext = NULL;
    cpi->info.waitSemaphoreCount = 1;
@@ -812,11 +814,13 @@ zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res)
     *   Any other color buffers' ages are incremented by 1 if
     *   their age was previously greater than 0.
     */
-   for (int i = 0; i < cdt->swapchain->num_images; i++) {
-      if (i == res->obj->dt_idx)
-         cdt->swapchain->images[i].age = 1;
-      else if (cdt->swapchain->images[i].age > 0)
-         cdt->swapchain->images[i].age += 1;
+   if (!cdt->age_locked) {
+      for (int i = 0; i < cdt->swapchain->num_images; i++) {
+         if (i == res->obj->dt_idx)
+            cdt->swapchain->images[i].age = 1;
+         else if (cdt->swapchain->images[i].age > 0)
+            cdt->swapchain->images[i].age += 1;
+      }
    }
    if (util_queue_is_initialized(&screen->flush_queue)) {
       p_atomic_inc(&cpi->swapchain->async_presents);
@@ -832,6 +836,12 @@ zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res)
    res->obj->dt_idx = UINT32_MAX;
 }

+void
+zink_kopper_update_last_written(struct zink_resource *res)
+{
+   res->obj->last_dt_idx = res->obj->dt_idx;
+}
+
 static void
 kopper_ensure_readback(struct zink_screen *screen, struct zink_resource *res)
 {
@@ -873,14 +883,17 @@ zink_kopper_acquire_readback(struct zink_context *ctx, struct zink_resource *res
    if (++cdt->readback_counter >= ZINK_READBACK_THRESHOLD)
       kopper_ensure_readback(screen, res);
    while (res->obj->dt_idx != last_dt_idx) {
+      cdt->age_locked = true;
       if (res->obj->dt_idx != UINT32_MAX && !zink_kopper_present_readback(ctx, res))
         break;
+      cdt->age_locked = true;
      do {
         ret = kopper_acquire(screen, res, 0);
      } while (!is_swapchain_kill(ret) && (ret == VK_NOT_READY || ret == VK_TIMEOUT));
      if (is_swapchain_kill(ret)) {
         kill_swapchain(ctx, res);
         *readback = NULL;
+         cdt->age_locked = false;
         return false;
      }
   }
@@ -936,6 +949,10 @@ zink_kopper_present_readback(struct zink_context *ctx, struct zink_resource *res
    simple_mtx_lock(&screen->semaphores_lock);
    util_dynarray_append(&screen->semaphores, VkSemaphore, acquire);
    simple_mtx_unlock(&screen->semaphores_lock);
+
+   struct kopper_displaytarget *cdt = res->obj->dt;
+   cdt->age_locked = false;
+
    return zink_screen_handle_vkresult(screen, error);
 }

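An aside, not part of the commit: the buffer-age bookkeeping that the new age_locked flag guards is simple enough to state on its own. A C sketch of the rule described in the comment above (after a present, the presented image's age becomes 1 and every other already-aged image gets one frame older; names illustrative):

/* EGL_EXT_buffer_age-style bookkeeping, as in the loop above.
 * age == 0 means "content undefined / never presented". */
static void
update_ages(unsigned ages[], unsigned num_images, unsigned presented)
{
   for (unsigned i = 0; i < num_images; i++) {
      if (i == presented)
         ages[i] = 1;          /* just shown: one frame old next time */
      else if (ages[i] > 0)
         ages[i] += 1;         /* everything else grows older */
   }
}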
@@ -95,6 +95,8 @@ struct kopper_displaytarget
    bool is_kill;
    VkPresentModeKHR present_mode;
    unsigned readback_counter;
+
+   bool age_locked; //disables buffer age during readback
 };

 struct zink_context;
@@ -119,6 +121,9 @@ zink_kopper_acquired(const struct kopper_displaytarget *cdt, uint32_t idx)
    return idx != UINT32_MAX && cdt->swapchain->images[idx].acquired;
 }

+void
+zink_kopper_update_last_written(struct zink_resource *res);
+
 struct kopper_displaytarget *
 zink_kopper_displaytarget_create(struct zink_screen *screen, unsigned tex_usage,
                                  enum pipe_format format, unsigned width,

@@ -1006,6 +1006,8 @@ create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
 {
    struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
    libs->stages_present = prog->stages_present;
+   if (generated_tcs)
+      libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
    simple_mtx_init(&libs->lock, mtx_plain);
    if (generated_tcs)
       _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);

@@ -729,7 +729,8 @@ init_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe_r

    case PIPE_TEXTURE_3D:
       ici->imageType = VK_IMAGE_TYPE_3D;
-      ici->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
+      if (!(templ->flags & PIPE_RESOURCE_FLAG_SPARSE))
+         ici->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
       if (screen->info.have_EXT_image_2d_view_of_3d)
          ici->flags |= VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT;
       break;
@@ -1180,6 +1181,10 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
    mai.pNext = NULL;
    mai.allocationSize = reqs.size;
    enum zink_heap heap = zink_heap_from_domain_flags(flags, aflags);
+   if (templ->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) {
+      if (!(vk_domain_from_heap(heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+         heap = zink_heap_from_domain_flags(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, aflags);
+   }

    VkMemoryDedicatedAllocateInfo ded_alloc_info = {
       .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
@@ -1267,7 +1272,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
    alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
    obj->alignment = alignment;

-   if (zink_mem_type_idx_from_bits(screen, heap, reqs.memoryTypeBits) == UINT32_MAX) {
+   if (zink_mem_type_idx_from_types(screen, heap, reqs.memoryTypeBits) == UINT32_MAX) {
      /* not valid based on reqs; demote to more compatible type */
      switch (heap) {
      case ZINK_HEAP_DEVICE_LOCAL_VISIBLE:
@@ -1279,7 +1284,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
      default:
         break;
      }
-      assert(zink_mem_type_idx_from_bits(screen, heap, reqs.memoryTypeBits) != UINT32_MAX);
+      assert(zink_mem_type_idx_from_types(screen, heap, reqs.memoryTypeBits) != UINT32_MAX);
    }

 retry:
@@ -1611,6 +1616,11 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned
       box.depth = util_num_layers(&res->base.b, i);
       ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box);
    }
+   if (old_obj->exportable) {
+      simple_mtx_lock(&ctx->batch.state->exportable_lock);
+      _mesa_set_remove_key(&ctx->batch.state->dmabuf_exports, &staging);
+      simple_mtx_unlock(&ctx->batch.state->exportable_lock);
+   }
    zink_resource_object_reference(screen, &old_obj, NULL);
    return true;
 }

@@ -837,6 +837,9 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 1;

    case PIPE_CAP_BINDLESS_TEXTURE:
+      if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
+          (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2))
+         return 0;
       return screen->info.have_EXT_descriptor_indexing;

    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -3465,20 +3468,11 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev
       mesa_logw("zink: bug detected: inputAttachmentDescriptorSize(%u) > %u", (unsigned)screen->info.db_props.inputAttachmentDescriptorSize, ZINK_FBFETCH_DESCRIPTOR_SIZE);
       can_db = false;
    }
-   if (screen->compact_descriptors) {
-      if (screen->info.db_props.maxDescriptorBufferBindings < 3) {
-         if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
-            mesa_loge("Cannot use db descriptor mode with compact descriptors with maxDescriptorBufferBindings < 3");
-            goto fail;
-         }
-         can_db = false;
-      }
-   } else {
-      if (screen->info.db_props.maxDescriptorBufferBindings < 5) {
-         if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
-            mesa_loge("Cannot use db descriptor mode with maxDescriptorBufferBindings < 5");
-            goto fail;
-         }
+   if (screen->info.db_props.maxDescriptorBufferBindings < 2 || screen->info.db_props.maxSamplerDescriptorBufferBindings < 2) {
+      if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+         /* allow for testing, but disable bindless */
+         mesa_logw("Cannot use bindless and db descriptor mode with (maxDescriptorBufferBindings||maxSamplerDescriptorBufferBindings) < 2");
+      } else {
         can_db = false;
      }
   }

@@ -61,6 +61,7 @@ static inline bool
 zink_screen_check_last_finished(struct zink_screen *screen, uint32_t batch_id)
 {
    const uint32_t check_id = (uint32_t)batch_id;
+   assert(check_id);
    /* last_finished may have wrapped */
    if (screen->last_finished < UINT_MAX / 2) {
       /* last_finished has wrapped, batch_id has not */

@@ -505,7 +505,12 @@ impl Program {
         for (i, d) in self.devs.iter().enumerate() {
             let mut ptr = ptrs[i];
             let info = lock.dev_build(d);
-            let spirv = info.spirv.as_ref().unwrap().to_bin();
+
+            // no spirv means nothing to write
+            let Some(spirv) = info.spirv.as_ref() else {
+                continue;
+            };
+            let spirv = spirv.to_bin();

             unsafe {
                 // 1. binary format version

@@ -56,8 +56,10 @@ void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context,
    context->desc.vp9.picture_parameter.pic_fields.refresh_frame_context = vp9->pic_fields.bits.refresh_frame_context;
    context->desc.vp9.picture_parameter.pic_fields.frame_context_idx = vp9->pic_fields.bits.frame_context_idx;
    context->desc.vp9.picture_parameter.pic_fields.segmentation_enabled = vp9->pic_fields.bits.segmentation_enabled;
-   context->desc.vp9.picture_parameter.pic_fields.segmentation_temporal_update = vp9->pic_fields.bits.segmentation_temporal_update;
-   context->desc.vp9.picture_parameter.pic_fields.segmentation_update_map = vp9->pic_fields.bits.segmentation_update_map;
+   context->desc.vp9.picture_parameter.pic_fields.segmentation_temporal_update =
+      vp9->pic_fields.bits.segmentation_enabled && vp9->pic_fields.bits.segmentation_temporal_update;
+   context->desc.vp9.picture_parameter.pic_fields.segmentation_update_map =
+      vp9->pic_fields.bits.segmentation_enabled && vp9->pic_fields.bits.segmentation_update_map;
    context->desc.vp9.picture_parameter.pic_fields.last_ref_frame = vp9->pic_fields.bits.last_ref_frame;
    context->desc.vp9.picture_parameter.pic_fields.last_ref_frame_sign_bias = vp9->pic_fields.bits.last_ref_frame_sign_bias;
    context->desc.vp9.picture_parameter.pic_fields.golden_ref_frame = vp9->pic_fields.bits.golden_ref_frame;

@@ -108,6 +108,8 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
 {
    vlVdpDevice *dev;
    struct pipe_screen *pscreen;
+   VdpYCbCrFormat ycbcrFormat;
+   bool supported;

    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
@@ -122,47 +124,50 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp

    mtx_lock(&dev->mutex);

+   ycbcrFormat = bits_ycbcr_format;
    switch(bits_ycbcr_format) {
    case VDP_YCBCR_FORMAT_NV12:
-      *is_supported = surface_chroma_type == VDP_CHROMA_TYPE_420;
+      supported = surface_chroma_type == VDP_CHROMA_TYPE_420;
       break;

    case VDP_YCBCR_FORMAT_YV12:
-      *is_supported = surface_chroma_type == VDP_CHROMA_TYPE_420;
+      supported = surface_chroma_type == VDP_CHROMA_TYPE_420;

       /* We can convert YV12 to NV12 on the fly! */
-      if (*is_supported &&
-          pscreen->is_video_format_supported(pscreen,
-                                             PIPE_FORMAT_NV12,
-                                             PIPE_VIDEO_PROFILE_UNKNOWN,
-                                             PIPE_VIDEO_ENTRYPOINT_BITSTREAM)) {
-         mtx_unlock(&dev->mutex);
-         return VDP_STATUS_OK;
-      }
+      ycbcrFormat = VDP_YCBCR_FORMAT_NV12;
       break;

    case VDP_YCBCR_FORMAT_UYVY:
    case VDP_YCBCR_FORMAT_YUYV:
-      *is_supported = surface_chroma_type == VDP_CHROMA_TYPE_422;
+      supported = surface_chroma_type == VDP_CHROMA_TYPE_422;
       break;

    case VDP_YCBCR_FORMAT_Y8U8V8A8:
    case VDP_YCBCR_FORMAT_V8U8Y8A8:
-      *is_supported = surface_chroma_type == VDP_CHROMA_TYPE_444;
+      supported = surface_chroma_type == VDP_CHROMA_TYPE_444;
       break;

+   case VDP_YCBCR_FORMAT_P010:
+   case VDP_YCBCR_FORMAT_P016:
+      /* Do any other profiles imply support for this chroma type? */
+      supported = (surface_chroma_type == VDP_CHROMA_TYPE_420_16)
+         && vl_codec_supported(pscreen, PIPE_VIDEO_PROFILE_HEVC_MAIN_10, false);
+      break;
+
    default:
-      *is_supported = false;
+      supported = false;
       break;
    }

-   if (*is_supported &&
+   if (supported &&
       !pscreen->is_video_format_supported(pscreen,
-                                           FormatYCBCRToPipe(bits_ycbcr_format),
+                                           FormatYCBCRToPipe(ycbcrFormat),
                                            PIPE_VIDEO_PROFILE_UNKNOWN,
                                            PIPE_VIDEO_ENTRYPOINT_BITSTREAM)) {
-      *is_supported = false;
+      supported = false;
    }
+   *is_supported = supported;

    mtx_unlock(&dev->mutex);

    return VDP_STATUS_OK;

@@ -605,15 +605,17 @@ blorp_clear(struct blorp_batch *batch,
    if (batch->blorp->isl_dev->info->ver < 6)
       use_simd16_replicated_data = false;

-   /* From the BSpec: 47719 Replicate Data:
+   /* From the BSpec: 47719 (TGL/DG2/MTL) Replicate Data:
     *
     * "Replicate Data Render Target Write message should not be used
     *  on all projects TGL+."
     *
+    * Xe2 spec (57350) does not mention this restriction.
+    *
     * See 14017879046, 14017880152 for additional information.
     */
    if (batch->blorp->isl_dev->info->ver >= 12 &&
-       format == ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM)
+       batch->blorp->isl_dev->info->ver < 20)
       use_simd16_replicated_data = false;

    if (compute)

@@ -122,8 +122,7 @@ brw_nir_ubo_surface_index_is_pushable(nir_src src)

    if (intrin && intrin->intrinsic == nir_intrinsic_resource_intel) {
       return (nir_intrinsic_resource_access_intel(intrin) &
-              nir_resource_intel_pushable) &&
-             nir_src_is_const(intrin->src[1]);
+              nir_resource_intel_pushable);
    }

    return nir_src_is_const(src);
@@ -146,6 +145,14 @@ brw_nir_ubo_surface_index_get_push_block(nir_src src)
    return nir_intrinsic_resource_block_intel(intrin);
 }

+/* This helper return the binding table index of a surface access (any
+ * buffer/image/etc...). It works off the source of one of the intrinsics
+ * (load_ubo, load_ssbo, store_ssbo, load_image, store_image, etc...).
+ *
+ * If the source is constant, then this is the binding table index. If we're
+ * going through a resource_intel intel intrinsic, then we need to check
+ * src[1] of that intrinsic.
+ */
 static inline unsigned
 brw_nir_ubo_surface_index_get_bti(nir_src src)
 {
@@ -155,8 +162,19 @@ brw_nir_ubo_surface_index_get_bti(nir_src src)
    assert(src.ssa->parent_instr->type == nir_instr_type_intrinsic);

    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.ssa->parent_instr);
-   assert(intrin->intrinsic == nir_intrinsic_resource_intel);
-   assert(nir_src_is_const(intrin->src[1]));
+   if (!intrin || intrin->intrinsic != nir_intrinsic_resource_intel)
+      return UINT32_MAX;
+
+   /* In practice we could even drop this intrinsic because the bindless
+    * access always operate from a base offset coming from a push constant, so
+    * they can never be constant.
+    */
+   if (nir_intrinsic_resource_access_intel(intrin) &
+       nir_resource_intel_bindless)
+      return UINT32_MAX;
+
+   if (!nir_src_is_const(intrin->src[1]))
+      return UINT32_MAX;

    return nir_src_as_uint(intrin->src[1]);
 }

@@ -543,8 +543,11 @@ brw_nir_lower_ray_queries(nir_shader *shader,
    };

    /* Map all query variable to internal type variables */
-   nir_foreach_function_temp_variable(var, state.impl)
+   nir_foreach_function_temp_variable(var, state.impl) {
+      if (!var->data.ray_query)
+         continue;
       register_opaque_var(var, &state);
+   }
    hash_table_foreach(state.queries, entry)
       create_internal_var(entry->data, &state);

@@ -2168,6 +2168,14 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
       goto fail_fd;
    }

+   /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
+    * need to always disable preemption :
+    *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
+    *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
+    */
+   if (devinfo.verx10 == 120)
+      BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
+
    if (!devinfo.has_context_isolation) {
       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Vulkan requires context isolation for %s", devinfo.name);

@@ -1975,6 +1975,34 @@ add_push_entry(struct anv_pipeline_push_map *push_map,
    };
 }

+static bool
+binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state,
+                                         const struct anv_descriptor_set_binding_layout *binding)
+{
+   if ((binding->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) == 0)
+      return false;
+
+   if (state->pdevice->always_use_bindless &&
+       (binding->data & ANV_DESCRIPTOR_SURFACE))
+      return false;
+
+   return true;
+}
+
+static bool
+binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_state *state,
+                                         const struct anv_descriptor_set_binding_layout *binding)
+{
+   if ((binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) == 0)
+      return false;
+
+   if (state->pdevice->always_use_bindless &&
+       (binding->data & ANV_DESCRIPTOR_SAMPLER))
+      return false;
+
+   return true;
+}
+
 void
 anv_nir_apply_pipeline_layout(nir_shader *shader,
                               const struct anv_physical_device *pdevice,
@@ -2146,7 +2174,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
          state.set[set].binding[b].surface_offset = BINDLESS_OFFSET;
          state.set[set].binding[b].sampler_offset = BINDLESS_OFFSET;

-         if (binding->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) {
+         if (binding_should_use_surface_binding_table(&state, binding)) {
            if (map->surface_count + array_size * array_multiplier > MAX_BINDING_TABLE_SIZE ||
                anv_descriptor_requires_bindless(pdevice, binding, false) ||
                brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
@@ -2177,7 +2205,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
            assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
         }

-         if (binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) {
+         if (binding_should_use_sampler_binding_table(&state, binding)) {
            if (map->sampler_count + array_size * array_multiplier > MAX_SAMPLER_TABLE_SIZE ||
                anv_descriptor_requires_bindless(pdevice, binding, true) ||
                brw_shader_stage_requires_bindless_resources(shader->info.stage)) {

@@ -126,18 +126,17 @@ anv_nir_loads_push_desc_buffer(nir_shader *nir,
             if (intrin->intrinsic != nir_intrinsic_load_ubo)
                continue;

-            const nir_const_value *const_bt_idx =
-               nir_src_as_const_value(intrin->src[0]);
-            if (const_bt_idx == NULL)
+            const unsigned bt_idx =
+               brw_nir_ubo_surface_index_get_bti(intrin->src[0]);
+            if (bt_idx == UINT32_MAX)
               continue;

-            const unsigned bt_idx = const_bt_idx[0].u32;
-
             const struct anv_pipeline_binding *binding =
                &bind_map->surface_to_descriptor[bt_idx];
             if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS &&
-                binding->index == push_set)
+                binding->index == push_set) {
                return true;
+            }
          }
       }
    }

@@ -162,6 +161,7 @@ anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
    if (push_set_layout == NULL)
       return 0;

+   /* Assume every UBO can be promoted first. */
    uint32_t ubos_fully_promoted = 0;
    for (uint32_t b = 0; b < push_set_layout->binding_count; b++) {
       const struct anv_descriptor_set_binding_layout *bind_layout =
@@ -174,6 +174,10 @@ anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
       ubos_fully_promoted |= BITFIELD_BIT(bind_layout->descriptor_index);
    }

+   /* For each load_ubo intrinsic, if the descriptor index or the offset is
+    * not a constant, we could not promote to push constant. Then check the
+    * offset + size against the push ranges.
+    */
    nir_foreach_function_impl(impl, nir) {
       nir_foreach_block(block, impl) {
          nir_foreach_instr(instr, block) {
@@ -184,45 +188,65 @@ anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
             if (intrin->intrinsic != nir_intrinsic_load_ubo)
                continue;

-            if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]))
+            /* Don't check the load_ubo from descriptor buffers */
+            nir_intrinsic_instr *resource =
+               intrin->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic ?
+               nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr) : NULL;
+            if (resource == NULL || resource->intrinsic != nir_intrinsic_resource_intel)
               continue;

-            const unsigned bt_idx =
-               brw_nir_ubo_surface_index_get_bti(intrin->src[0]);
-
-            /* Skip if this isn't a load from push descriptor buffer. */
-            const struct anv_pipeline_binding *binding =
-               &bind_map->surface_to_descriptor[bt_idx];
-            if (binding->set != push_set)
+            /* Skip load_ubo not loading from the push descriptor */
+            if (nir_intrinsic_desc_set(resource) != push_set)
               continue;

+            uint32_t binding = nir_intrinsic_binding(resource);
+
+            /* If we have indirect indexing in the binding, no push promotion
+             * in possible for the entire binding.
+             */
+            if (!nir_src_is_const(resource->src[1])) {
+               for (uint32_t i = 0; i < push_set_layout->binding[binding].array_size; i++) {
+                  ubos_fully_promoted &=
+                     ~BITFIELD_BIT(push_set_layout->binding[binding].descriptor_index + i);
+               }
+               continue;
+            }
+
+            const nir_const_value *const_bt_id =
+               nir_src_as_const_value(resource->src[1]);
+            uint32_t bt_id = const_bt_id[0].u32;
+
+            const struct anv_pipeline_binding *pipe_bind =
+               &bind_map->surface_to_descriptor[bt_id];
+
             const uint32_t desc_idx =
-               push_set_layout->binding[binding->binding].descriptor_index;
-            assert(desc_idx < MAX_PUSH_DESCRIPTORS);
-
-            bool promoted = false;
+               push_set_layout->binding[binding].descriptor_index;

             /* If the offset in the entry is dynamic, we can't tell if
              * promoted or not.
              */
             const nir_const_value *const_load_offset =
                nir_src_as_const_value(intrin->src[1]);
-            if (const_load_offset != NULL) {
-               /* Check if the load was promoted to a push constant. */
-               const unsigned load_offset = const_load_offset[0].u32;
-               const int load_bytes = nir_intrinsic_dest_components(intrin) *
-                                      (intrin->def.bit_size / 8);
+            if (const_load_offset == NULL) {
+               ubos_fully_promoted &= ~BITFIELD_BIT(desc_idx);
+               continue;
+            }

-               for (unsigned i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) {
-                  if (bind_map->push_ranges[i].set == binding->set &&
-                      bind_map->push_ranges[i].index == desc_idx &&
-                      bind_map->push_ranges[i].start * 32 <= load_offset &&
-                      (bind_map->push_ranges[i].start +
-                       bind_map->push_ranges[i].length) * 32 >=
-                      (load_offset + load_bytes)) {
-                     promoted = true;
-                     break;
-                  }
+            /* Check if the load was promoted to a push constant. */
+            const unsigned load_offset = const_load_offset[0].u32;
+            const int load_bytes = nir_intrinsic_dest_components(intrin) *
+                                   (intrin->def.bit_size / 8);
+
+            bool promoted = false;
+            for (unsigned i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) {
+               if (bind_map->push_ranges[i].set == pipe_bind->set &&
+                   bind_map->push_ranges[i].index == desc_idx &&
+                   bind_map->push_ranges[i].start * 32 <= load_offset &&
+                   (bind_map->push_ranges[i].start +
+                    bind_map->push_ranges[i].length) * 32 >=
+                   (load_offset + load_bytes)) {
+                  promoted = true;
+                  break;
+               }
             }

@@ -1728,7 +1728,7 @@ anv_pipeline_account_shader(struct anv_pipeline *pipeline,

    if (shader->push_desc_info.used_set_buffer) {
       pipeline->use_push_descriptor_buffer |=
-         BITFIELD_BIT(mesa_to_vk_shader_stage(shader->stage));
+         mesa_to_vk_shader_stage(shader->stage);
    }
    if (shader->push_desc_info.used_descriptors &
        ~shader->push_desc_info.fully_promoted_ubo_descriptors)

@@ -8370,8 +8370,9 @@ void genX(CmdEndRendering)(
                              "MSAA resolve");
    }

-   if (gfx->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE ||
-       gfx->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE) {
+   if (!(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT) &&
+       (gfx->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE ||
+        gfx->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE)) {
       /* We are about to do some MSAA resolves.  We need to flush so that the
        * result of writes to the MSAA depth attachments show up in the sampler
        * when we blit to the single-sampled resolve target.

@@ -68,7 +68,7 @@ static const uint32_t genX(vk_to_intel_blend_op)[] = {
 static void
 genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
 {
-#if GFX_VERx10 >= 120
+#if INTEL_WA_16013994831_GFX_VER
    /* Wa_16013994831 - Disable preemption during streamout, enable back
     * again if XFB not used by the current pipeline.
     *

@@ -80,7 +80,9 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
       anv_batch_emit(batch, GENX(3DSTATE_MESH_CONTROL), mesh);
       anv_batch_emit(batch, GENX(3DSTATE_TASK_CONTROL), task);
    }
 #endif

+#if INTEL_WA_16013994831_GFX_VER
    /* Wa_16013994831 - Disable preemption during streamout. */
    if (intel_needs_workaround(device->info, 16013994831))
       genX(batch_set_preemption)(batch, device->info, _3D, false);

@@ -201,6 +201,6 @@ libgrl = static_library(
 idep_grl = declare_dependency(
   link_with : libgrl,
   dependencies : libgrl_deps,
-  sources : grl_metakernel_h,
+  sources : [grl_metakernel_h, grl_cl_kernel_h],
   include_directories : include_directories('include', 'gpu'),
 )

@@ -1220,7 +1220,7 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool align8)
       ctx->ListState.CurrentPos++;
    }

-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (ctx->ListState.CurrentPos + numNodes + contNodes >= BLOCK_SIZE) {
       /* This block is full. Allocate a new block and chain to it */
       Node *newblock;
       Node *n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;

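The > to >= change above is a block-allocator off-by-one: with >, a request that exactly filled the display-list block passed the check, leaving no node free for the trailing continuation/end-of-list marker. That rationale is an inference from the surrounding code rather than a quote from the commit; the toy sketch below only illustrates the corrected bound.

#include <stdbool.h>
#include <stdio.h>

#define BLOCK_SIZE 8 /* toy value; Mesa's real block is much larger */

static bool needs_new_block(unsigned current_pos, unsigned num_nodes)
{
   /* The old check used '>', which let an allocation land flush against
    * the end of the block; '>=' chains a new block one node earlier so a
    * marker node always fits. */
   return current_pos + num_nodes >= BLOCK_SIZE;
}

int main(void)
{
   /* 5 used + 3 requested == 8: exactly full, so chain a new block. */
   printf("%s\n", needs_new_block(5, 3) ? "chain new block" : "fits");
   return 0;
}
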
@@ -2659,6 +2659,16 @@ _mesa_base_fbo_format(const struct gl_context *ctx, GLenum internalFormat)
    case GL_RGB565:
       return _mesa_is_gles(ctx) || ctx->Extensions.ARB_ES2_compatibility
          ? GL_RGB : 0;
+
+   case GL_BGRA:
+      /* EXT_texture_format_BGRA8888 only adds this as color-renderable for
+       * GLES 2 and later
+       */
+      if (_mesa_has_EXT_texture_format_BGRA8888(ctx) && _mesa_is_gles2(ctx))
+         return GL_RGBA;
+      else
+         return 0;
+
    default:
       return 0;
    }

@@ -1112,6 +1112,12 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
       if (get_pname == 0)
          goto end;

+      /* if the resource is unsupported, zero is returned */
+      if (!st_QueryTextureFormatSupport(ctx, target, internalformat)) {
+         buffer[0] = 0;
+         break;
+      }
+
       _mesa_GetIntegerv(get_pname, buffer);
       break;
    }

@@ -1123,6 +1129,12 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
       if (!_mesa_is_array_texture(target))
         goto end;

+      /* if the resource is unsupported, zero is returned */
+      if (!st_QueryTextureFormatSupport(ctx, target, internalformat)) {
+         buffer[0] = 0;
+         break;
+      }
+
      _mesa_GetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, buffer);
      break;

@@ -1137,6 +1149,12 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
       unsigned i;
       GLint current_value;

+      /* if the resource is unsupported, zero is returned */
+      if (!st_QueryTextureFormatSupport(ctx, target, internalformat)) {
+         buffer[0] = 0;
+         break;
+      }
+
       /* Combining the dimensions. Note that for array targets, this would
        * automatically include the value of MAX_LAYERS, as that value is
        * returned as MAX_HEIGHT or MAX_DEPTH */

@@ -1515,6 +1533,14 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
       if (targetIndex < 0 || targetIndex == TEXTURE_BUFFER_INDEX)
          goto end;

+      /* If the resource is not supported for image textures,
+       * or if image textures are not supported, NONE is returned.
+       */
+      if (!st_QueryTextureFormatSupport(ctx, target, internalformat)) {
+         buffer[0] = GL_NONE;
+         break;
+      }
+
       /* From spec: "Equivalent to calling GetTexParameter with <value> set
        * to IMAGE_FORMAT_COMPATIBILITY_TYPE."
        *

@@ -39,6 +39,7 @@
 #include "glformats.h"
 #include "texobj.h"
 #include "teximage.h"
+#include "textureview.h"
 #include "api_exec_decl.h"

 #include "state_tracker/st_cb_texture.h"

@@ -179,7 +180,7 @@ register_surface(struct gl_context *ctx, GLboolean isOutput,
    }

    /* This will disallow respecifying the storage. */
-   tex->Immutable = GL_TRUE;
+   _mesa_set_texture_view_state(ctx, tex, target, 1);
    _mesa_unlock_texture(ctx, tex);

    _mesa_reference_texobj(&surf->textures[i], tex);

@@ -1507,6 +1507,49 @@ st_QuerySamplesForFormat(struct gl_context *ctx, GLenum target,
    return num_sample_counts;
 }

+/* check whether any texture can be allocated for a given format */
+bool
+st_QueryTextureFormatSupport(struct gl_context *ctx, GLenum target, GLenum internalFormat)
+{
+   struct st_context *st = st_context(ctx);
+
+   /* If an sRGB framebuffer is unsupported, sRGB formats behave like linear
+    * formats.
+    */
+   if (!ctx->Extensions.EXT_sRGB) {
+      internalFormat = _mesa_get_linear_internalformat(internalFormat);
+   }
+
+   /* multisample textures need >= 2 samples */
+   unsigned min_samples = target == GL_TEXTURE_2D_MULTISAMPLE ||
+                          target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY ? 1 : 0;
+   unsigned max_samples = min_samples ? 16 : 1;
+
+   /* compressed textures will be allocated as e.g., RGBA8, so check that instead */
+   enum pipe_format pf = st_choose_format(st, internalFormat, GL_NONE, GL_NONE,
+                                          PIPE_TEXTURE_2D, 0, 0, 0,
+                                          false, false);
+   if (util_format_is_compressed(pf)) {
+      enum pipe_format fmts[2] = {0};
+      pf = st_mesa_format_to_pipe_format(st, st_pipe_format_to_mesa_format(pf));
+      fmts[0] = pf;
+      for (unsigned i = max_samples; i > min_samples; i >>= 1) {
+         if (find_supported_format(st->screen, fmts, PIPE_TEXTURE_2D,
+                                   i, i, PIPE_BIND_SAMPLER_VIEW, false))
+            return true;
+      }
+      return false;
+   }
+   for (unsigned i = max_samples; i > min_samples; i >>= 1) {
+      if (st_choose_format(st, internalFormat, GL_NONE, GL_NONE,
+                           PIPE_TEXTURE_2D, i, i, PIPE_BIND_SAMPLER_VIEW,
+                           false, false))
+         return true;
+   }
+
+   return false;
+}
+
 /**
  * ARB_internalformat_query2 driver hook.

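The new st_QueryTextureFormatSupport() helper probes sample counts by halving: 16, 8, 4, 2 for multisample targets, and a single probe at 1 for everything else, accepting the format as soon as any count is supported. Here is a sketch of just that probing loop; is_supported() is a stand-in for the driver queries (st_choose_format()/find_supported_format() in the real code), and the "4x only" behavior is invented for the demo.

#include <stdbool.h>
#include <stdio.h>

static bool is_supported(unsigned samples)
{
   return samples == 4; /* pretend the driver only supports 4x MSAA */
}

static bool any_sample_count_supported(bool multisample_target)
{
   /* multisample textures need >= 2 samples, so stop the loop above 1 */
   unsigned min_samples = multisample_target ? 1 : 0;
   unsigned max_samples = min_samples ? 16 : 1;

   for (unsigned i = max_samples; i > min_samples; i >>= 1) {
      if (is_supported(i))
         return true;
   }
   return false;
}

int main(void)
{
   printf("MSAA target: %d\n", any_sample_count_supported(true));   /* 1 */
   printf("plain target: %d\n", any_sample_count_supported(false)); /* 0 */
   return 0;
}
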
@@ -70,7 +70,8 @@ extern mesa_format
 st_ChooseTextureFormat(struct gl_context * ctx, GLenum target,
                        GLint internalFormat,
                        GLenum format, GLenum type);

+bool
+st_QueryTextureFormatSupport(struct gl_context *ctx, GLenum target, GLenum internalFormat);
 void
 st_QueryInternalFormat(struct gl_context *ctx, GLenum target,
                        GLenum internalFormat, GLenum pname, GLint *params);

@@ -3100,6 +3100,7 @@ dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint
    ID3D12PipelineState *old_pipeline_state =
       cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;

+   uint32_t view_instance_mask = 0;
    if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
       if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) {
          cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig;

@@ -3135,9 +3136,9 @@ dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint
       ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
       dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
       if (gfx->multiview.native_view_instancing)
-         ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, gfx->multiview.view_mask);
+         view_instance_mask = gfx->multiview.view_mask;
       else
-         ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, 1);
+         view_instance_mask = 1;

       if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point)
          cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

@@ -3150,6 +3151,11 @@ dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint
       ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
       cmdbuf->state.pipeline = pipeline;
    }
+
+   /* Deferring this until after the pipeline has been set due to an NVIDIA driver bug
+    * when view instancing mask is set with no pipeline bound. */
+   if (view_instance_mask)
+      ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask);
 }

 static void

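The three dzn hunks above implement one pattern: record the desired view-instance mask while pipeline state is being processed, and emit SetViewInstanceMask() only after SetPipelineState(), since (per the in-tree comment) setting the mask with no pipeline bound trips an NVIDIA driver bug. A condensed sketch of the deferral, with dummy stand-ins for the D3D12 command-list calls:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cmdlist { int dummy; };

static void set_pipeline_state(struct cmdlist *cl)
{ (void)cl; puts("SetPipelineState"); }

static void set_view_instance_mask(struct cmdlist *cl, uint32_t mask)
{ (void)cl; printf("SetViewInstanceMask(0x%x)\n", mask); }

static void update_pipeline(struct cmdlist *cl, bool native_view_instancing,
                            uint32_t view_mask)
{
   /* Record the mask instead of emitting it immediately. */
   uint32_t view_instance_mask = native_view_instancing ? view_mask : 1;

   set_pipeline_state(cl);

   /* Emit only once a pipeline is bound. */
   if (view_instance_mask)
      set_view_instance_mask(cl, view_instance_mask);
}

int main(void)
{
   struct cmdlist cl;
   update_pipeline(&cl, true, 0x3);
   return 0;
}
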
@@ -917,7 +917,9 @@ nvk_CmdEndRendering(VkCommandBuffer commandBuffer)

    if (need_resolve) {
       struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
       P_IMMD(p, NV9097, WAIT_FOR_IDLE, 0);
+      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE, {
+         .lines = LINES_ALL,
+      });

       nvk_meta_resolve_rendering(cmd, &vk_render);
    }

@@ -158,6 +158,9 @@ nvk_DestroyInstance(VkInstance _instance,
    if (!instance)
       return;

+   driDestroyOptionCache(&instance->dri_options);
+   driDestroyOptionInfo(&instance->available_dri_options);
+
    vk_instance_finish(&instance->vk);
    vk_free(&instance->vk.alloc, instance);
 }

@@ -351,6 +351,7 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
 out_err:
    if (device->has_vm_bind) {
       util_vma_heap_finish(&device->vma_heap);
+      util_vma_heap_finish(&device->bda_heap);
       simple_mtx_destroy(&device->vma_mutex);
    }
    if (ver)

@@ -372,6 +373,7 @@ nouveau_ws_device_destroy(struct nouveau_ws_device *device)

    if (device->has_vm_bind) {
       util_vma_heap_finish(&device->vma_heap);
+      util_vma_heap_finish(&device->bda_heap);
       simple_mtx_destroy(&device->vma_mutex);
    }

@@ -390,6 +390,8 @@ vn_CreateDescriptorPool(VkDevice device,
    vn_async_vkCreateDescriptorPool(dev->primary_ring, device, pCreateInfo,
                                    NULL, &pool_handle);

+   vn_tls_set_async_pipeline_create();
+
    *pDescriptorPool = pool_handle;

    return VK_SUCCESS;

@@ -570,6 +570,8 @@ vn_CreateDevice(VkPhysicalDevice physicalDevice,
       vn_log(instance, "%s", physical_dev->properties.vulkan_1_2.driverInfo);
    }

+   vn_tls_set_async_pipeline_create();
+
    *pDevice = vn_device_to_handle(dev);

    return VK_SUCCESS;

@@ -231,8 +231,15 @@ vn_image_store_reqs_in_cache(struct vn_device *dev,
    assert(cache->ht);

    simple_mtx_lock(&cache->mutex);
-   uint32_t cache_entry_count = _mesa_hash_table_num_entries(cache->ht);
-   if (cache_entry_count == IMAGE_REQS_CACHE_MAX_ENTRIES) {
+
+   /* Check if entry was added before lock */
+   if (_mesa_hash_table_search(cache->ht, key)) {
+      simple_mtx_unlock(&cache->mutex);
+      return;
+   }
+
+   if (_mesa_hash_table_num_entries(cache->ht) ==
+       IMAGE_REQS_CACHE_MAX_ENTRIES) {
       /* Evict/use the last entry in the lru list for this new entry */
       cache_entry =
          list_last_entry(&cache->lru, struct vn_image_reqs_cache_entry, head);

@@ -242,11 +249,11 @@ vn_image_store_reqs_in_cache(struct vn_device *dev,
    } else {
       cache_entry = vk_zalloc(alloc, sizeof(*cache_entry), VN_DEFAULT_ALIGN,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (!cache_entry) {
+         simple_mtx_unlock(&cache->mutex);
+         return;
+      }
    }
-   simple_mtx_unlock(&cache->mutex);
-
-   if (!cache_entry)
-      return;

    for (uint32_t i = 0; i < plane_count; i++)
       cache_entry->requirements[i] = requirements[i];

@@ -254,12 +261,10 @@ vn_image_store_reqs_in_cache(struct vn_device *dev,
    memcpy(cache_entry->key, key, SHA1_DIGEST_LENGTH);
    cache_entry->plane_count = plane_count;

-   simple_mtx_lock(&cache->mutex);
-   if (!_mesa_hash_table_search(cache->ht, cache_entry->key)) {
-      _mesa_hash_table_insert(dev->image_reqs_cache.ht, cache_entry->key,
-                              cache_entry);
-      list_add(&cache_entry->head, &cache->lru);
-   }
+   _mesa_hash_table_insert(dev->image_reqs_cache.ht, cache_entry->key,
+                           cache_entry);
+   list_add(&cache_entry->head, &cache->lru);
+
    simple_mtx_unlock(&cache->mutex);
 }

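Taken together, the three venus hunks rework vn_image_store_reqs_in_cache() so the duplicate-key check, the evict-or-allocate decision, and the hash-table insert all happen under a single hold of the cache mutex, closing the window in which two threads could insert the same key. A compact sketch of that shape follows; the single-slot "table" is a trivial stand-in for Mesa's hash table and LRU list.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;
static char cached_key[32];
static bool cache_used;

static void store_in_cache(const char *key)
{
   pthread_mutex_lock(&cache_mutex);

   /* Check if the entry was added by another thread before we locked. */
   if (cache_used && strcmp(cached_key, key) == 0) {
      pthread_mutex_unlock(&cache_mutex);
      return;
   }

   /* Evict-or-allocate and insert while still holding the lock. */
   snprintf(cached_key, sizeof(cached_key), "%s", key);
   cache_used = true;

   pthread_mutex_unlock(&cache_mutex);
}

int main(void)
{
   store_in_cache("image-reqs-abc");
   store_in_cache("image-reqs-abc"); /* second call is a no-op */
   printf("cached: %s\n", cached_key);
   return 0;
}
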
@@ -600,7 +600,7 @@ vn_queue_submission_add_query_feedback(struct vn_queue_submission *submit,
    VkCommandBuffer *feedback_cmd_handle =
       vn_get_feedback_cmd_handle(submit, feedback_cmds, cmd_count);
    const uint32_t stride = submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO
-                              ? sizeof(VkCommandBuffer *)
+                              ? sizeof(VkCommandBuffer)
                               : sizeof(VkCommandBufferSubmitInfo);

    struct vn_feedback_cmd_pool *feedback_cmd_pool = NULL;

@@ -174,6 +174,12 @@ struct vk_command_buffer {
    struct vk_framebuffer *framebuffer;
    VkRect2D render_area;

+   /**
+    * True if we are currently inside a CmdPipelineBarrier() inserted by
+    * the runtime's vk_render_pass.c
+    */
+   bool runtime_rp_barrier;
+
    /* This uses the same trick as STACK_ARRAY */
    struct vk_attachment_state *attachments;
    struct vk_attachment_state _attachments[8];

@@ -1392,13 +1392,40 @@ can_use_attachment_initial_layout(struct vk_command_buffer *cmd_buffer,
    return true;
 }

-static void
-set_attachment_layout(struct vk_command_buffer *cmd_buffer,
-                      uint32_t att_idx,
-                      uint32_t view_mask,
-                      VkImageLayout layout,
-                      VkImageLayout stencil_layout)
+uint32_t
+vk_command_buffer_get_attachment_layout(const struct vk_command_buffer *cmd_buffer,
+                                        const struct vk_image *image,
+                                        VkImageLayout *out_layout,
+                                        VkImageLayout *out_stencil_layout)
 {
+   const struct vk_render_pass *render_pass = cmd_buffer->render_pass;
+   assert(render_pass != NULL);
+
+   const struct vk_subpass *subpass =
+      &render_pass->subpasses[cmd_buffer->subpass_idx];
+   int first_view = ffs(subpass->view_mask) - 1;
+
+   for (uint32_t a = 0; a < render_pass->attachment_count; a++) {
+      if (cmd_buffer->attachments[a].image_view->image == image) {
+         *out_layout = cmd_buffer->attachments[a].views[first_view].layout;
+         *out_stencil_layout =
+            cmd_buffer->attachments[a].views[first_view].stencil_layout;
+         return a;
+      }
+   }
+   unreachable("Image not found in attachments");
+}
+
+void
+vk_command_buffer_set_attachment_layout(struct vk_command_buffer *cmd_buffer,
+                                        uint32_t att_idx,
+                                        VkImageLayout layout,
+                                        VkImageLayout stencil_layout)
+{
+   const struct vk_render_pass *render_pass = cmd_buffer->render_pass;
+   const struct vk_subpass *subpass =
+      &render_pass->subpasses[cmd_buffer->subpass_idx];
+   uint32_t view_mask = subpass->view_mask;
    struct vk_attachment_state *att_state = &cmd_buffer->attachments[att_idx];

    u_foreach_bit(view, view_mask) {

@@ -1650,9 +1677,10 @@ begin_subpass(struct vk_command_buffer *cmd_buffer,
          };
          __vk_append_struct(color_attachment, color_initial_layout);

-         set_attachment_layout(cmd_buffer, sp_att->attachment,
-                               subpass->view_mask,
-                               sp_att->layout, VK_IMAGE_LAYOUT_UNDEFINED);
+         vk_command_buffer_set_attachment_layout(cmd_buffer,
+                                                 sp_att->attachment,
+                                                 sp_att->layout,
+                                                 VK_IMAGE_LAYOUT_UNDEFINED);
       }
    } else {
       /* We've seen at least one of the views of this attachment before so

@@ -1770,9 +1798,10 @@ begin_subpass(struct vk_command_buffer *cmd_buffer,
                                 &stencil_initial_layout);
          }

-         set_attachment_layout(cmd_buffer, sp_att->attachment,
-                               subpass->view_mask,
-                               sp_att->layout, sp_att->stencil_layout);
+         vk_command_buffer_set_attachment_layout(cmd_buffer,
+                                                 sp_att->attachment,
+                                                 sp_att->layout,
+                                                 sp_att->stencil_layout);
       }
    } else {
       /* We've seen at least one of the views of this attachment before so

@@ -2048,8 +2077,10 @@ begin_subpass(struct vk_command_buffer *cmd_buffer,
          .pImageMemoryBarriers = image_barrier_count > 0 ?
                                  image_barriers : NULL,
       };
+      cmd_buffer->runtime_rp_barrier = true;
       disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer),
                                 &dependency_info);
+      cmd_buffer->runtime_rp_barrier = false;
    }

    STACK_ARRAY_FINISH(image_barriers);

@@ -2227,8 +2258,10 @@ end_subpass(struct vk_command_buffer *cmd_buffer,
          .memoryBarrierCount = 1,
          .pMemoryBarriers = &mem_barrier,
       };
+      cmd_buffer->runtime_rp_barrier = true;
      disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer),
                                &dependency_info);
+      cmd_buffer->runtime_rp_barrier = false;
    }
 }

@@ -2455,8 +2488,10 @@ vk_common_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
       .imageMemoryBarrierCount = image_barrier_count,
       .pImageMemoryBarriers = image_barriers,
    };
+   cmd_buffer->runtime_rp_barrier = true;
    disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer),
                              &dependency_info);
+   cmd_buffer->runtime_rp_barrier = false;
 }

 STACK_ARRAY_FINISH(image_barriers);

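The vk_command_buffer/vk_render_pass hunks above introduce a runtime_rp_barrier flag that the common render-pass emulation raises around each CmdPipelineBarrier2() it generates itself, so a driver's barrier entry point can tell runtime-generated barriers apart from application barriers. A minimal sketch of the bracketing pattern; the types and functions below are simplified stand-ins.

#include <stdbool.h>
#include <stdio.h>

struct cmd_buffer {
   bool runtime_rp_barrier;
};

static void driver_pipeline_barrier(struct cmd_buffer *cmd)
{
   if (cmd->runtime_rp_barrier)
      puts("barrier emitted by the runtime's render-pass code");
   else
      puts("barrier emitted by the application");
}

static void runtime_emit_subpass_barrier(struct cmd_buffer *cmd)
{
   cmd->runtime_rp_barrier = true;
   driver_pipeline_barrier(cmd);
   cmd->runtime_rp_barrier = false;
}

int main(void)
{
   struct cmd_buffer cmd = { .runtime_rp_barrier = false };
   driver_pipeline_barrier(&cmd);      /* application barrier */
   runtime_emit_subpass_barrier(&cmd); /* runtime barrier */
   return 0;
}
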
@@ -29,6 +29,9 @@
 extern "C" {
 #endif

+struct vk_command_buffer;
+struct vk_image;
+
 /**
  * Pseudo-extension struct that may be chained into VkRenderingAttachmentInfo
  * to indicate an initial layout for the attachment. This is only allowed if

@@ -425,9 +428,9 @@ vk_subpass_dependency_is_fb_local(const VkSubpassDependency2 *dep,
       VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
       VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;

    const VkPipelineStageFlags2 src_framebuffer_space_stages =
       framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT;
    const VkPipelineStageFlags2 dst_framebuffer_space_stages =
       framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT;

    /* Check for framebuffer-space dependency. */

@@ -439,6 +442,18 @@ vk_subpass_dependency_is_fb_local(const VkSubpassDependency2 *dep,
    return dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT;
 }

+uint32_t
+vk_command_buffer_get_attachment_layout(const struct vk_command_buffer *cmd_buffer,
+                                        const struct vk_image *image,
+                                        VkImageLayout *out_layout,
+                                        VkImageLayout *out_stencil_layout);
+
+void
+vk_command_buffer_set_attachment_layout(struct vk_command_buffer *cmd_buffer,
+                                        uint32_t att_idx,
+                                        VkImageLayout layout,
+                                        VkImageLayout stencil_layout);
+
 #ifdef __cplusplus
 }
 #endif

@@ -1666,7 +1666,7 @@ x11_present_to_x11_sw(struct x11_swapchain *chain, uint32_t image_index,
                          chain->gc,
                          image->base.row_pitches[0] / 4,
                          chain->extent.height,
-                         0,0,0,24,
+                         0,0,0,chain->depth,
                          image->base.row_pitches[0] * chain->extent.height,
                          image->base.cpu_map);
    xcb_discard_reply(chain->conn, cookie.sequence);

@@ -1681,7 +1681,7 @@ x11_present_to_x11_sw(struct x11_swapchain *chain, uint32_t image_index,
                          chain->gc,
                          image->base.row_pitches[0] / 4,
                          this_lines,
-                         0,y_start,0,24,
+                         0,y_start,0,chain->depth,
                          this_lines * stride_b,
                          (const uint8_t *)myptr + (y_start * stride_b));
    xcb_discard_reply(chain->conn, cookie.sequence);

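Both x11 hunks replace a hardcoded depth of 24 in the xcb_put_image() argument list with the swapchain's actual window depth, which matters for e.g. 30-bit (10 bpc) visuals. A hedged sketch of the call shape for reference; window/GC setup is omitted and the values are placeholders, with only the argument order (dst_x, dst_y, left_pad, depth) following xcb's documented signature.

#include <xcb/xcb.h>

static void present_rows(xcb_connection_t *conn, xcb_window_t window,
                         xcb_gcontext_t gc, uint8_t window_depth,
                         uint16_t width, uint16_t height,
                         uint32_t size_bytes, const uint8_t *pixels)
{
   xcb_put_image(conn, XCB_IMAGE_FORMAT_Z_PIXMAP, window, gc,
                 width, height,
                 0 /* dst_x */, 0 /* dst_y */,
                 0 /* left_pad */,
                 window_depth, /* was a hardcoded 24 before the fix */
                 size_bytes, pixels);
}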