diff --git a/.gitignore b/.gitignore index 5a7eb0ed273..c533f037efd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.pyo *.out /build +/.cache \ No newline at end of file diff --git a/.gitlab-ci/test/gitlab-ci.yml b/.gitlab-ci/test/gitlab-ci.yml index 87d379b26bc..629a1766e01 100644 --- a/.gitlab-ci/test/gitlab-ci.yml +++ b/.gitlab-ci/test/gitlab-ci.yml @@ -43,7 +43,7 @@ rustfmt: - rustfmt --verbose src/**/lib.rs - rustfmt --verbose src/**/main.rs -clang-format: +.clang-format: extends: - .formatting-check - .lint-clang-format-rules diff --git a/.pick_status.json b/.pick_status.json index 7cd88c93b22..66ab91119f3 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -124,7 +124,7 @@ "description": "nir: Mark nir_intrinsic_load_global_block_intel as divergent", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "9f44a264623461c98368185b023d99446676e039", "notes": null @@ -474,7 +474,7 @@ "description": "egl/wayland: Fix EGL_EXT_present_opaque", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "9aee7855d2ddf47169270d5d1e3e92ff6e5f65c2", "notes": null @@ -484,7 +484,7 @@ "description": "egl/wayland: Add opaque-equivalent FourCCs", "nominated": false, "nomination_type": 3, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/VERSION b/VERSION index 1b3e74f84e7..a19540167eb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.0.1 +24.0.2 diff --git a/build.sh b/build.sh new file mode 100755 index 00000000000..1ad850a1c0b --- /dev/null +++ b/build.sh @@ -0,0 +1,58 @@ +meson_options=( + --cross-file lib32 + -D android-libbacktrace=disabled + -D b_ndebug=true + -D dri3=enabled + -D egl=enabled + -D gallium-drivers=radeonsi,virgl,svga,swrast,crocus,zink + -D gallium-extra-hud=true + -D gallium-nine=true + -D gallium-omx=disabled + -D gallium-opencl=icd + -D gallium-rusticl=true + -D 
gallium-va=enabled + -D gallium-vdpau=enabled + -D gallium-xa=enabled + -D gbm=enabled + -D gles1=disabled + -D gles2=enabled + -D glvnd=true + -D glx=dri + -D intel-clc=enabled + -D libunwind=disabled + -D llvm=enabled + -D lmsensors=enabled + -D microsoft-clc=disabled + -D osmesa=true + -D platforms=x11,wayland + -D rust_std=2021 + -D shared-glapi=enabled + -D valgrind=disabled + -D video-codecs=vc1dec,h264dec,h264enc,h265dec,h265enc,av1enc,av1dec,vp9dec + -D vulkan-drivers=amd,swrast,virtio + -D vulkan-layers=device-select,overlay + -D vulkan-beta=true + -D opencl-spirv=true +) + +# Build only minimal debug info to reduce size +#CFLAGS+=' -g1' +#CXXFLAGS+=' -g1' + +export BINDGEN_EXTRA_CLANG_ARGS="-m32" + +arch-meson . build "${meson_options[@]}" +meson configure build --no-pager # Print config + +#if [ ! -f "build/build.ninja.bak" ]; then +# cp build/build.ninja build/build.ninja.back +#fi + +# Evil: Hack build to make proc-macro crate native +# Should become unnecessary with Meson 1.3 +#sed -e '/^rule rust_COMPILER$/irule rust_HACK\n command = rustc -C linker=gcc $ARGS $in\n deps = gcc\n depfile = $targetdep\n description = Compiling native Rust source $in\n' \ +# -e '/^build src\/gallium\/frontends\/rusticl\/librusticl_proc_macros\.so:/s/rust_COMPILER/rust_HACK/' \ +# -e '/^ LINK_ARGS =/s/ src\/gallium\/frontends\/rusticl\/librusticl_proc_macros\.so//' \ +# -i build/build.ninja + +$NINJAFLAGS meson compile -C build diff --git a/docs/envvars.rst b/docs/envvars.rst index a6af0532467..da19d87ceaf 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1340,6 +1340,8 @@ RADV driver environment variables rt extensions with older hardware. 
``gewave32`` enable wave32 for vertex/tess/geometry shaders (GFX10+) + ``gsfastlaunch2`` + use GS_FAST_LAUNCH=2 for Mesh shaders (GFX11+ dGPUs only) ``localbos`` enable local BOs ``nosam`` diff --git a/docs/features.txt b/docs/features.txt index 953696a58fc..ba75a34d38a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,6 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/docs/relnotes.rst b/docs/relnotes.rst index 40164bf1d42..20f4672ab90 100644 --- a/docs/relnotes.rst +++ b/docs/relnotes.rst @@ -3,6 +3,7 @@ Release Notes The release notes summarize what's new or changed in each Mesa release. +- :doc:`24.0.2 release notes <relnotes/24.0.2>` - :doc:`24.0.1 release notes <relnotes/24.0.1>` - :doc:`24.0.0 release notes <relnotes/24.0.0>` - :doc:`23.3.3 release notes <relnotes/23.3.3>` @@ -409,6 +410,7 @@ The release notes summarize what's new or changed in each Mesa release. :maxdepth: 1 :hidden: + 24.0.2 <relnotes/24.0.2> 24.0.1 <relnotes/24.0.1> 24.0.0 <relnotes/24.0.0> 23.3.3 <relnotes/23.3.3> diff --git a/docs/relnotes/24.0.2.rst b/docs/relnotes/24.0.2.rst new file mode 100644 index 00000000000..b3d4d74ae38 --- /dev/null +++ b/docs/relnotes/24.0.2.rst @@ -0,0 +1,230 @@ +Mesa 24.0.2 Release Notes / 2024-02-28 +====================================== + +Mesa 24.0.2 is a bug fix release which fixes bugs found since the 24.0.1 release. + +Mesa 24.0.2 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6.
OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 24.0.2 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage fails on MTL +- GTF-GL46.gtf42.GL3Tests.texture_storage.texture_storage_texture_as_framebuffer_attachment fails on MTL +- [intel][anv][build][regression] - genX_grl.h:27:10: fatal error: grl/grl_cl_kernel.h: No such file or directory +- RX 6600 VDPAU not recognizing HEVC_MAIN_10 correctly +- Running an app on another AMD GPU (offload, DRI_PRIME) produces corrupted frames on Wayland. +- VDPAU declares a texture as "immutable" without also setting its ImmutableLevels attribute. +- RX6600 hardware HEVC video decode fails for VDPAU but works for VA-API. (Can lock up GPU!) +- Rusticl panics when getting program build logs using opencl.hpp +- ue5 game issues lighting Rog Ally 7080u (z1e) +- Missing textures in RoboCop: Rogue City with mesh shaders enabled +- radv: Multiview PSO forgets to export layer in some cases. +- zink: flickering artifacts in Selaco + + +Changes +------- + +Boyuan Zhang (1): + +- radeonsi/vcn: only use multi slices reflist when available + +Chia-I Wu (1): + +- radv: fix pipeline stats mask + +Chris Rankin (2): + +- vdpau: Declare texture object as immutable using helper function. +- vdpau: Refactor query for video surface formats. + +Connor Abbott (1): + +- tu: Follow pipeline compatibility rules for dynamic descriptors + +Daniel Schürmann (1): + +- spirv: Fix SpvOpExpectKHR + +Daniel Stone (2): + +- egl/wayland: Add opaque-equivalent FourCCs +- egl/wayland: Fix EGL_EXT_present_opaque + +Dave Airlie (2): + +- nouveau/winsys: fix bda heap leak. 
+- nvk: fix dri options leak. + +David Rosca (1): + +- frontends/va: Only set VP9 segmentation fields when segmentation is enabled + +Eric Engestrom (10): + +- docs: add sha256sum for 24.0.1 +- [24.0-only change] ci: increase the kernel+rootfs builds timeout to 2h +- .pick_status.json: Update to c6e855b64b9015235462959b2b7f3e9fc34b2f1f +- .pick_status.json: Update to dce20690542c84ac00509a6db7902dcfc90b25bb +- .pick_status.json: Update to c12300844d3f084ca011a3f54f0cbaa9807418f0 +- .pick_status.json: Mark 3b927567ac927316eb11901f50ee1573ead44fd2 as denominated +- .pick_status.json: Update to 423add61e2d5b6ab6b5505d1feec01b93609f8fc +- .pick_status.json: Update to 4071c399a27932ea9253eb8a65d5725504bac6f3 +- .pick_status.json: Update to 82ff9204abab5267f82a9ce73f9dca1541ef5ee6 +- [24.0 only] disable clang-format + +Erik Faye-Lund (1): + +- mesa/main: allow GL_BGRA for FBOs + +Faith Ekstrand (1): + +- nvk: Invalidate the texture cache before MSAA resolves + +Hans-Kristian Arntzen (1): + +- radv: export multiview in VS/TES/GS for depth-only rendering + +Iago Toral Quiroga (1): + +- v3d,v3dv: fix BO allocation for shared vars + +Ian Romanick (1): + +- nir: Mark nir_intrinsic_load_global_block_intel as divergent + +Jesse Natalie (1): + +- dzn: Don't set view instancing mask until after the PSO + +Jordan Justen (1): + +- intel/dev: Add 2 additional ADL-N PCI ids + +Juston Li (1): + +- venus: fix image reqs cache store locking + +Karol Herbst (3): + +- zink: lower unaligned memory accesses +- rusticl/program: fix CL_PROGRAM_BINARIES for devs with no builds +- meson: do not pull in clc for clover + +Konstantin Seurer (5): + +- zink: Always set mfence->submit_count to the fence submit_count +- Revert "zink: always force flushes when originating from api frontend" +- llvmpipe: Use full subgroups when possible +- gallivm: Consider the initial mask when terminating loops +- ci: Update llvmpipe trace checksums + +Lionel Landwerlin (8): + +- vulkan/runtime: add helper to query 
attachment layout +- anv: fixup push descriptor shader analysis +- anv: reenable ANV_ALWAYS_BINDLESS +- anv: fix Wa_16013994831 macros +- anv: disable Wa_16013994831 +- intel/nir: only consider ray query variables in lowering +- anv: limit depth flush on dynamic render pass suspend +- anv: add missing generated file dep + +Martin Roukala (né Peres) (1): + +- radv/ci: switch vkcts-polaris10 from mupuf to KWS' farm + +Michel Dänzer (1): + +- egl/wayland: Flush after blitting to linear copy + +Mike Blumenkrantz (25): + +- zink: prune dmabuf export tracking when adding resource binds +- zink: fix sparse bo placement +- zink: zero allocate resident_defs array in ntv +- zink: move sparse lowering up in file +- zink: run sparse lowering after all optimization passes +- zink: adjust swizzled deref loads by the variable component offset +- zink: clamp zink_gfx_lib_cache::stages_present for generated tcs +- zink: promote gpl libs freeing during shader destroy out of prog loop +- zink: don't add VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT for sparse textures +- zink: delete maxDescriptorBufferBindings checks +- zink: avoid infinite recursion on (very) small BAR systems in bo alloc +- zink: add checks/compat for low-spec descriptor buffer implementations +- zink: add a second fence disambiguation case +- zink: force host-visible allocations for MAP_COHERENT resources +- zink: handle stencil_fallback in zink_clear_depth_stencil +- zink: don't destroy the current batch state on context destroy +- mesa: check driver format support for certain GetInternalformativ queries +- vk/wsi/x11/sw: use swapchain depth for putimage +- zink: only scan active batch states for free states if > 1 exist +- zink: fix longstanding issue with active batch state recycling +- zink: assert that batch_id is valid in zink_screen_check_last_finished() +- zink: clamp in_rp clears to fb size +- zink: fix (dynamic rendering) execution of scissored clears during flush +- zink: lock buffer age when chundering 
swapchain for readback +- zink: flag acquired swapchain image as readback target on acquire, not present + +Patrick Lerda (3): + +- r300: fix vertex_buffer related refcnt imbalance +- r300: fix r300_destroy_context() related memory leaks +- r300: fix memory leaks when register allocation fails + +Pavel Ondračka (1): + +- r300: add explicit flrp lowering + +Rhys Perry (2): + +- aco/ra: don't initialize assigned in initializer list +- aco/ra: fix GFX9- writelane + +Sagar Ghuge (1): + +- nir: Allow nir_texop_tg4 in implicit derivative + +Samuel Pitoiset (4): + +- radv: fix RGP barrier reason for RP barriers inserted by the runtime +- radv: enable GS_FAST_LAUNCH=2 by default for RDNA3 APUs (Phoenix) +- spirv: only consider IO variables when adjusting patch locations for TES +- radv: fix indirect dispatches on compute queue with conditional rendering on GFX7 + +Tapani Pälli (2): + +- intel/blorp: disable use of REP16 independent of format +- iris: make sure DS and TE are sent in pairs on >= gfx125 + +Yiwei Zhang (2): + +- venus: force async pipeline create on threads creating descriptor pools +- venus: fix the cmd stride used for qfb recording + +thfrwn (1): + +- mesa: fix off-by-one for newblock allocation in dlist_alloc diff --git a/src/amd/common/ac_vcn_enc.h b/src/amd/common/ac_vcn_enc.h index 73f2154387e..589f690f5d7 100644 --- a/src/amd/common/ac_vcn_enc.h +++ b/src/amd/common/ac_vcn_enc.h @@ -200,6 +200,8 @@ #define RENCODE_COLOR_SPACE_YUV 0 #define RENCODE_COLOR_SPACE_RGB 1 +#define RENCODE_VCN4_AV1_MAX_NUM_LTR 2 + typedef struct rvcn_enc_session_info_s { uint32_t interface_version; uint32_t sw_context_address_hi; @@ -611,10 +613,13 @@ typedef struct rvcn_enc_av1_color_description_s typedef struct rvcn_enc_av1_ref_frame_s { bool in_use; + bool is_ltr; uint32_t frame_id; uint32_t temporal_id; uint32_t slot_id; uint32_t frame_type; + uint32_t ltr_seq; + void *frame_signature; } rvcn_enc_av1_ref_frame_t; typedef struct rvcn_enc_av1_recon_slot_s diff --git 
a/src/amd/vulkan/bvh/update.comp b/src/amd/vulkan/bvh/update.comp index 905f807ebe6..c3c740238f2 100644 --- a/src/amd/vulkan/bvh/update.comp +++ b/src/amd/vulkan/bvh/update.comp @@ -74,17 +74,9 @@ void main() { bool is_active; if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { is_active = build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x); - } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } else { VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - /* arrayOfPointers */ - if (args.geom_data.stride == 8) { - src_ptr = DEREF(REF(VOID_REF)(src_ptr)); - } - - is_active = build_instance(bounds, src_ptr, dst_ptr, gl_GlobalInvocationID.x); + is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } if (!is_active) @@ -110,10 +102,15 @@ void main() { gl_StorageSemanticsBuffer, gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); - radv_bvh_box32_node node = DEREF(REF(radv_bvh_box32_node)OFFSET(src_bvh, offset)); + REF(radv_bvh_box32_node) src_node = REF(radv_bvh_box32_node)OFFSET(src_bvh, offset); + REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset); + uint32_t children[4]; + for (uint32_t i = 0; i < 4; ++i) + children[i] = DEREF(src_node).children[i]; + uint32_t valid_child_count = 0; for (uint32_t i = 0; i < 4; ++valid_child_count, ++i) - if (node.children[i] == RADV_BVH_INVALID_NODE) + if (children[i] == RADV_BVH_INVALID_NODE) break; /* Check if all children have been processed. 
As this is an atomic the last path coming from @@ -127,33 +124,37 @@ void main() { if (ready_child_count != valid_child_count - 1) break; + for (uint32_t i = 0; i < 4; ++i) + DEREF(dst_node).children[i] = children[i]; + for (uint32_t i = 0; i < valid_child_count; ++i) { - uint32_t child_offset = id_to_offset(node.children[i]); + uint32_t child_offset = id_to_offset(children[i]); + radv_aabb child_bounds; if (child_offset == dst_offset) - node.coords[i] = bounds; + child_bounds = bounds; else if (child_offset >= internal_nodes_offset) { - radv_aabb child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); - radv_bvh_box32_node child_node = DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset)); + child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); + REF(radv_bvh_box32_node) child_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset); for (uint32_t j = 0; j < 4; ++j) { - if (child_node.children[j] == RADV_BVH_INVALID_NODE) + if (DEREF(child_node).children[j] == RADV_BVH_INVALID_NODE) break; - child_bounds.min = min(child_bounds.min, child_node.coords[j].min); - child_bounds.max = max(child_bounds.max, child_node.coords[j].max); + child_bounds.min = min(child_bounds.min, DEREF(child_node).coords[j].min); + child_bounds.max = max(child_bounds.max, DEREF(child_node).coords[j].max); } - node.coords[i] = child_bounds; } else { uint32_t child_index = (child_offset - first_leaf_offset) / leaf_node_size; - node.coords[i] = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); + child_bounds = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); } - } - DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset)) = node; + DEREF(dst_node).coords[i] = child_bounds; + } if (parent_id == RADV_BVH_ROOT_NODE) { radv_aabb root_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); for (uint32_t i = 0; i < valid_child_count; ++i) { - root_bounds.min = min(root_bounds.min, node.coords[i].min); - root_bounds.max = max(root_bounds.max, node.coords[i].max); + radv_aabb 
bounds = DEREF(dst_node).coords[i]; + root_bounds.min = min(root_bounds.min, bounds.min); + root_bounds.max = max(root_bounds.max, bounds.max); } DEREF(args.dst).aabb = root_bounds; } diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 6d0ba625385..0c735ef3fc7 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -292,6 +292,9 @@ static const struct vk_instance_extension_table radv_instance_extensions_support .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 3f8b1970f44..d02410dda56 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -121,6 +121,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT .EXT_acquire_xlib_display = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_debug_report = true, .EXT_debug_utils = true, diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 4cb456bc747..7aa49801ced 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -189,7 +189,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_resume_shader_address_amd: case nir_intrinsic_load_global_const_block_intel: case nir_intrinsic_load_reloc_const_intel: - case nir_intrinsic_load_global_block_intel: case nir_intrinsic_load_btd_global_arg_addr_intel: case nir_intrinsic_load_btd_local_arg_addr_intel: case nir_intrinsic_load_mesh_inline_data_intel: @@ -219,6 +218,13 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) is_divergent = false; break; + /* This is divergent because it specifically loads sequential values into + 
* successive SIMD lanes. + */ + case nir_intrinsic_load_global_block_intel: + is_divergent = true; + break; + case nir_intrinsic_decl_reg: is_divergent = nir_intrinsic_divergent(instr); break; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 4105c2bfe4d..00d53e9b3fe 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -73,6 +73,7 @@ static const struct dri2_wl_visual { */ int alt_dri_image_format; int bpp; + int opaque_wl_drm_format; int rgba_shifts[4]; unsigned int rgba_sizes[4]; } dri2_wl_visuals[] = { @@ -83,6 +84,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR16161616F, 0, 64, + WL_DRM_FORMAT_XBGR16F, {0, 16, 32, 48}, {16, 16, 16, 16}, }, @@ -93,6 +95,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR16161616F, 0, 64, + WL_DRM_FORMAT_XBGR16F, {0, 16, 32, -1}, {16, 16, 16, 0}, }, @@ -103,6 +106,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XBGR2101010, 32, + WL_DRM_FORMAT_XRGB2101010, {20, 10, 0, -1}, {10, 10, 10, 0}, }, @@ -113,6 +117,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ABGR2101010, 32, + WL_DRM_FORMAT_XRGB2101010, {20, 10, 0, 30}, {10, 10, 10, 2}, }, @@ -123,6 +128,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XRGB2101010, 32, + WL_DRM_FORMAT_XBGR2101010, {0, 10, 20, -1}, {10, 10, 10, 0}, }, @@ -133,6 +139,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ARGB2101010, 32, + WL_DRM_FORMAT_XBGR2101010, {0, 10, 20, 30}, {10, 10, 10, 2}, }, @@ -143,6 +150,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XRGB8888, {16, 8, 0, -1}, {8, 8, 8, 0}, }, @@ -153,6 +161,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_NONE, 32, + 
WL_DRM_FORMAT_XRGB8888, {16, 8, 0, 24}, {8, 8, 8, 8}, }, @@ -163,6 +172,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XBGR8888, {0, 8, 16, 24}, {8, 8, 8, 8}, }, @@ -173,6 +183,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XBGR8888, {0, 8, 16, -1}, {8, 8, 8, 0}, }, @@ -183,6 +194,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_RGB565, __DRI_IMAGE_FORMAT_NONE, 16, + WL_DRM_FORMAT_RGB565, {11, 5, 0, -1}, {5, 6, 5, 0}, }, @@ -193,6 +205,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB1555, __DRI_IMAGE_FORMAT_ABGR1555, 16, + WL_DRM_FORMAT_XRGB1555, {10, 5, 0, 15}, {5, 5, 5, 1}, }, @@ -203,6 +216,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB1555, __DRI_IMAGE_FORMAT_XBGR1555, 16, + WL_DRM_FORMAT_XRGB1555, {10, 5, 0, -1}, {5, 5, 5, 0}, }, @@ -213,6 +227,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB4444, __DRI_IMAGE_FORMAT_XBGR4444, 16, + WL_DRM_FORMAT_XRGB4444, {8, 4, 0, 12}, {4, 4, 4, 4}, }, @@ -223,6 +238,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB4444, __DRI_IMAGE_FORMAT_XBGR4444, 16, + WL_DRM_FORMAT_XRGB4444, {8, 4, 0, -1}, {4, 4, 4, 0}, }, @@ -230,7 +246,7 @@ static const struct dri2_wl_visual { static int dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, - const __DRIconfig *config, bool force_opaque) + const __DRIconfig *config) { int shifts[4]; unsigned int sizes[4]; @@ -240,16 +256,13 @@ dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { const struct dri2_wl_visual *wl_visual = &dri2_wl_visuals[i]; - int cmp_rgb_shifts = - memcmp(shifts, wl_visual->rgba_shifts, 3 * sizeof(shifts[0])); - int cmp_rgb_sizes = - memcmp(sizes, wl_visual->rgba_sizes, 3 * sizeof(sizes[0])); + int cmp_rgba_shifts = + memcmp(shifts, wl_visual->rgba_shifts, 4 * 
sizeof(shifts[0])); + int cmp_rgba_sizes = + memcmp(sizes, wl_visual->rgba_sizes, 4 * sizeof(sizes[0])); - if (cmp_rgb_shifts == 0 && cmp_rgb_sizes == 0 && - wl_visual->rgba_shifts[3] == (force_opaque ? -1 : shifts[3]) && - wl_visual->rgba_sizes[3] == (force_opaque ? 0 : sizes[3])) { + if (cmp_rgba_shifts == 0 && cmp_rgba_sizes == 0) return i; - } } return -1; @@ -302,7 +315,7 @@ dri2_wl_is_format_supported(void *user_data, uint32_t format) for (int i = 0; dri2_dpy->driver_configs[i]; i++) if (j == dri2_wl_visual_idx_from_config( - dri2_dpy, dri2_dpy->driver_configs[i], false)) + dri2_dpy, dri2_dpy->driver_configs[i])) return true; return false; @@ -710,43 +723,10 @@ dri2_wl_create_window_surface(_EGLDisplay *disp, _EGLConfig *conf, dri2_surf->base.Width = window->width; dri2_surf->base.Height = window->height; -#ifndef NDEBUG - /* Enforce that every visual has an opaque variant (requirement to support - * EGL_EXT_present_opaque) - */ - for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { - const struct dri2_wl_visual *transparent_visual = &dri2_wl_visuals[i]; - if (transparent_visual->rgba_sizes[3] == 0) { - continue; - } - - bool found_opaque_equivalent = false; - for (unsigned int j = 0; j < ARRAY_SIZE(dri2_wl_visuals); j++) { - const struct dri2_wl_visual *opaque_visual = &dri2_wl_visuals[j]; - if (opaque_visual->rgba_sizes[3] != 0) { - continue; - } - - int cmp_rgb_shifts = - memcmp(transparent_visual->rgba_shifts, opaque_visual->rgba_shifts, - 3 * sizeof(opaque_visual->rgba_shifts[0])); - int cmp_rgb_sizes = - memcmp(transparent_visual->rgba_sizes, opaque_visual->rgba_sizes, - 3 * sizeof(opaque_visual->rgba_sizes[0])); - - if (cmp_rgb_shifts == 0 && cmp_rgb_sizes == 0) { - found_opaque_equivalent = true; - break; - } - } - - assert(found_opaque_equivalent); - } -#endif - - visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config, - dri2_surf->base.PresentOpaque); + visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config); 
assert(visual_idx != -1); + assert(dri2_wl_visuals[visual_idx].dri_image_format != + __DRI_IMAGE_FORMAT_NONE); if (dri2_dpy->wl_dmabuf || dri2_dpy->wl_drm) { dri2_surf->format = dri2_wl_visuals[visual_idx].wl_drm_format; @@ -1501,6 +1481,9 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy, close(fd); } + if (dri2_surf && dri2_surf->base.PresentOpaque) + fourcc = dri2_wl_visuals[visual_idx].opaque_wl_drm_format; + ret = zwp_linux_buffer_params_v1_create_immed(params, width, height, fourcc, 0); zwp_linux_buffer_params_v1_destroy(params); @@ -2084,7 +2067,7 @@ dri2_wl_add_configs_for_visuals(_EGLDisplay *disp) /* No match for config. Try if we can blitImage convert to a visual */ c = dri2_wl_visual_idx_from_config(dri2_dpy, - dri2_dpy->driver_configs[i], false); + dri2_dpy->driver_configs[i]); if (c == -1) continue; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 96af807661f..7c0569ad361 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -120,6 +120,9 @@ static const struct vk_instance_extension_table tu_instance_extensions_supported #ifdef VK_USE_PLATFORM_DISPLAY_KHR .EXT_direct_mode_display = true, .EXT_display_surface_counter = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_swapchain_colorspace = TU_HAS_SURFACE, } }; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index 9d0aa41e342..86ab2457c93 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -769,7 +769,17 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, pic->seq.num_temporal_layers : RENCODE_MAX_NUM_TEMPORAL_LAYERS; /* 1, 2 layer needs 1 reference, and 3, 4 layer needs 2 references */ - enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2; + enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2 + + 
RENCODE_VCN4_AV1_MAX_NUM_LTR; + for (int i = 0; i < RENCDOE_AV1_REFS_PER_FRAME; i++) + enc_pic->av1_ref_frame_idx[i] = pic->ref_frame_idx[i]; + + for (int i = 0; i < RENCDOE_AV1_NUM_REF_FRAMES; i++) + enc_pic->av1_ref_list[i] = pic->ref_list[i]; + + enc_pic->av1_recon_frame = pic->recon_frame; + enc_pic->av1_ref_frame_ctrl_l0 = pic->ref_frame_ctrl_l0; + radeon_vcn_enc_quality_modes(enc, &pic->quality_modes); enc_pic->frame_id_numbers_present = pic->seq.seq_bits.frame_id_number_present_flag; enc_pic->enable_error_resilient_mode = pic->error_resilient_mode; @@ -785,6 +795,7 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, enc_pic->disable_screen_content_tools = !pic->allow_screen_content_tools; enc_pic->is_obu_frame = pic->enable_frame_obu; enc_pic->need_av1_seq = (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY); + enc_pic->av1_mark_long_term_reference = pic->long_term_reference; radeon_vcn_enc_av1_get_spec_misc_param(enc, pic); radeon_vcn_enc_av1_timing_info(enc, pic); diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h index 114315ac948..b3b18080207 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h @@ -124,6 +124,7 @@ struct radeon_enc_pic { uint32_t is_obu_frame:1; uint32_t stream_obu_frame:1; /* all frames have the same number of tiles */ uint32_t need_av1_seq:1; + uint32_t av1_mark_long_term_reference:1; }; uint32_t render_width; uint32_t render_height; @@ -131,6 +132,7 @@ struct radeon_enc_pic { enum pipe_av1_enc_frame_type last_frame_type; uint32_t display_frame_id; uint32_t frame_id; + uint32_t temporal_seq_num; uint32_t order_hint; uint32_t order_hint_bits; uint32_t refresh_frame_flags; @@ -146,6 +148,12 @@ struct radeon_enc_pic { uint32_t count_last_layer; rvcn_enc_av1_ref_frame_t frames[RENCDOE_AV1_NUM_REF_FRAMES]; rvcn_enc_av1_recon_slot_t recon_slots[RENCDOE_AV1_NUM_REF_FRAMES + 1]; + uint8_t 
av1_ref_frame_idx[RENCDOE_AV1_REFS_PER_FRAME]; + void *av1_ref_list[RENCDOE_AV1_NUM_REF_FRAMES]; + void *av1_recon_frame; + uint32_t av1_ref_frame_ctrl_l0; + uint32_t av1_ref_frame_ctrl_l1; + uint32_t av1_ltr_seq; }; rvcn_enc_session_info_t session_info; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c index 5255922a555..05a3aa2f055 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c @@ -191,10 +191,14 @@ static void redeon_enc_av1_release_recon_slot(struct radeon_encoder *enc, static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, uint32_t frame_id, uint32_t temporal_id, + uint32_t mark_long_term, /* mark it as long term reference */ + void *frame_signature, enum pipe_av1_enc_frame_type frame_type) { uint32_t i = 0; + assert(frame_signature); + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; if (!frame->in_use) { @@ -203,6 +207,10 @@ static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, frame->temporal_id = temporal_id; frame->slot_id = radeon_enc_av1_alloc_recon_slot(enc); frame->frame_type = frame_type; + frame->frame_signature = frame_signature; + frame->is_ltr = !!(mark_long_term); + if (frame->is_ltr) + frame->ltr_seq = enc->enc_pic.av1_ltr_seq++; break; } } @@ -251,6 +259,33 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, { uint32_t i = 0; + /* checking long term frames if it reached the limit, it needs to + * release the oldest. 
*/ + if (enc->enc_pic.av1_mark_long_term_reference) { + int cnt = 0; + uint32_t min_seq = (uint32_t)-1; + uint32_t min_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr) { + if (frame->ltr_seq < min_seq) { + min_seq = frame->ltr_seq; + min_seq_idx = i; + } + cnt++; + + /* this means some LTR ref buffer has been re-used. */ + if (enc->enc_pic.av1_recon_frame == frame->frame_signature) + RVID_ERR("recon duplicated! it could refer to a wrong frame!\n"); + } + } + /* release the frame with minimum ltr seq number (oldest), + * this check is happening on each frame, the total number + * of LTR is limited by RENCODE_VCN4_AV1_MAX_NUM_LTR.*/ + if (cnt > RENCODE_VCN4_AV1_MAX_NUM_LTR) + radeon_enc_av1_release_ref_frame(enc, min_seq_idx, false); + } + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { rvcn_enc_av1_recon_slot_t *slot = &enc->enc_pic.recon_slots[i]; if (slot->in_use && slot->is_orphaned) { @@ -264,39 +299,102 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, if (frame->in_use) { if (temporal_id < frame->temporal_id) radeon_enc_av1_release_ref_frame(enc, i, false); - else if (temporal_id == frame->temporal_id) + else if (temporal_id == frame->temporal_id && (!frame->is_ltr)) radeon_enc_av1_release_ref_frame(enc, i, true); } } } +static bool radeon_enc_av1_search_requested_reference( + struct radeon_encoder *enc, + uint32_t *idx) +{ + bool find = false; + /* Here is the assumption, the 3rd item of ref_frame_ctrl_l0 + indicates which slot it needs to find in ref_frame_idx[], and + from ref_frame_idx to find the requested reference frame + in ref_list[] */ + #define RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM (0x1c0) /* 111 000 000 */ + uint32_t marked_ref_frame_idx = (RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM & + enc->enc_pic.av1_ref_frame_ctrl_l0) >> 6; + /* valid marked_ref_frame_idx > 0 */ + if (marked_ref_frame_idx) { + 
uint32_t requested_frame_idx = + enc->enc_pic.av1_ref_frame_idx[marked_ref_frame_idx - 1]; + void *request_signature = NULL; + + if (requested_frame_idx >= RENCDOE_AV1_NUM_REF_FRAMES) + goto end; + + request_signature = enc->enc_pic.av1_ref_list[requested_frame_idx]; + for (uint32_t i = 0; i < ARRAY_SIZE((enc->enc_pic.frames)); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && + frame->is_ltr && + (request_signature == frame->frame_signature)) { + find = true; + /* increase the frame seq number after found, when it + * reaches the maximum limit, this found one will not + * be released. */ + frame->ltr_seq = enc->enc_pic.av1_ltr_seq++; + *idx = i; + break; + } + } + } +end: + return find; +} + static uint32_t radeon_enc_av1_obtain_ref0_frame(struct radeon_encoder *enc, uint32_t temporal_id) { + /* when only ltr frames in DPB, it needs to use the biggest ltr_seq + * one (latest) for reference, instead of the first one met. */ uint32_t i = 0; + uint32_t ret_idx = 0; + uint32_t max_seq = 0; + uint32_t max_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr && (frame->ltr_seq >= max_seq)) { + max_seq = frame->ltr_seq; + max_seq_idx = i; + } + } for (i = ARRAY_SIZE(enc->enc_pic.frames); i > 0; i--) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i - 1]; - if (frame->in_use && frame->temporal_id <= temporal_id) + if (frame->in_use && frame->temporal_id <= temporal_id) { + if (frame->is_ltr) + ret_idx = max_seq_idx; + else + ret_idx = i - 1; + break; + } } - /* not find, ref = 0, or ref = i - 1 */ - return i == 0 ? 
i : i - 1; + return ret_idx; } static void radeon_enc_reset_av1_dpb_frames(struct radeon_encoder *enc) { - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { - enc->enc_pic.frames[i].in_use = false; - enc->enc_pic.frames[i].frame_id = 0; - enc->enc_pic.frames[i].temporal_id = 0; - enc->enc_pic.frames[i].slot_id = 0; - enc->enc_pic.frames[i].frame_type = 0; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) + enc->enc_pic.frames[i] = (rvcn_enc_av1_ref_frame_t) { + .in_use = false, + .is_ltr = false, + .ltr_seq = 0, + .frame_id = 0, + .temporal_id = 0, + .slot_id = 0, + .frame_type = 0, + .frame_signature = NULL, + }; - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { - enc->enc_pic.recon_slots[i].in_use = false; - enc->enc_pic.recon_slots[i].is_orphaned = false; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) + enc->enc_pic.recon_slots[i] = (rvcn_enc_av1_recon_slot_t) { + .in_use = false, + .is_orphaned = false, + }; } static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) @@ -304,21 +402,35 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) struct radeon_enc_pic *pic = &enc->enc_pic; uint32_t current_slot; uint32_t ref_slot; + uint32_t request_idx; + bool find = false; if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY) { pic->frame_id = 0; + pic->temporal_seq_num = 0; pic->temporal_id = 0; pic->reference_delta_frame_id = 0; pic->reference_frame_index = 0; pic->last_frame_type = PIPE_AV1_ENC_FRAME_TYPE_KEY; + pic->av1_ltr_seq = 0; current_slot = 0; ref_slot = 0; + request_idx = 0; radeon_enc_reset_av1_dpb_frames(enc); } else { - pic->temporal_id = radeon_enc_av1_calculate_temporal_id(pic->frame_id, + find = radeon_enc_av1_search_requested_reference(enc, &request_idx); + if (pic->av1_mark_long_term_reference || find) + pic->temporal_seq_num = 0; /*for ltr, always temporal_id = 0 */ + else + pic->temporal_seq_num++; + + pic->temporal_id = 
radeon_enc_av1_calculate_temporal_id(pic->temporal_seq_num, pic->num_temporal_layers - 1); - pic->reference_frame_index = - radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); + if (find) + pic->reference_frame_index = request_idx; + else + pic->reference_frame_index = + radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); ref_slot = pic->frames[pic->reference_frame_index].slot_id; pic->last_frame_type = pic->frames[pic->reference_frame_index].frame_type; radeon_enc_av1_pre_scan_frames(enc, pic->temporal_id); @@ -336,6 +448,8 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) pic->frames[pic->reference_frame_index].frame_id; current_slot = radeon_enc_av1_alloc_curr_frame(enc, pic->frame_id, pic->temporal_id, + pic->av1_mark_long_term_reference, + pic->av1_recon_frame, pic->frame_type); if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY || pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_SWITCH || diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index f982d6dc64f..477654f8bcc 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -88,6 +88,9 @@ static const struct vk_instance_extension_table lvp_instance_extensions_supporte #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static const struct vk_device_extension_table lvp_device_extensions_supported = { diff --git a/src/gallium/frontends/va/picture_av1_enc.c b/src/gallium/frontends/va/picture_av1_enc.c index e34190f6266..3eb119738aa 100644 --- a/src/gallium/frontends/va/picture_av1_enc.c +++ b/src/gallium/frontends/va/picture_av1_enc.c @@ -133,6 +133,7 @@ VAStatus vlVaHandleVAEncSequenceParameterBufferTypeAV1(vlVaDriver *drv, vlVaCont VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VAEncPictureParameterBufferAV1 *av1 = buf->data; 
+ struct pipe_video_buffer *video_buf = NULL; vlVaBuffer *coded_buf; int i; @@ -142,6 +143,7 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte context->desc.av1enc.enable_frame_obu = av1->picture_flags.bits.enable_frame_obu; context->desc.av1enc.allow_high_precision_mv = av1->picture_flags.bits.allow_high_precision_mv; context->desc.av1enc.palette_mode_enable = av1->picture_flags.bits.palette_mode_enable; + context->desc.av1enc.long_term_reference = av1->picture_flags.bits.long_term_reference; context->desc.av1enc.num_tiles_in_pic = av1->tile_cols * av1->tile_rows; context->desc.av1enc.tile_rows = av1->tile_rows; context->desc.av1enc.tile_cols = av1->tile_cols; @@ -157,14 +159,14 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte /* The last tile column or row size needs to be derived. */ for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->width_in_sbs_minus_1); i++) context->desc.av1enc.width_in_sbs_minus_1[i] = av1->width_in_sbs_minus_1[i]; - + /* The last tile column or row size needs to be derived. */ for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->height_in_sbs_minus_1); i++) context->desc.av1enc.height_in_sbs_minus_1[i] = av1->height_in_sbs_minus_1[i]; context->desc.av1enc.cdef.cdef_damping_minus_3 = av1->cdef_damping_minus_3; context->desc.av1enc.cdef.cdef_bits = av1->cdef_bits; - + for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->cdef_y_strengths); i++) context->desc.av1enc.cdef.cdef_y_strengths[i] = av1->cdef_y_strengths[i]; @@ -226,7 +228,7 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte context->desc.av1enc.rc[i].min_qp = av1->min_base_qindex ? av1->min_base_qindex : 1; context->desc.av1enc.rc[i].max_qp = av1->max_base_qindex ? 
av1->max_base_qindex : 255; /* Distinguishes from the default params set for these values and app specific params passed down */ - context->desc.av1enc.rc[i].app_requested_qp_range = + context->desc.av1enc.rc[i].app_requested_qp_range = ((context->desc.av1enc.rc[i].max_qp != AV1_MAX_QP_DEFAULT) || (context->desc.av1enc.rc[i].min_qp != AV1_MIN_QP_DEFAULT)); } @@ -250,7 +252,25 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte if (context->desc.av1enc.frame_type == FRAME_TYPE_KEY_FRAME) context->desc.av1enc.last_key_frame_num = context->desc.av1enc.frame_num; - for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->ref_frame_idx); i++) + if (av1->reconstructed_frame != VA_INVALID_ID) { + vlVaGetReferenceFrame(drv, av1->reconstructed_frame, &video_buf); + context->desc.av1enc.recon_frame = video_buf; + } + else + context->desc.av1enc.recon_frame = NULL; + + for (int i = 0 ; i < ARRAY_SIZE(context->desc.av1enc.ref_list); i++) { + if (av1->reference_frames[i] != VA_INVALID_ID) { + vlVaGetReferenceFrame(drv, av1->reference_frames[i], &video_buf); + context->desc.av1enc.ref_list[i] = video_buf; + } + else + context->desc.av1enc.ref_list[i] = NULL; + } + + context->desc.av1enc.ref_frame_ctrl_l0 = av1->ref_frame_ctrl_l0.value; + + for (int i = 0 ; i < ARRAY_SIZE(av1->ref_frame_idx); i++) context->desc.av1enc.ref_frame_idx[i] = av1->ref_frame_idx[i]; /* Initialize slice descriptors for this picture */ @@ -817,7 +837,7 @@ void getEncParamPresetAV1(vlVaContext *context) VAStatus vlVaHandleVAEncSliceParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VAEncTileGroupBufferAV1 *tile_buf = (VAEncTileGroupBufferAV1*) buf->data; - + if (context->desc.av1enc.num_tile_groups < ARRAY_SIZE(context->desc.av1enc.tile_groups)) { context->desc.av1enc.tile_groups[context->desc.av1enc.num_tile_groups].tile_group_start = tile_buf->tg_start; context->desc.av1enc.tile_groups[context->desc.av1enc.num_tile_groups].tile_group_end = tile_buf->tg_end; 
@@ -825,7 +845,7 @@ VAStatus vlVaHandleVAEncSliceParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext } else { return VA_STATUS_ERROR_NOT_ENOUGH_BUFFER; } - + return VA_STATUS_SUCCESS; } #endif /* VA_CHECK_VERSION(1, 16, 0) */ diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 1afa57dadf4..a6592dd2722 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -996,6 +996,7 @@ struct pipe_av1_enc_picture_desc uint32_t use_superres:1; uint32_t reduced_tx_set:1; uint32_t skip_mode_present:1; + uint32_t long_term_reference:1; }; struct pipe_enc_quality_modes quality_modes; struct pipe_enc_intra_refresh intra_refresh; @@ -1028,6 +1029,9 @@ struct pipe_av1_enc_picture_desc uint32_t primary_ref_frame; uint8_t refresh_frame_flags; uint8_t ref_frame_idx[7]; + uint32_t ref_frame_ctrl_l0; /* forward prediction only */ + void *ref_list[8]; /* for tracking ref frames */ + void *recon_frame; struct { uint8_t cdef_damping_minus_3; diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 34f1fa19378..ab7ff36420c 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -155,6 +155,9 @@ static const struct vk_instance_extension_table pvr_instance_extensions = { .KHR_get_physical_device_properties2 = true, .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM, .KHR_surface = PVR_USE_WSI_PLATFORM, +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = PVR_USE_WSI_PLATFORM, +#endif .EXT_debug_report = true, .EXT_debug_utils = true, }; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 02010295343..d486c40ca32 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -208,6 +208,9 @@ static const struct vk_instance_extension_table instance_extensions = { .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef 
VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index 2819dd634b2..fce5bc26b5a 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -183,6 +183,9 @@ static const struct vk_instance_extension_table instance_extensions = { .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void diff --git a/src/microsoft/vulkan/dzn_device.c b/src/microsoft/vulkan/dzn_device.c index be9b0ee7014..baccaf21e39 100644 --- a/src/microsoft/vulkan/dzn_device.c +++ b/src/microsoft/vulkan/dzn_device.c @@ -96,6 +96,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_debug_report = true, .EXT_debug_utils = true, diff --git a/src/nouveau/vulkan/nvk_instance.c b/src/nouveau/vulkan/nvk_instance.c index 2fcea916fe5..d1259dae2e0 100644 --- a/src/nouveau/vulkan/nvk_instance.c +++ b/src/nouveau/vulkan/nvk_instance.c @@ -40,6 +40,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT .EXT_acquire_xlib_display = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .KHR_device_group_creation = true, .KHR_external_fence_capabilities = true, diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c index ccf92a17fbe..37c548d8e80 100644 --- a/src/panfrost/vulkan/panvk_device.c +++ b/src/panfrost/vulkan/panvk_device.c @@ -146,6 +146,9 @@ static const struct vk_instance_extension_table panvk_instance_extensions = { #ifdef VK_USE_PLATFORM_WAYLAND_KHR .KHR_wayland_surface = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface 
= true, +#endif }; static void diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c index 06627aeeb5b..8fd6d71ef35 100644 --- a/src/virtio/vulkan/vn_instance.c +++ b/src/virtio/vulkan/vn_instance.c @@ -49,6 +49,9 @@ static const struct vk_instance_extension_table #endif #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif };