From d990462c9a026e4b4980edddb5ab03b0373b9982 Mon Sep 17 00:00:00 2001 From: Denis Date: Wed, 7 Feb 2024 23:08:45 +0100 Subject: [PATCH 01/25] .gitignore .cache --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5a7eb0ed273..c533f037efd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.pyo *.out /build +/.cache \ No newline at end of file From 13d77565a97b50b4f85669d66950708ce15d4fbf Mon Sep 17 00:00:00 2001 From: Denis Date: Wed, 7 Feb 2024 14:55:00 +0100 Subject: [PATCH 02/25] Add build script --- build.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 build.sh diff --git a/build.sh b/build.sh new file mode 100755 index 00000000000..30b7fd71fc3 --- /dev/null +++ b/build.sh @@ -0,0 +1,58 @@ +meson_options=( + --cross-file lib32 + -D android-libbacktrace=disabled + -D b_ndebug=true + -D dri3=enabled + -D egl=enabled + -D gallium-drivers=radeonsi,virgl,svga,swrast,crocus,zink + -D gallium-extra-hud=true + -D gallium-nine=true + -D gallium-omx=disabled + -D gallium-opencl=icd + -D gallium-rusticl=true + -D gallium-va=enabled + -D gallium-vdpau=enabled + -D gallium-xa=enabled + -D gbm=enabled + -D gles1=disabled + -D gles2=enabled + -D glvnd=true + -D glx=dri + -D intel-clc=enabled + -D libunwind=disabled + -D llvm=enabled + -D lmsensors=enabled + -D microsoft-clc=disabled + -D osmesa=true + -D platforms=x11,wayland + -D rust_std=2021 + -D shared-glapi=enabled + -D valgrind=disabled + -D video-codecs=vc1dec,h264dec,h264enc,h265dec,h265enc + -D vulkan-drivers=amd,swrast,virtio + -D vulkan-layers=device-select,overlay + -D vulkan-beta=true + -D opencl-spirv=true +) + +# Build only minimal debug info to reduce size +#CFLAGS+=' -g1' +#CXXFLAGS+=' -g1' + +export BINDGEN_EXTRA_CLANG_ARGS="-m32" + +arch-meson . build "${meson_options[@]}" +meson configure build --no-pager # Print config + +#if [ ! 
-f "build/build.ninja.bak" ]; then +# cp build/build.ninja build/build.ninja.back +#fi + +# Evil: Hack build to make proc-macro crate native +# Should become unnecessary with Meson 1.3 +#sed -e '/^rule rust_COMPILER$/irule rust_HACK\n command = rustc -C linker=gcc $ARGS $in\n deps = gcc\n depfile = $targetdep\n description = Compiling native Rust source $in\n' \ +# -e '/^build src\/gallium\/frontends\/rusticl\/librusticl_proc_macros\.so:/s/rust_COMPILER/rust_HACK/' \ +# -e '/^ LINK_ARGS =/s/ src\/gallium\/frontends\/rusticl\/librusticl_proc_macros\.so//' \ +# -i build/build.ninja + +$NINJAFLAGS meson compile -C build From 4d0048112703dff01d946dc7e3d336b6255e52fc Mon Sep 17 00:00:00 2001 From: Denis Date: Thu, 8 Feb 2024 23:06:03 +0100 Subject: [PATCH 03/25] Enable vp9dec as well as av1enc and av1dec --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 30b7fd71fc3..1ad850a1c0b 100755 --- a/build.sh +++ b/build.sh @@ -28,7 +28,7 @@ meson_options=( -D rust_std=2021 -D shared-glapi=enabled -D valgrind=disabled - -D video-codecs=vc1dec,h264dec,h264enc,h265dec,h265enc + -D video-codecs=vc1dec,h264dec,h264enc,h265dec,h265enc,av1enc,av1dec,vp9dec -D vulkan-drivers=amd,swrast,virtio -D vulkan-layers=device-select,overlay -D vulkan-beta=true From 8fe6a8d395cf2f63518911891fe08bb987864eac Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Mon, 5 Feb 2024 19:10:44 +0100 Subject: [PATCH 04/25] radv/rt: Optimize update shader VGPR usage Brings VGPR allocation down from 72 (absolutely insane) to 32. We can now reach the theoretical maximum occupancy of 16 waves per SIMD. 
Part-of: --- src/amd/vulkan/bvh/update.comp | 49 +++++++++++++++++----------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/amd/vulkan/bvh/update.comp b/src/amd/vulkan/bvh/update.comp index 905f807ebe6..c3c740238f2 100644 --- a/src/amd/vulkan/bvh/update.comp +++ b/src/amd/vulkan/bvh/update.comp @@ -74,17 +74,9 @@ void main() { bool is_active; if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { is_active = build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x); - } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } else { VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset); - /* arrayOfPointers */ - if (args.geom_data.stride == 8) { - src_ptr = DEREF(REF(VOID_REF)(src_ptr)); - } - - is_active = build_instance(bounds, src_ptr, dst_ptr, gl_GlobalInvocationID.x); + is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x); } if (!is_active) @@ -110,10 +102,15 @@ void main() { gl_StorageSemanticsBuffer, gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); - radv_bvh_box32_node node = DEREF(REF(radv_bvh_box32_node)OFFSET(src_bvh, offset)); + REF(radv_bvh_box32_node) src_node = REF(radv_bvh_box32_node)OFFSET(src_bvh, offset); + REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset); + uint32_t children[4]; + for (uint32_t i = 0; i < 4; ++i) + children[i] = DEREF(src_node).children[i]; + uint32_t valid_child_count = 0; for (uint32_t i = 0; i < 4; ++valid_child_count, ++i) - if (node.children[i] == RADV_BVH_INVALID_NODE) + if (children[i] == RADV_BVH_INVALID_NODE) break; /* Check if all children have been processed. 
As this is an atomic the last path coming from @@ -127,33 +124,37 @@ void main() { if (ready_child_count != valid_child_count - 1) break; + for (uint32_t i = 0; i < 4; ++i) + DEREF(dst_node).children[i] = children[i]; + for (uint32_t i = 0; i < valid_child_count; ++i) { - uint32_t child_offset = id_to_offset(node.children[i]); + uint32_t child_offset = id_to_offset(children[i]); + radv_aabb child_bounds; if (child_offset == dst_offset) - node.coords[i] = bounds; + child_bounds = bounds; else if (child_offset >= internal_nodes_offset) { - radv_aabb child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); - radv_bvh_box32_node child_node = DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset)); + child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); + REF(radv_bvh_box32_node) child_node = REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset); for (uint32_t j = 0; j < 4; ++j) { - if (child_node.children[j] == RADV_BVH_INVALID_NODE) + if (DEREF(child_node).children[j] == RADV_BVH_INVALID_NODE) break; - child_bounds.min = min(child_bounds.min, child_node.coords[j].min); - child_bounds.max = max(child_bounds.max, child_node.coords[j].max); + child_bounds.min = min(child_bounds.min, DEREF(child_node).coords[j].min); + child_bounds.max = max(child_bounds.max, DEREF(child_node).coords[j].max); } - node.coords[i] = child_bounds; } else { uint32_t child_index = (child_offset - first_leaf_offset) / leaf_node_size; - node.coords[i] = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); + child_bounds = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index)); } - } - DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset)) = node; + DEREF(dst_node).coords[i] = child_bounds; + } if (parent_id == RADV_BVH_ROOT_NODE) { radv_aabb root_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY)); for (uint32_t i = 0; i < valid_child_count; ++i) { - root_bounds.min = min(root_bounds.min, node.coords[i].min); - root_bounds.max = max(root_bounds.max, node.coords[i].max); + radv_aabb 
bounds = DEREF(dst_node).coords[i]; + root_bounds.min = min(root_bounds.min, bounds.min); + root_bounds.max = max(root_bounds.max, bounds.max); } DEREF(args.dst).aabb = root_bounds; } From 68b3e0c078b9d4e41f34d475ed519d25b4f9fb88 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 14 Feb 2024 16:54:50 +0000 Subject: [PATCH 05/25] radv: enable GS_FAST_LAUNCH=2 by default for RDNA3 APUs (Phoenix) GS_FAST_LAUNCH=1 shouldn't be used on GFX11 but it's still needed for dGPUs (e.g. NAVI31) because GS_FAST_LAUNCH=2 destroys performance there for unknown reasons. On RDNA3 APUs, GS_FAST_LAUNCH=2 seems to be required for working mesh shaders and performance is fine. There is possibly a firmware bug on APUs that would explain why GS_FAST_LAUNCH=1 doesn't work on Phoenix. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10583 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10397 Cc: mesa-stable Signed-off-by: Samuel Pitoiset --- docs/envvars.rst | 2 ++ src/amd/vulkan/radv_device.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index 300f2cd40e1..b7ad3a87eb7 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1340,6 +1340,8 @@ RADV driver environment variables rt extensions with older hardware.
``gewave32`` enable wave32 for vertex/tess/geometry shaders (GFX10+) + ``gsfastlaunch2`` + use GS_FAST_LAUNCH=2 for Mesh shaders (GFX11+ dGPUs only) ``localbos`` enable local BOs ``nosam`` diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 261cf4f6856..61dd5ebc791 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1001,8 +1001,17 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); - device->mesh_fast_launch_2 = (device->instance->perftest_flags & RADV_PERFTEST_GS_FAST_LAUNCH_2) && - device->physical_device->rad_info.gfx_level >= GFX11; + /* GS_FAST_LAUNCH=2 mode is supposed to be used on GFX11 but it turns + * out it has severe impact on performance for unknown reasons (tested on + * NAVI31 dGPU). It's disabled by default. + * + * On RDNA3 APUs (Phoenix) it turns out GS_FAST_LAUNCH=1 doesn't work at all, + * and using mode2 fixes everything without any performance impact.
+ */ + device->mesh_fast_launch_2 = ((device->instance->perftest_flags & RADV_PERFTEST_GS_FAST_LAUNCH_2) && + device->physical_device->rad_info.gfx_level >= GFX11) || + device->physical_device->rad_info.family == CHIP_GFX1103_R1 || + device->physical_device->rad_info.family == CHIP_GFX1103_R2; device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord; From 0c6696101e77bf28b10d26ce00791cca6870dc31 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:11 +0000 Subject: [PATCH 06/25] radv: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 1 + src/amd/vulkan/radv_instance.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/docs/features.txt b/docs/features.txt index 953696a58fc..e7bce13ae81 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,6 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) + VK_EXT_headless_surface DONE (radv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 6d0ba625385..0c735ef3fc7 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -292,6 +292,9 @@ static const struct vk_instance_extension_table radv_instance_extensions_support .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void From 5879220880b032346497d8e7ede950cd4390f4b9 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:11 +0000 Subject: [PATCH 07/25] v3dv: enable VK_EXT_headless_surface on all platforms except Windows 
Part-of: --- docs/features.txt | 2 +- src/broadcom/vulkan/v3dv_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index e7bce13ae81..5f64d6f8c13 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (radv) + VK_EXT_headless_surface DONE (radv, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 3f8b1970f44..d02410dda56 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -121,6 +121,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT .EXT_acquire_xlib_display = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_debug_report = true, .EXT_debug_utils = true, From a975b383206d27a6b89f1050cb2a1f72c1c31d9b Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:11 +0000 Subject: [PATCH 08/25] tu: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/freedreno/vulkan/tu_device.cc | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 5f64d6f8c13..b2b4b0e6abd 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library 
DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (radv, v3dv) + VK_EXT_headless_surface DONE (radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 96af807661f..7c0569ad361 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -120,6 +120,9 @@ static const struct vk_instance_extension_table tu_instance_extensions_supported #ifdef VK_USE_PLATFORM_DISPLAY_KHR .EXT_direct_mode_display = true, .EXT_display_surface_counter = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_swapchain_colorspace = TU_HAS_SURFACE, } }; From f0885ca4579481d178a9cb4426759e22a87fcf02 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:11 +0000 Subject: [PATCH 09/25] anv: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/intel/vulkan/anv_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index b2b4b0e6abd..531f0f26d51 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 94bead94ce7..57b179fd0c4 100644 --- 
a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -207,6 +207,9 @@ static const struct vk_instance_extension_table instance_extensions = { .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void From 754fd7b104329617a9fefeb55e613d9323fcd75b Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:11 +0000 Subject: [PATCH 10/25] hasvk: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/intel/vulkan_hasvk/anv_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 531f0f26d51..adbc8c21f4c 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, hasvk, radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index 2819dd634b2..fce5bc26b5a 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -183,6 +183,9 @@ static const struct vk_instance_extension_table instance_extensions = { .EXT_display_surface_counter = true, .EXT_acquire_drm_display = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void From 77984b13337c60e136f9e5b78aec4055d7164fdd Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:12 +0000 Subject: [PATCH 11/25] dzn: 
enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/microsoft/vulkan/dzn_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index adbc8c21f4c..51ae2797f93 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, hasvk, radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/microsoft/vulkan/dzn_device.c b/src/microsoft/vulkan/dzn_device.c index be9b0ee7014..baccaf21e39 100644 --- a/src/microsoft/vulkan/dzn_device.c +++ b/src/microsoft/vulkan/dzn_device.c @@ -96,6 +96,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .EXT_debug_report = true, .EXT_debug_utils = true, From 600bd9e90571a31c95457721d011a1286ab90ca8 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:12 +0000 Subject: [PATCH 12/25] nvk: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/nouveau/vulkan/nvk_instance.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 51ae2797f93..4e0a33e6b4d 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) 
VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, dzn, hasvk, radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/nouveau/vulkan/nvk_instance.c b/src/nouveau/vulkan/nvk_instance.c index 5340b1b6837..6dd9883f4c3 100644 --- a/src/nouveau/vulkan/nvk_instance.c +++ b/src/nouveau/vulkan/nvk_instance.c @@ -40,6 +40,9 @@ static const struct vk_instance_extension_table instance_extensions = { #endif #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT .EXT_acquire_xlib_display = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, #endif .KHR_device_group_creation = true, .KHR_external_fence_capabilities = true, From ab7616e62029874bae95b3219a435e56c36a7f77 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:12 +0000 Subject: [PATCH 13/25] panvk: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/panfrost/vulkan/panvk_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 4e0a33e6b4d..91ba7d550b8 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, panvk, radv, tu, v3dv) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) 
VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c index ccf92a17fbe..37c548d8e80 100644 --- a/src/panfrost/vulkan/panvk_device.c +++ b/src/panfrost/vulkan/panvk_device.c @@ -146,6 +146,9 @@ static const struct vk_instance_extension_table panvk_instance_extensions = { #ifdef VK_USE_PLATFORM_WAYLAND_KHR .KHR_wayland_surface = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static void From 4f25221893ec5983fb2c4d727fb426d979ae831b Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:12 +0000 Subject: [PATCH 14/25] vn: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/virtio/vulkan/vn_instance.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 91ba7d550b8..9156075a703 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, panvk, radv, tu, v3dv) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, panvk, radv, tu, v3dv, vn) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c index 06627aeeb5b..8fd6d71ef35 100644 --- a/src/virtio/vulkan/vn_instance.c +++ b/src/virtio/vulkan/vn_instance.c @@ -49,6 +49,9 @@ static const struct vk_instance_extension_table #endif #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, +#endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + 
.EXT_headless_surface = true, #endif }; From 52bd7682d283e51dce79e9b97e43724b358b1c2c Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 2 Feb 2024 22:35:12 +0000 Subject: [PATCH 15/25] lvp: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- src/gallium/frontends/lavapipe/lvp_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 9156075a703..0e71665e4e2 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, dzn, hasvk, nvk, panvk, radv, tu, v3dv, vn) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index f982d6dc64f..477654f8bcc 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -88,6 +88,9 @@ static const struct vk_instance_extension_table lvp_instance_extensions_supporte #ifdef VK_USE_PLATFORM_XLIB_KHR .KHR_xlib_surface = true, #endif +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = true, +#endif }; static const struct vk_device_extension_table lvp_device_extensions_supported = { From 98460131bdae64d1fb88820919c534c0a8bf9b86 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Tue, 6 Feb 2024 13:31:23 +0000 Subject: [PATCH 16/25] pvr: enable VK_EXT_headless_surface on all platforms except Windows Part-of: --- docs/features.txt | 2 +- 
src/imagination/vulkan/pvr_device.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 0e71665e4e2..ba75a34d38a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -578,7 +578,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_global_priority DONE (anv, hasvk, radv, tu) VK_EXT_global_priority_query DONE (anv, hasvk, radv, tu) VK_EXT_graphics_pipeline_library DONE (anv, lvp, radv, tu, vn) - VK_EXT_headless_surface DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn) + VK_EXT_headless_surface DONE (anv, dzn, hasvk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_EXT_image_2d_view_of_3d DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_image_compression_control DONE (radv) VK_EXT_image_drm_format_modifier DONE (anv, hasvk, radv/gfx9+, tu, v3dv, vn) diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index 34f1fa19378..ab7ff36420c 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -155,6 +155,9 @@ static const struct vk_instance_extension_table pvr_instance_extensions = { .KHR_get_physical_device_properties2 = true, .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM, .KHR_surface = PVR_USE_WSI_PLATFORM, +#ifndef VK_USE_PLATFORM_WIN32_KHR + .EXT_headless_surface = PVR_USE_WSI_PLATFORM, +#endif .EXT_debug_report = true, .EXT_debug_utils = true, }; From b067a61c07839928f3812fc90a14df80cb59efc3 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 11 Jan 2024 13:14:47 -0800 Subject: [PATCH 17/25] nir: Mark nir_intrinsic_load_global_block_intel as divergent This is divergent because it specifically loads sequential values into successive SIMD lanes. No shader-db or fossil-db changes on any Intel platform. 
Fixes: 9f44a264623 ("nir/divergence: handle load_global_block_intel") Reviewed-by: Lionel Landwerlin Part-of: (cherry picked from commit 75de4458a1350ac6f3843e4f8da7a69717c92687) --- .pick_status.json | 2 +- src/compiler/nir/nir_divergence_analysis.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 7cd88c93b22..aad39c6da1f 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -124,7 +124,7 @@ "description": "nir: Mark nir_intrinsic_load_global_block_intel as divergent", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "9f44a264623461c98368185b023d99446676e039", "notes": null diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 4cb456bc747..7aa49801ced 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -189,7 +189,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_resume_shader_address_amd: case nir_intrinsic_load_global_const_block_intel: case nir_intrinsic_load_reloc_const_intel: - case nir_intrinsic_load_global_block_intel: case nir_intrinsic_load_btd_global_arg_addr_intel: case nir_intrinsic_load_btd_local_arg_addr_intel: case nir_intrinsic_load_mesh_inline_data_intel: @@ -219,6 +218,13 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) is_divergent = false; break; + /* This is divergent because it specifically loads sequential values into + * successive SIMD lanes. 
+ */ + case nir_intrinsic_load_global_block_intel: + is_divergent = true; + break; + case nir_intrinsic_decl_reg: is_divergent = nir_intrinsic_divergent(instr); break; From b137afc1e6d444603bdacb74fb982818c05628e7 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 23 Feb 2024 11:44:09 +0000 Subject: [PATCH 18/25] egl/wayland: Add opaque-equivalent FourCCs Add a mapping for the opaque version of an alphaful format (e.g. ARGB8888 -> XRGB8888) to better support EGL_EXT_present_opaque. Part-of: (cherry picked from commit c74f4803911d8cbc9e9617ed79ea9480f335addd) --- .pick_status.json | 2 +- src/egl/drivers/dri2/platform_wayland.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index aad39c6da1f..64482bf2177 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -484,7 +484,7 @@ "description": "egl/wayland: Add opaque-equivalent FourCCs", "nominated": false, "nomination_type": 3, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 4105c2bfe4d..d243b6f1e5d 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -73,6 +73,7 @@ static const struct dri2_wl_visual { */ int alt_dri_image_format; int bpp; + int opaque_wl_drm_format; int rgba_shifts[4]; unsigned int rgba_sizes[4]; } dri2_wl_visuals[] = { @@ -83,6 +84,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR16161616F, 0, 64, + WL_DRM_FORMAT_XBGR16F, {0, 16, 32, 48}, {16, 16, 16, 16}, }, @@ -93,6 +95,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR16161616F, 0, 64, + WL_DRM_FORMAT_XBGR16F, {0, 16, 32, -1}, {16, 16, 16, 0}, }, @@ -103,6 +106,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XBGR2101010, 32, + WL_DRM_FORMAT_XRGB2101010, {20, 10, 0, -1}, {10, 10, 10, 0}, }, @@ -113,6 
+117,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ABGR2101010, 32, + WL_DRM_FORMAT_XRGB2101010, {20, 10, 0, 30}, {10, 10, 10, 2}, }, @@ -123,6 +128,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XRGB2101010, 32, + WL_DRM_FORMAT_XBGR2101010, {0, 10, 20, -1}, {10, 10, 10, 0}, }, @@ -133,6 +139,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ARGB2101010, 32, + WL_DRM_FORMAT_XBGR2101010, {0, 10, 20, 30}, {10, 10, 10, 2}, }, @@ -143,6 +150,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XRGB8888, {16, 8, 0, -1}, {8, 8, 8, 0}, }, @@ -153,6 +161,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XRGB8888, {16, 8, 0, 24}, {8, 8, 8, 8}, }, @@ -163,6 +172,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ABGR8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XBGR8888, {0, 8, 16, 24}, {8, 8, 8, 8}, }, @@ -173,6 +183,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XBGR8888, __DRI_IMAGE_FORMAT_NONE, 32, + WL_DRM_FORMAT_XBGR8888, {0, 8, 16, -1}, {8, 8, 8, 0}, }, @@ -183,6 +194,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_RGB565, __DRI_IMAGE_FORMAT_NONE, 16, + WL_DRM_FORMAT_RGB565, {11, 5, 0, -1}, {5, 6, 5, 0}, }, @@ -193,6 +205,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB1555, __DRI_IMAGE_FORMAT_ABGR1555, 16, + WL_DRM_FORMAT_XRGB1555, {10, 5, 0, 15}, {5, 5, 5, 1}, }, @@ -203,6 +216,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB1555, __DRI_IMAGE_FORMAT_XBGR1555, 16, + WL_DRM_FORMAT_XRGB1555, {10, 5, 0, -1}, {5, 5, 5, 0}, }, @@ -213,6 +227,7 @@ static const struct dri2_wl_visual { __DRI_IMAGE_FORMAT_ARGB4444, __DRI_IMAGE_FORMAT_XBGR4444, 16, + WL_DRM_FORMAT_XRGB4444, {8, 4, 0, 12}, {4, 4, 4, 4}, }, @@ -223,6 +238,7 @@ static const 
struct dri2_wl_visual { __DRI_IMAGE_FORMAT_XRGB4444, __DRI_IMAGE_FORMAT_XBGR4444, 16, + WL_DRM_FORMAT_XRGB4444, {8, 4, 0, -1}, {4, 4, 4, 0}, }, From 7e6f55f336b68dab71185253de4d1e4c22f9ef4a Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 23 Feb 2024 11:45:15 +0000 Subject: [PATCH 19/25] egl/wayland: Fix EGL_EXT_present_opaque This extension has been broken ever since the initial commit. It created an XRGB DRIImage for the driver to render to, so whilst the presentation was opaque, the buffer also completely lacked an alpha channel. Fix it by making sure we only modify the FourCC we send to the Wayland server when creating a buffer. Closes: mesa/mesa#5886 Fixes: 9aee7855d2dd ("egl: implement EGL_EXT_present_opaque on wayland") Part-of: (cherry picked from commit 9ea9a963aa142910ed3a0fcea9060d3a92ee5ab7) --- .pick_status.json | 2 +- src/egl/drivers/dri2/platform_wayland.c | 61 ++++++------------------- 2 files changed, 15 insertions(+), 48 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 64482bf2177..66ab91119f3 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -474,7 +474,7 @@ "description": "egl/wayland: Fix EGL_EXT_present_opaque", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "9aee7855d2ddf47169270d5d1e3e92ff6e5f65c2", "notes": null diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index d243b6f1e5d..00d53e9b3fe 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -246,7 +246,7 @@ static const struct dri2_wl_visual { static int dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, - const __DRIconfig *config, bool force_opaque) + const __DRIconfig *config) { int shifts[4]; unsigned int sizes[4]; @@ -256,16 +256,13 @@ dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { const struct 
dri2_wl_visual *wl_visual = &dri2_wl_visuals[i]; - int cmp_rgb_shifts = - memcmp(shifts, wl_visual->rgba_shifts, 3 * sizeof(shifts[0])); - int cmp_rgb_sizes = - memcmp(sizes, wl_visual->rgba_sizes, 3 * sizeof(sizes[0])); + int cmp_rgba_shifts = + memcmp(shifts, wl_visual->rgba_shifts, 4 * sizeof(shifts[0])); + int cmp_rgba_sizes = + memcmp(sizes, wl_visual->rgba_sizes, 4 * sizeof(sizes[0])); - if (cmp_rgb_shifts == 0 && cmp_rgb_sizes == 0 && - wl_visual->rgba_shifts[3] == (force_opaque ? -1 : shifts[3]) && - wl_visual->rgba_sizes[3] == (force_opaque ? 0 : sizes[3])) { + if (cmp_rgba_shifts == 0 && cmp_rgba_sizes == 0) return i; - } } return -1; @@ -318,7 +315,7 @@ dri2_wl_is_format_supported(void *user_data, uint32_t format) for (int i = 0; dri2_dpy->driver_configs[i]; i++) if (j == dri2_wl_visual_idx_from_config( - dri2_dpy, dri2_dpy->driver_configs[i], false)) + dri2_dpy, dri2_dpy->driver_configs[i])) return true; return false; @@ -726,43 +723,10 @@ dri2_wl_create_window_surface(_EGLDisplay *disp, _EGLConfig *conf, dri2_surf->base.Width = window->width; dri2_surf->base.Height = window->height; -#ifndef NDEBUG - /* Enforce that every visual has an opaque variant (requirement to support - * EGL_EXT_present_opaque) - */ - for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { - const struct dri2_wl_visual *transparent_visual = &dri2_wl_visuals[i]; - if (transparent_visual->rgba_sizes[3] == 0) { - continue; - } - - bool found_opaque_equivalent = false; - for (unsigned int j = 0; j < ARRAY_SIZE(dri2_wl_visuals); j++) { - const struct dri2_wl_visual *opaque_visual = &dri2_wl_visuals[j]; - if (opaque_visual->rgba_sizes[3] != 0) { - continue; - } - - int cmp_rgb_shifts = - memcmp(transparent_visual->rgba_shifts, opaque_visual->rgba_shifts, - 3 * sizeof(opaque_visual->rgba_shifts[0])); - int cmp_rgb_sizes = - memcmp(transparent_visual->rgba_sizes, opaque_visual->rgba_sizes, - 3 * sizeof(opaque_visual->rgba_sizes[0])); - - if (cmp_rgb_shifts == 0 && cmp_rgb_sizes 
== 0) { - found_opaque_equivalent = true; - break; - } - } - - assert(found_opaque_equivalent); - } -#endif - - visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config, - dri2_surf->base.PresentOpaque); + visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config); assert(visual_idx != -1); + assert(dri2_wl_visuals[visual_idx].dri_image_format != + __DRI_IMAGE_FORMAT_NONE); if (dri2_dpy->wl_dmabuf || dri2_dpy->wl_drm) { dri2_surf->format = dri2_wl_visuals[visual_idx].wl_drm_format; @@ -1517,6 +1481,9 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy, close(fd); } + if (dri2_surf && dri2_surf->base.PresentOpaque) + fourcc = dri2_wl_visuals[visual_idx].opaque_wl_drm_format; + ret = zwp_linux_buffer_params_v1_create_immed(params, width, height, fourcc, 0); zwp_linux_buffer_params_v1_destroy(params); @@ -2100,7 +2067,7 @@ dri2_wl_add_configs_for_visuals(_EGLDisplay *disp) /* No match for config. Try if we can blitImage convert to a visual */ c = dri2_wl_visual_idx_from_config(dri2_dpy, - dri2_dpy->driver_configs[i], false); + dri2_dpy->driver_configs[i]); if (c == -1) continue; From 8a5c89a29477c6e77cd114c860b19d5b3eeb7a48 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 28 Feb 2024 13:29:30 +0000 Subject: [PATCH 20/25] [24.0 only] disable clang-format It's been disabled on main which means I'm now getting backports that do not pass this check anymore. 
--- .gitlab-ci/test/gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci/test/gitlab-ci.yml b/.gitlab-ci/test/gitlab-ci.yml index 87d379b26bc..629a1766e01 100644 --- a/.gitlab-ci/test/gitlab-ci.yml +++ b/.gitlab-ci/test/gitlab-ci.yml @@ -43,7 +43,7 @@ rustfmt: - rustfmt --verbose src/**/lib.rs - rustfmt --verbose src/**/main.rs -clang-format: +.clang-format: extends: - .formatting-check - .lint-clang-format-rules From 11367cc87a816ede7c4bf235da2091067a844130 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 28 Feb 2024 18:29:07 +0000 Subject: [PATCH 21/25] docs: add release notes for 24.0.2 --- docs/relnotes.rst | 2 + docs/relnotes/24.0.2.rst | 230 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 docs/relnotes/24.0.2.rst diff --git a/docs/relnotes.rst b/docs/relnotes.rst index 40164bf1d42..20f4672ab90 100644 --- a/docs/relnotes.rst +++ b/docs/relnotes.rst @@ -3,6 +3,7 @@ Release Notes The release notes summarize what's new or changed in each Mesa release. +- :doc:`24.0.2 release notes ` - :doc:`24.0.1 release notes ` - :doc:`24.0.0 release notes ` - :doc:`23.3.3 release notes ` @@ -409,6 +410,7 @@ The release notes summarize what's new or changed in each Mesa release. :maxdepth: 1 :hidden: + 24.0.2 24.0.1 24.0.0 23.3.3 diff --git a/docs/relnotes/24.0.2.rst b/docs/relnotes/24.0.2.rst new file mode 100644 index 00000000000..b3d4d74ae38 --- /dev/null +++ b/docs/relnotes/24.0.2.rst @@ -0,0 +1,230 @@ +Mesa 24.0.2 Release Notes / 2024-02-28 +====================================== + +Mesa 24.0.2 is a bug fix release which fixes bugs found since the 24.0.1 release. + +Mesa 24.0.2 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. 
OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 24.0.2 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage fails on MTL +- GTF-GL46.gtf42.GL3Tests.texture_storage.texture_storage_texture_as_framebuffer_attachment fails on MTL +- [intel][anv][build][regression] - genX_grl.h:27:10: fatal error: grl/grl_cl_kernel.h: No such file or directory +- RX 6600 VDPAU not recognizing HEVC_MAIN_10 correctly +- Running an app on another AMD GPU (offload, DRI_PRIME) produces corrupted frames on Wayland. +- VDPAU declares a texture as "immutable" without also setting its ImmutableLevels attribute. +- RX6600 hardware HEVC video decode fails for VDPAU but works for VA-API. (Can lock up GPU!) +- Rusticl panics when getting program build logs using opencl.hpp +- ue5 game issues lighting Rog Ally 7080u (z1e) +- Missing textures in RoboCop: Rogue City with mesh shaders enabled +- radv: Multiview PSO forgets to export layer in some cases. +- zink: flickering artifacts in Selaco + + +Changes +------- + +Boyuan Zhang (1): + +- radeonsi/vcn: only use multi slices reflist when available + +Chia-I Wu (1): + +- radv: fix pipeline stats mask + +Chris Rankin (2): + +- vdpau: Declare texture object as immutable using helper function. +- vdpau: Refactor query for video surface formats. + +Connor Abbott (1): + +- tu: Follow pipeline compatibility rules for dynamic descriptors + +Daniel Schürmann (1): + +- spirv: Fix SpvOpExpectKHR + +Daniel Stone (2): + +- egl/wayland: Add opaque-equivalent FourCCs +- egl/wayland: Fix EGL_EXT_present_opaque + +Dave Airlie (2): + +- nouveau/winsys: fix bda heap leak. 
+- nvk: fix dri options leak. + +David Rosca (1): + +- frontends/va: Only set VP9 segmentation fields when segmentation is enabled + +Eric Engestrom (10): + +- docs: add sha256sum for 24.0.1 +- [24.0-only change] ci: increase the kernel+rootfs builds timeout to 2h +- .pick_status.json: Update to c6e855b64b9015235462959b2b7f3e9fc34b2f1f +- .pick_status.json: Update to dce20690542c84ac00509a6db7902dcfc90b25bb +- .pick_status.json: Update to c12300844d3f084ca011a3f54f0cbaa9807418f0 +- .pick_status.json: Mark 3b927567ac927316eb11901f50ee1573ead44fd2 as denominated +- .pick_status.json: Update to 423add61e2d5b6ab6b5505d1feec01b93609f8fc +- .pick_status.json: Update to 4071c399a27932ea9253eb8a65d5725504bac6f3 +- .pick_status.json: Update to 82ff9204abab5267f82a9ce73f9dca1541ef5ee6 +- [24.0 only] disable clang-format + +Erik Faye-Lund (1): + +- mesa/main: allow GL_BGRA for FBOs + +Faith Ekstrand (1): + +- nvk: Invalidate the texture cache before MSAA resolves + +Hans-Kristian Arntzen (1): + +- radv: export multiview in VS/TES/GS for depth-only rendering + +Iago Toral Quiroga (1): + +- v3d,v3dv: fix BO allocation for shared vars + +Ian Romanick (1): + +- nir: Mark nir_intrinsic_load_global_block_intel as divergent + +Jesse Natalie (1): + +- dzn: Don't set view instancing mask until after the PSO + +Jordan Justen (1): + +- intel/dev: Add 2 additional ADL-N PCI ids + +Juston Li (1): + +- venus: fix image reqs cache store locking + +Karol Herbst (3): + +- zink: lower unaligned memory accesses +- rusticl/program: fix CL_PROGRAM_BINARIES for devs with no builds +- meson: do not pull in clc for clover + +Konstantin Seurer (5): + +- zink: Always set mfence->submit_count to the fence submit_count +- Revert "zink: always force flushes when originating from api frontend" +- llvmpipe: Use full subgroups when possible +- gallivm: Consider the initial mask when terminating loops +- ci: Update llvmpipe trace checksums + +Lionel Landwerlin (8): + +- vulkan/runtime: add helper to query 
attachment layout +- anv: fixup push descriptor shader analysis +- anv: reenable ANV_ALWAYS_BINDLESS +- anv: fix Wa_16013994831 macros +- anv: disable Wa_16013994831 +- intel/nir: only consider ray query variables in lowering +- anv: limit depth flush on dynamic render pass suspend +- anv: add missing generated file dep + +Martin Roukala (né Peres) (1): + +- radv/ci: switch vkcts-polaris10 from mupuf to KWS' farm + +Michel Dänzer (1): + +- egl/wayland: Flush after blitting to linear copy + +Mike Blumenkrantz (25): + +- zink: prune dmabuf export tracking when adding resource binds +- zink: fix sparse bo placement +- zink: zero allocate resident_defs array in ntv +- zink: move sparse lowering up in file +- zink: run sparse lowering after all optimization passes +- zink: adjust swizzled deref loads by the variable component offset +- zink: clamp zink_gfx_lib_cache::stages_present for generated tcs +- zink: promote gpl libs freeing during shader destroy out of prog loop +- zink: don't add VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT for sparse textures +- zink: delete maxDescriptorBufferBindings checks +- zink: avoid infinite recursion on (very) small BAR systems in bo alloc +- zink: add checks/compat for low-spec descriptor buffer implementations +- zink: add a second fence disambiguation case +- zink: force host-visible allocations for MAP_COHERENT resources +- zink: handle stencil_fallback in zink_clear_depth_stencil +- zink: don't destroy the current batch state on context destroy +- mesa: check driver format support for certain GetInternalformativ queries +- vk/wsi/x11/sw: use swapchain depth for putimage +- zink: only scan active batch states for free states if > 1 exist +- zink: fix longstanding issue with active batch state recycling +- zink: assert that batch_id is valid in zink_screen_check_last_finished() +- zink: clamp in_rp clears to fb size +- zink: fix (dynamic rendering) execution of scissored clears during flush +- zink: lock buffer age when chundering 
swapchain for readback +- zink: flag acquired swapchain image as readback target on acquire, not present + +Patrick Lerda (3): + +- r300: fix vertex_buffer related refcnt imbalance +- r300: fix r300_destroy_context() related memory leaks +- r300: fix memory leaks when register allocation fails + +Pavel Ondračka (1): + +- r300: add explicit flrp lowering + +Rhys Perry (2): + +- aco/ra: don't initialize assigned in initializer list +- aco/ra: fix GFX9- writelane + +Sagar Ghuge (1): + +- nir: Allow nir_texop_tg4 in implicit derivative + +Samuel Pitoiset (4): + +- radv: fix RGP barrier reason for RP barriers inserted by the runtime +- radv: enable GS_FAST_LAUNCH=2 by default for RDNA3 APUs (Phoenix) +- spirv: only consider IO variables when adjusting patch locations for TES +- radv: fix indirect dispatches on compute queue with conditional rendering on GFX7 + +Tapani Pälli (2): + +- intel/blorp: disable use of REP16 independent of format +- iris: make sure DS and TE are sent in pairs on >= gfx125 + +Yiwei Zhang (2): + +- venus: force async pipeline create on threads creating descriptor pools +- venus: fix the cmd stride used for qfb recording + +thfrwn (1): + +- mesa: fix off-by-one for newblock allocation in dlist_alloc From a3df5eab6c38682fd6ece3f9420d696eab9479f5 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 28 Feb 2024 18:29:18 +0000 Subject: [PATCH 22/25] VERSION: bump for 24.0.2 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1b3e74f84e7..a19540167eb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.0.1 +24.0.2 From a66c48e2e956d73fc715559ef7c808708296bdf3 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Thu, 22 Feb 2024 14:42:29 -0500 Subject: [PATCH 23/25] radeonsi/vcn: data structure av1 enc long term reference. Here it borrowed the term "long term reference" to represent the customized reference frame rather than the default ones used. 
To enable that, the application needs to use the existing VAAPI interface to mark a frame as "long term reference", and then it will be preserved in the DPB for later usage. This preserved frame can later be referred to by having its signature used in the ref_frame_idx[] list, and the index can be indicated by RefFrameCtrl index2, which has not been used for any other purpose. Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Part-of: --- src/amd/common/ac_vcn_enc.h | 5 +++++ src/gallium/include/pipe/p_video_state.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/amd/common/ac_vcn_enc.h b/src/amd/common/ac_vcn_enc.h index 73f2154387e..589f690f5d7 100644 --- a/src/amd/common/ac_vcn_enc.h +++ b/src/amd/common/ac_vcn_enc.h @@ -200,6 +200,8 @@ #define RENCODE_COLOR_SPACE_YUV 0 #define RENCODE_COLOR_SPACE_RGB 1 +#define RENCODE_VCN4_AV1_MAX_NUM_LTR 2 + typedef struct rvcn_enc_session_info_s { uint32_t interface_version; uint32_t sw_context_address_hi; @@ -611,10 +613,13 @@ typedef struct rvcn_enc_av1_color_description_s typedef struct rvcn_enc_av1_ref_frame_s { bool in_use; + bool is_ltr; uint32_t frame_id; uint32_t temporal_id; uint32_t slot_id; uint32_t frame_type; + uint32_t ltr_seq; + void *frame_signature; } rvcn_enc_av1_ref_frame_t; typedef struct rvcn_enc_av1_recon_slot_s diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 1afa57dadf4..a6592dd2722 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -996,6 +996,7 @@ struct pipe_av1_enc_picture_desc uint32_t use_superres:1; uint32_t reduced_tx_set:1; uint32_t skip_mode_present:1; + uint32_t long_term_reference:1; }; struct pipe_enc_quality_modes quality_modes; struct pipe_enc_intra_refresh intra_refresh; @@ -1028,6 +1029,9 @@ struct pipe_av1_enc_picture_desc uint32_t primary_ref_frame; uint8_t refresh_frame_flags; uint8_t ref_frame_idx[7]; + uint32_t ref_frame_ctrl_l0; /* forward prediction only */ + void 
*ref_list[8]; /* for tracking ref frames */ + void *recon_frame; struct { uint8_t cdef_damping_minus_3; From 8090bee45eee4f30735a6654ca672b13c71efb0c Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Thu, 22 Feb 2024 17:16:02 -0500 Subject: [PATCH 24/25] radeonsi/vcn: vcn4 av1 long term ref support Add vcn4 av1 long term reference support, so that frames can be controlled from the application side to refer to the identified reference, which usually could provide better coding efficiency in the case of scene changes back and forth; the application just needs to identify and mark these frames before using them. We assume 2 long term reference frames should be good in a key frame period, and these long term references can be overwritten by marking new ones. Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Part-of: --- src/gallium/drivers/radeonsi/radeon_vcn_enc.c | 13 +- src/gallium/drivers/radeonsi/radeon_vcn_enc.h | 8 + .../drivers/radeonsi/radeon_vcn_enc_4_0.c | 150 +++++++++++++++--- 3 files changed, 152 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index 9d0aa41e342..86ab2457c93 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -769,7 +769,17 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, pic->seq.num_temporal_layers : RENCODE_MAX_NUM_TEMPORAL_LAYERS; /* 1, 2 layer needs 1 reference, and 3, 4 layer needs 2 references */ - enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2; + enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2 + + RENCODE_VCN4_AV1_MAX_NUM_LTR; + for (int i = 0; i < RENCDOE_AV1_REFS_PER_FRAME; i++) + enc_pic->av1_ref_frame_idx[i] = pic->ref_frame_idx[i]; + + for (int i = 0; i < RENCDOE_AV1_NUM_REF_FRAMES; i++) + enc_pic->av1_ref_list[i] = pic->ref_list[i]; + + enc_pic->av1_recon_frame = pic->recon_frame; + enc_pic->av1_ref_frame_ctrl_l0 = 
pic->ref_frame_ctrl_l0; + radeon_vcn_enc_quality_modes(enc, &pic->quality_modes); enc_pic->frame_id_numbers_present = pic->seq.seq_bits.frame_id_number_present_flag; enc_pic->enable_error_resilient_mode = pic->error_resilient_mode; @@ -785,6 +795,7 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, enc_pic->disable_screen_content_tools = !pic->allow_screen_content_tools; enc_pic->is_obu_frame = pic->enable_frame_obu; enc_pic->need_av1_seq = (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY); + enc_pic->av1_mark_long_term_reference = pic->long_term_reference; radeon_vcn_enc_av1_get_spec_misc_param(enc, pic); radeon_vcn_enc_av1_timing_info(enc, pic); diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h index 114315ac948..b3b18080207 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h @@ -124,6 +124,7 @@ struct radeon_enc_pic { uint32_t is_obu_frame:1; uint32_t stream_obu_frame:1; /* all frames have the same number of tiles */ uint32_t need_av1_seq:1; + uint32_t av1_mark_long_term_reference:1; }; uint32_t render_width; uint32_t render_height; @@ -131,6 +132,7 @@ struct radeon_enc_pic { enum pipe_av1_enc_frame_type last_frame_type; uint32_t display_frame_id; uint32_t frame_id; + uint32_t temporal_seq_num; uint32_t order_hint; uint32_t order_hint_bits; uint32_t refresh_frame_flags; @@ -146,6 +148,12 @@ struct radeon_enc_pic { uint32_t count_last_layer; rvcn_enc_av1_ref_frame_t frames[RENCDOE_AV1_NUM_REF_FRAMES]; rvcn_enc_av1_recon_slot_t recon_slots[RENCDOE_AV1_NUM_REF_FRAMES + 1]; + uint8_t av1_ref_frame_idx[RENCDOE_AV1_REFS_PER_FRAME]; + void *av1_ref_list[RENCDOE_AV1_NUM_REF_FRAMES]; + void *av1_recon_frame; + uint32_t av1_ref_frame_ctrl_l0; + uint32_t av1_ref_frame_ctrl_l1; + uint32_t av1_ltr_seq; }; rvcn_enc_session_info_t session_info; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c 
b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c index 5255922a555..05a3aa2f055 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c @@ -191,10 +191,14 @@ static void redeon_enc_av1_release_recon_slot(struct radeon_encoder *enc, static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, uint32_t frame_id, uint32_t temporal_id, + uint32_t mark_long_term, /* mark it as long term reference */ + void *frame_signature, enum pipe_av1_enc_frame_type frame_type) { uint32_t i = 0; + assert(frame_signature); + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; if (!frame->in_use) { @@ -203,6 +207,10 @@ static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, frame->temporal_id = temporal_id; frame->slot_id = radeon_enc_av1_alloc_recon_slot(enc); frame->frame_type = frame_type; + frame->frame_signature = frame_signature; + frame->is_ltr = !!(mark_long_term); + if (frame->is_ltr) + frame->ltr_seq = enc->enc_pic.av1_ltr_seq++; break; } } @@ -251,6 +259,33 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, { uint32_t i = 0; + /* checking long term frames if it reached the limit, it needs to + * release the oldest. */ + if (enc->enc_pic.av1_mark_long_term_reference) { + int cnt = 0; + uint32_t min_seq = (uint32_t)-1; + uint32_t min_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr) { + if (frame->ltr_seq < min_seq) { + min_seq = frame->ltr_seq; + min_seq_idx = i; + } + cnt++; + + /* this means some LTR ref buffer has been re-used. */ + if (enc->enc_pic.av1_recon_frame == frame->frame_signature) + RVID_ERR("recon duplicated! 
it could refer to a wrong frame!\n"); + } + } + /* release the frame with minimum ltr seq number (oldest), + * this check is happening on each frame, the total number + * of LTR is limited by RENCODE_VCN4_AV1_MAX_NUM_LTR.*/ + if (cnt > RENCODE_VCN4_AV1_MAX_NUM_LTR) + radeon_enc_av1_release_ref_frame(enc, min_seq_idx, false); + } + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { rvcn_enc_av1_recon_slot_t *slot = &enc->enc_pic.recon_slots[i]; if (slot->in_use && slot->is_orphaned) { @@ -264,39 +299,102 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, if (frame->in_use) { if (temporal_id < frame->temporal_id) radeon_enc_av1_release_ref_frame(enc, i, false); - else if (temporal_id == frame->temporal_id) + else if (temporal_id == frame->temporal_id && (!frame->is_ltr)) radeon_enc_av1_release_ref_frame(enc, i, true); } } } +static bool radeon_enc_av1_search_requested_reference( + struct radeon_encoder *enc, + uint32_t *idx) +{ + bool find = false; + /* Here is the assumption, the 3rd item of ref_frame_ctrl_l0 + indicates which slot it needs to find in ref_frame_idx[], and + from ref_frame_idx to find the requested reference frame + in ref_list[] */ + #define RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM (0x1c0) /* 111 000 000 */ + uint32_t marked_ref_frame_idx = (RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM & + enc->enc_pic.av1_ref_frame_ctrl_l0) >> 6; + /* valid marked_ref_frame_idx > 0 */ + if (marked_ref_frame_idx) { + uint32_t requested_frame_idx = + enc->enc_pic.av1_ref_frame_idx[marked_ref_frame_idx - 1]; + void *request_signature = NULL; + + if (requested_frame_idx >= RENCDOE_AV1_NUM_REF_FRAMES) + goto end; + + request_signature = enc->enc_pic.av1_ref_list[requested_frame_idx]; + for (uint32_t i = 0; i < ARRAY_SIZE((enc->enc_pic.frames)); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && + frame->is_ltr && + (request_signature == frame->frame_signature)) { + find = true; + /* increase the frame seq number 
after found, when it + * reaches the maximum limit, this found one will not + * be released. */ + frame->ltr_seq = enc->enc_pic.av1_ltr_seq++; + *idx = i; + break; + } + } + } +end: + return find; +} + static uint32_t radeon_enc_av1_obtain_ref0_frame(struct radeon_encoder *enc, uint32_t temporal_id) { + /* when only ltr frames in DPB, it needs to use the biggest ltr_seq + * one (latest) for reference, instead of the first one met. */ uint32_t i = 0; + uint32_t ret_idx = 0; + uint32_t max_seq = 0; + uint32_t max_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr && (frame->ltr_seq >= max_seq)) { + max_seq = frame->ltr_seq; + max_seq_idx = i; + } + } for (i = ARRAY_SIZE(enc->enc_pic.frames); i > 0; i--) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i - 1]; - if (frame->in_use && frame->temporal_id <= temporal_id) + if (frame->in_use && frame->temporal_id <= temporal_id) { + if (frame->is_ltr) + ret_idx = max_seq_idx; + else + ret_idx = i - 1; + break; + } } - /* not find, ref = 0, or ref = i - 1 */ - return i == 0 ? 
i : i - 1; + return ret_idx; } static void radeon_enc_reset_av1_dpb_frames(struct radeon_encoder *enc) { - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { - enc->enc_pic.frames[i].in_use = false; - enc->enc_pic.frames[i].frame_id = 0; - enc->enc_pic.frames[i].temporal_id = 0; - enc->enc_pic.frames[i].slot_id = 0; - enc->enc_pic.frames[i].frame_type = 0; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) + enc->enc_pic.frames[i] = (rvcn_enc_av1_ref_frame_t) { + .in_use = false, + .is_ltr = false, + .ltr_seq = 0, + .frame_id = 0, + .temporal_id = 0, + .slot_id = 0, + .frame_type = 0, + .frame_signature = NULL, + }; - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { - enc->enc_pic.recon_slots[i].in_use = false; - enc->enc_pic.recon_slots[i].is_orphaned = false; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) + enc->enc_pic.recon_slots[i] = (rvcn_enc_av1_recon_slot_t) { + .in_use = false, + .is_orphaned = false, + }; } static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) @@ -304,21 +402,35 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) struct radeon_enc_pic *pic = &enc->enc_pic; uint32_t current_slot; uint32_t ref_slot; + uint32_t request_idx; + bool find = false; if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY) { pic->frame_id = 0; + pic->temporal_seq_num = 0; pic->temporal_id = 0; pic->reference_delta_frame_id = 0; pic->reference_frame_index = 0; pic->last_frame_type = PIPE_AV1_ENC_FRAME_TYPE_KEY; + pic->av1_ltr_seq = 0; current_slot = 0; ref_slot = 0; + request_idx = 0; radeon_enc_reset_av1_dpb_frames(enc); } else { - pic->temporal_id = radeon_enc_av1_calculate_temporal_id(pic->frame_id, + find = radeon_enc_av1_search_requested_reference(enc, &request_idx); + if (pic->av1_mark_long_term_reference || find) + pic->temporal_seq_num = 0; /*for ltr, always temporal_id = 0 */ + else + pic->temporal_seq_num++; + + pic->temporal_id = 
radeon_enc_av1_calculate_temporal_id(pic->temporal_seq_num, pic->num_temporal_layers - 1); - pic->reference_frame_index = - radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); + if (find) + pic->reference_frame_index = request_idx; + else + pic->reference_frame_index = + radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); ref_slot = pic->frames[pic->reference_frame_index].slot_id; pic->last_frame_type = pic->frames[pic->reference_frame_index].frame_type; radeon_enc_av1_pre_scan_frames(enc, pic->temporal_id); @@ -336,6 +448,8 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) pic->frames[pic->reference_frame_index].frame_id; current_slot = radeon_enc_av1_alloc_curr_frame(enc, pic->frame_id, pic->temporal_id, + pic->av1_mark_long_term_reference, + pic->av1_recon_frame, pic->frame_type); if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY || pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_SWITCH || From 2b7b8e0448a4b490ea9e0befc53e524520993859 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Thu, 22 Feb 2024 15:21:30 -0500 Subject: [PATCH 25/25] frontends/va: get av1 encoding ref frame infos for L0. The reference frame list is formed from each of the provided recon_frame, while the assumption here is to use the API provided by the VAAPI interface: when a frame is marked as "long term reference" by av1->picture_flags.bits.long_term_reference, its recon_frame will be kept in the DPB, marked by its recon_frame as signature. When a future input requests referring to it, it can go this way: 1. set av1->ref_frame_ctrl_l0.field.search_idx2 to indicate which ref_frame_idx slot will be used. x = av1->ref_frame_ctrl_l0.field.search_idx2; 2. n = av1->ref_frame_idx[x-1]; av1->reference_frames[n] is the signature to compare with. If av1->reference_frames[n] is pointing to the same video buffer (signature) as the one marked as "long term reference", then the new input is referring to it only. 3. 
in the SVC case, long term references are used for temporal_id 0 only, because using a long term reference means a scene change could potentially happen. 4. the "long term reference" recon_frame should be kept, rather than reused, until it is no longer needed, to avoid signature duplication. Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Part-of: --- src/gallium/frontends/va/picture_av1_enc.c | 32 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/gallium/frontends/va/picture_av1_enc.c b/src/gallium/frontends/va/picture_av1_enc.c index e34190f6266..3eb119738aa 100644 --- a/src/gallium/frontends/va/picture_av1_enc.c +++ b/src/gallium/frontends/va/picture_av1_enc.c @@ -133,6 +133,7 @@ VAStatus vlVaHandleVAEncSequenceParameterBufferTypeAV1(vlVaDriver *drv, vlVaCont VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VAEncPictureParameterBufferAV1 *av1 = buf->data; + struct pipe_video_buffer *video_buf = NULL; vlVaBuffer *coded_buf; int i; @@ -142,6 +143,7 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte context->desc.av1enc.enable_frame_obu = av1->picture_flags.bits.enable_frame_obu; context->desc.av1enc.allow_high_precision_mv = av1->picture_flags.bits.allow_high_precision_mv; context->desc.av1enc.palette_mode_enable = av1->picture_flags.bits.palette_mode_enable; + context->desc.av1enc.long_term_reference = av1->picture_flags.bits.long_term_reference; context->desc.av1enc.num_tiles_in_pic = av1->tile_cols * av1->tile_rows; context->desc.av1enc.tile_rows = av1->tile_rows; context->desc.av1enc.tile_cols = av1->tile_cols; @@ -157,14 +159,14 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte /* The last tile column or row size needs to be derived. 
*/ for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->width_in_sbs_minus_1); i++) context->desc.av1enc.width_in_sbs_minus_1[i] = av1->width_in_sbs_minus_1[i]; - + /* The last tile column or row size needs to be derived. */ for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->height_in_sbs_minus_1); i++) context->desc.av1enc.height_in_sbs_minus_1[i] = av1->height_in_sbs_minus_1[i]; context->desc.av1enc.cdef.cdef_damping_minus_3 = av1->cdef_damping_minus_3; context->desc.av1enc.cdef.cdef_bits = av1->cdef_bits; - + for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->cdef_y_strengths); i++) context->desc.av1enc.cdef.cdef_y_strengths[i] = av1->cdef_y_strengths[i]; @@ -226,7 +228,7 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte context->desc.av1enc.rc[i].min_qp = av1->min_base_qindex ? av1->min_base_qindex : 1; context->desc.av1enc.rc[i].max_qp = av1->max_base_qindex ? av1->max_base_qindex : 255; /* Distinguishes from the default params set for these values and app specific params passed down */ - context->desc.av1enc.rc[i].app_requested_qp_range = + context->desc.av1enc.rc[i].app_requested_qp_range = ((context->desc.av1enc.rc[i].max_qp != AV1_MAX_QP_DEFAULT) || (context->desc.av1enc.rc[i].min_qp != AV1_MIN_QP_DEFAULT)); } @@ -250,7 +252,25 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte if (context->desc.av1enc.frame_type == FRAME_TYPE_KEY_FRAME) context->desc.av1enc.last_key_frame_num = context->desc.av1enc.frame_num; - for (uint8_t i = 0 ; i < ARRAY_SIZE(av1->ref_frame_idx); i++) + if (av1->reconstructed_frame != VA_INVALID_ID) { + vlVaGetReferenceFrame(drv, av1->reconstructed_frame, &video_buf); + context->desc.av1enc.recon_frame = video_buf; + } + else + context->desc.av1enc.recon_frame = NULL; + + for (int i = 0 ; i < ARRAY_SIZE(context->desc.av1enc.ref_list); i++) { + if (av1->reference_frames[i] != VA_INVALID_ID) { + vlVaGetReferenceFrame(drv, av1->reference_frames[i], &video_buf); + context->desc.av1enc.ref_list[i] = 
video_buf; + } + else + context->desc.av1enc.ref_list[i] = NULL; + } + + context->desc.av1enc.ref_frame_ctrl_l0 = av1->ref_frame_ctrl_l0.value; + + for (int i = 0 ; i < ARRAY_SIZE(av1->ref_frame_idx); i++) context->desc.av1enc.ref_frame_idx[i] = av1->ref_frame_idx[i]; /* Initialize slice descriptors for this picture */ @@ -817,7 +837,7 @@ void getEncParamPresetAV1(vlVaContext *context) VAStatus vlVaHandleVAEncSliceParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VAEncTileGroupBufferAV1 *tile_buf = (VAEncTileGroupBufferAV1*) buf->data; - + if (context->desc.av1enc.num_tile_groups < ARRAY_SIZE(context->desc.av1enc.tile_groups)) { context->desc.av1enc.tile_groups[context->desc.av1enc.num_tile_groups].tile_group_start = tile_buf->tg_start; context->desc.av1enc.tile_groups[context->desc.av1enc.num_tile_groups].tile_group_end = tile_buf->tg_end; @@ -825,7 +845,7 @@ VAStatus vlVaHandleVAEncSliceParameterBufferTypeAV1(vlVaDriver *drv, vlVaContext } else { return VA_STATUS_ERROR_NOT_ENOUGH_BUFFER; } - + return VA_STATUS_SUCCESS; } #endif /* VA_CHECK_VERSION(1, 16, 0) */