diff --git a/docs/envvars.rst b/docs/envvars.rst index 084e37fa1fd..d466755d0f0 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -995,7 +995,7 @@ Clover environment variables allows specifying additional linker options. Specified options are appended after the options set by the OpenCL program in ``clLinkProgram``. - + .. _rusticl-env-var: .. envvar:: IRIS_ENABLE_CLOVER @@ -1339,6 +1339,8 @@ RADV driver environment variables enable optimizations to move more driver internal objects to VRAM. ``rtwave64`` enable wave64 for ray tracing shaders (GFX10+) + ``transfer_queue`` + enable experimental transfer queue support (GFX9+, not yet spec compliant) ``video_decode`` enable experimental video decoding support ``gsfastlaunch2`` diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index d28c2de8043..73ccead2a67 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -89,6 +89,7 @@ enum { RADV_PERFTEST_VIDEO_DECODE = 1u << 11, RADV_PERFTEST_DMA_SHADERS = 1u << 12, RADV_PERFTEST_GS_FAST_LAUNCH_2 = 1u << 13, + RADV_PERFTEST_TRANSFER_QUEUE = 1u << 14, }; bool radv_init_trace(struct radv_device *device); diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 762b4b3383d..c0877f53ce1 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -100,6 +100,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P {"video_decode", RADV_PERFTEST_VIDEO_DECODE}, {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, {"gsfastlaunch2", RADV_PERFTEST_GS_FAST_LAUNCH_2}, + {"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index fe3ec275ef5..b797bc61eb6 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -71,6 +71,17 @@ radv_taskmesh_enabled(const struct radv_physical_device *pdevice) !(pdevice->instance->debug_flags 
& RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit; } +static bool +radv_transfer_queue_enabled(const struct radv_physical_device *pdevice) +{ + /* Require a known SDMA IP version, at least one SDMA queue and the RADV_PERFTEST_TRANSFER_QUEUE flag. */ + if (pdevice->rad_info.sdma_ip_version == SDMA_UNKNOWN || !pdevice->rad_info.ip[AMD_IP_SDMA].num_queues || + !(pdevice->instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE)) + return false; + + return pdevice->rad_info.gfx_level >= GFX9; +} + static bool radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice) { @@ -199,6 +210,11 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice) } } + if (radv_transfer_queue_enabled(pdevice)) { + pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER; + idx++; + } + pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE; pdevice->num_queues = idx; @@ -2119,6 +2135,10 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd num_queue_families++; } + if (radv_transfer_queue_enabled(pdevice)) { + num_queue_families++; + } + if (pQueueFamilyProperties == NULL) { *pCount = num_queue_families; return; @@ -2171,6 +2191,18 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd } } + if (radv_transfer_queue_enabled(pdevice)) { + if (*pCount > idx) { + *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ + .queueFlags = VK_QUEUE_TRANSFER_BIT, + .queueCount = pdevice->rad_info.ip[AMD_IP_SDMA].num_queues, + .timestampValidBits = 64, + .minImageTransferGranularity = (VkExtent3D){16, 16, 8}, + }; + idx++; + } + } + if (*pCount > idx) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT, @@ -2201,13 +2233,12 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui return; } VkQueueFamilyProperties *properties[] = { - &pQueueFamilyProperties[0].queueFamilyProperties, - &pQueueFamilyProperties[1].queueFamilyProperties, - 
&pQueueFamilyProperties[2].queueFamilyProperties, - &pQueueFamilyProperties[3].queueFamilyProperties, + &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties, + &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties, + &pQueueFamilyProperties[4].queueFamilyProperties, }; radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); - assert(*pCount <= 4); + assert(*pCount <= 5); for (uint32_t i = 0; i < *pCount; i++) { vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {