From c55ac7717434b9d93f858463d15404944425bac2 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Tue, 16 Jan 2024 00:42:45 +0000 Subject: [PATCH] radv: Remove check_status Following discussion on the kernel mailing list [1], we are not gaining anything from this right now, and it does not handle soft recovery. We will hear about the context loss and rationale on the next vkQueueSubmit. We can come back to this if there is ever a Vulkan extension for telling innocent from guilty contexts, as GL_EXT_robustness does. This does mean, however, that we return VK_SUCCESS for cancelled semaphore and fence waits, but this is legal per the Vulkan spec: "Commands that wait indefinitely for device execution (namely vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences with a maximum timeout, and vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT bit set in flags) must return in finite time even in the case of a lost device, and return either VK_SUCCESS or VK_ERROR_DEVICE_LOST." "If device loss occurs (see Lost Device) before the timeout has expired, vkWaitSemaphores must return in finite time with either VK_SUCCESS or VK_ERROR_DEVICE_LOST." 
[1]: https://lists.freedesktop.org/archives/amd-gfx/2024-January/103337.html Signed-off-by: Joshua Ashton Reviewed-by: Samuel Pitoiset Reviewed-by: Friedrich Vock --- src/amd/vulkan/radv_device.c | 27 --------------------------- src/amd/vulkan/radv_queue.c | 2 ++ 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 121e993e1f56..bc8ce135c33b 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -598,32 +598,6 @@ init_dispatch_tables(struct radv_device *device, struct radv_physical_device *ph add_entrypoints(&b, &vk_common_device_entrypoints, RADV_DISPATCH_TABLE_COUNT); } -static VkResult -radv_check_status(struct vk_device *vk_device) -{ - struct radv_device *device = container_of(vk_device, struct radv_device, vk); - enum radv_reset_status status; - bool context_reset = false; - - /* If an INNOCENT_CONTEXT_RESET is found in one of the contexts, we need to - * keep querying in case there's a guilty one, so we can correctly log if the - * hung happened in this app or not */ - for (int i = 0; i < RADV_NUM_HW_CTX; i++) { - if (device->hw_ctx[i]) { - status = device->ws->ctx_query_reset_status(device->hw_ctx[i]); - - if (status == RADV_GUILTY_CONTEXT_RESET) - return vk_device_set_lost(&device->vk, "GPU hung detected in this process"); - else if (status == RADV_INNOCENT_CONTEXT_RESET) - context_reset = true; - } - } - - if (context_reset) - return vk_device_set_lost(&device->vk, "GPU hung triggered by other process"); - return VK_SUCCESS; -} - static VkResult capture_trace(VkQueue _queue) { @@ -816,7 +790,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->vk.capture_trace = capture_trace; device->vk.command_buffer_ops = &radv_cmd_buffer_ops; - device->vk.check_status = radv_check_status; device->instance = physical_device->instance; device->physical_device = physical_device; diff --git a/src/amd/vulkan/radv_queue.c 
b/src/amd/vulkan/radv_queue.c index ae80616c520a..4c228ed64185 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -31,6 +31,8 @@ #include "vk_semaphore.h" #include "vk_sync.h" +#include "ac_debug.h" + enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj) {