diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index fb1c91be2141..2af078568440 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -468,7 +468,6 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool full_reset_o bool *needs_reset, bool *reset_completed) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; - int r; if (needs_reset) *needs_reset = false; @@ -476,81 +475,60 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool full_reset_o *reset_completed = false; /* Return a failure due to a GPU hang. */ - if (ctx->ws->info.drm_minor >= 24) { - uint64_t flags; - - if (full_reset_only && ctx->sw_status == PIPE_NO_RESET) { - /* If the caller is only interested in full reset (= wants to ignore soft - * recoveries), we can use the rejected cs count as a quick first check. - */ - return PIPE_NO_RESET; - } + uint64_t flags; - r = amdgpu_cs_query_reset_state2(ctx->ctx, &flags); - if (r) { - fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state2 failed. (%i)\n", r); - return PIPE_NO_RESET; - } + if (full_reset_only && ctx->sw_status == PIPE_NO_RESET) { + /* If the caller is only interested in full reset (= wants to ignore soft + * recoveries), we can use the rejected cs count as a quick first check. + */ + return PIPE_NO_RESET; + } - if (flags & AMDGPU_CTX_QUERY2_FLAGS_RESET) { - if (reset_completed) { - /* The ARB_robustness spec says: - * - * If a reset status other than NO_ERROR is returned and subsequent - * calls return NO_ERROR, the context reset was encountered and - * completed. If a reset status is repeatedly returned, the context may - * be in the process of resetting. - * - * Starting with drm_minor >= 54 amdgpu reports if the reset is complete, - * so don't do anything special. On older kernels, submit a no-op cs. If it - * succeeds then assume the reset is complete. - */ - if (!(flags & AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS)) - *reset_completed = true; - - if (ctx->ws->info.drm_minor < 54 && ctx->ws->info.has_graphics) - *reset_completed = amdgpu_submit_gfx_nop(ctx) == 0; + /* + * ctx->sw_status is updated on alloc/ioctl failures. + * + * We only rely on amdgpu_cs_query_reset_state2 to tell us + * that the context reset is complete. + */ + if (ctx->sw_status != PIPE_NO_RESET) { + int r = amdgpu_cs_query_reset_state2(ctx->ctx, &flags); + if (!r) { + if (flags & AMDGPU_CTX_QUERY2_FLAGS_RESET) { + if (reset_completed) { + /* The ARB_robustness spec says: + * + * If a reset status other than NO_ERROR is returned and subsequent + * calls return NO_ERROR, the context reset was encountered and + * completed. If a reset status is repeatedly returned, the context may + * be in the process of resetting. + * + * Starting with drm_minor >= 54 amdgpu reports if the reset is complete, + * so don't do anything special. On older kernels, submit a no-op cs. If it + * succeeds then assume the reset is complete. + */ + if (!(flags & AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS)) + *reset_completed = true; + + if (ctx->ws->info.drm_minor < 54 && ctx->ws->info.has_graphics) + *reset_completed = amdgpu_submit_gfx_nop(ctx) == 0; + } } - - if (needs_reset) - *needs_reset = flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; - if (flags & AMDGPU_CTX_QUERY2_FLAGS_GUILTY) - return PIPE_GUILTY_CONTEXT_RESET; - else - return PIPE_INNOCENT_CONTEXT_RESET; - } - } else { - uint32_t result, hangs; - - r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs); - if (r) { - fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r); - return PIPE_NO_RESET; - } - - if (needs_reset) - *needs_reset = true; - switch (result) { - case AMDGPU_CTX_GUILTY_RESET: - return PIPE_GUILTY_CONTEXT_RESET; - case AMDGPU_CTX_INNOCENT_RESET: - return PIPE_INNOCENT_CONTEXT_RESET; - case AMDGPU_CTX_UNKNOWN_RESET: - return PIPE_UNKNOWN_CONTEXT_RESET; + } else { + fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state2 failed. (%i)\n", r); } - } - /* Return a failure due to SW issues. */ - if (ctx->sw_status != PIPE_NO_RESET) { + /* Return a failure due to SW issues. */ if (needs_reset) *needs_reset = true; return ctx->sw_status; } + if (needs_reset) *needs_reset = false; return PIPE_NO_RESET; } + /* COMMAND SUBMISSION */ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) @@ -1784,19 +1762,19 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) if (unlikely(r)) { if (r == -ECANCELED) { amdgpu_ctx_set_sw_reset_status((struct radeon_winsys_ctx*)acs->ctx, PIPE_INNOCENT_CONTEXT_RESET, - "amdgpu: The CS has cancelled because the context is lost. This context is innocent.\n"); + "amdgpu: The CS has cancelled because the context is lost. This context is innocent.\n"); } else if (r == -ENODATA) { amdgpu_ctx_set_sw_reset_status((struct radeon_winsys_ctx*)acs->ctx, PIPE_GUILTY_CONTEXT_RESET, - "amdgpu: The CS has cancelled because the context is lost. This context is guilty of a soft recovery.\n"); + "amdgpu: The CS has cancelled because the context is lost. This context is guilty of a soft recovery.\n"); } else if (r == -ETIME) { amdgpu_ctx_set_sw_reset_status((struct radeon_winsys_ctx*)acs->ctx, PIPE_GUILTY_CONTEXT_RESET, - "amdgpu: The CS has cancelled because the context is lost. This context is guilty of a hard recovery.\n"); + "amdgpu: The CS has cancelled because the context is lost. This context is guilty of a hard recovery.\n"); } else { amdgpu_ctx_set_sw_reset_status((struct radeon_winsys_ctx*)acs->ctx, - PIPE_UNKNOWN_CONTEXT_RESET, - "amdgpu: The CS has been rejected, " - "see dmesg for more information (%i).\n", - r); + PIPE_UNKNOWN_CONTEXT_RESET, + "amdgpu: The CS has been rejected, " + "see dmesg for more information (%i).\n", + r); } }