From c1581efa4b4bd7b18c139924f8abbecc6d384190 Mon Sep 17 00:00:00 2001
From: Michael Keiblinger
Date: Sun, 1 Sep 2024 11:13:35 +0200
Subject: [PATCH] wait after ensureEnoughLocalMem() if it did something even if launch is async

---
 driverapi/src/cmdqueue.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/driverapi/src/cmdqueue.cpp b/driverapi/src/cmdqueue.cpp
index 5d87db9..41b44b1 100644
--- a/driverapi/src/cmdqueue.cpp
+++ b/driverapi/src/cmdqueue.cpp
@@ -437,8 +437,6 @@ libreCudaStatus_t NvCommandQueue::ensureEnoughLocalMem(NvU32 localMemReq) {
         timelineCtr++;
     }
 
-    //LIBRECUDA_ERR_PROPAGATE(signalWaitGpu(timelineSignal, timelineCtr));
-
     LIBRECUDA_SUCCEED();
 }
 
@@ -454,10 +452,17 @@ NvCommandQueue::launchFunction(LibreCUFunction function,
                                bool async) {
     LIBRECUDA_VALIDATE(function != nullptr, LIBRECUDA_ERROR_INVALID_VALUE);
     LIBRECUDA_VALIDATE(numParams == function->param_info.size(), LIBRECUDA_ERROR_INVALID_VALUE);
-    if (!async) {
+
+    bool local_mem_changed;
+    {
+        auto pre_ctr = timelineCtr;
+        LIBRECUDA_ERR_PROPAGATE(ensureEnoughLocalMem(function->local_mem_req));
+        local_mem_changed = timelineCtr > pre_ctr;
+    }
+
+    if (!async || local_mem_changed) {
         LIBRECUDA_ERR_PROPAGATE(signalWaitGpu(timelineSignal, timelineCtr));
     }
-    LIBRECUDA_ERR_PROPAGATE(ensureEnoughLocalMem(function->local_mem_req));
 
     if (dmaCommandBuffer.empty()) {
         currentQueueType = COMPUTE;