From dc7e8e9625a4b61888d1f5bcd383e9712e999423 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 4 Feb 2025 01:44:02 +0100 Subject: [PATCH] GPU: Remove support for host helper threads (no longer used) --- Common/Topologies/o2prototype_topology.xml | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 - GPU/GPUTracking/Base/GPUReconstructionCPU.h | 10 +- .../Base/GPUReconstructionDeviceBase.cxx | 139 ------------------ .../Base/GPUReconstructionDeviceBase.h | 17 +-- .../Base/GPUReconstructionHelpers.h | 50 ------- GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 - GPU/GPUTracking/Global/GPUChain.h | 13 -- GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- .../Global/GPUChainTrackingSliceTracker.cxx | 65 +------- 11 files changed, 8 insertions(+), 300 deletions(-) delete mode 100644 GPU/GPUTracking/Base/GPUReconstructionHelpers.h diff --git a/Common/Topologies/o2prototype_topology.xml b/Common/Topologies/o2prototype_topology.xml index 240b8d87d469a..8d53c9eb0127a 100644 --- a/Common/Topologies/o2prototype_topology.xml +++ b/Common/Topologies/o2prototype_topology.xml @@ -74,7 +74,7 @@ The following parameters need adjustment when extending the FLP-EPN configuratio - $ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -GPUHelperThreads 4 -loglevel=0x7c' + $ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -loglevel=0x7c' EPNReceiverOutputAddress diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 1496300818fd8..270f092a1fd29 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -278,9 +278,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) { mProcessingSettings.mergerSortTracks = false; } - if (!IsGPU()) { - mProcessingSettings.nDeviceHelperThreads = 0; - } if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) { mProcessingSettings.delayedOutput = false; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 8cc753731d074..27959382e7b67 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -16,7 +16,6 @@ #define GPURECONSTRUCTIONICPU_H #include "GPUReconstruction.h" -#include "GPUReconstructionHelpers.h" #include "GPUConstantMem.h" #include #include "utils/timer.h" @@ -117,13 +116,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernelsPtr(), res->PtrDevice()); } size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); } @@ -294,7 +286,7 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id); if (timer == nullptr) { - int32_t max = std::max({getOMPMaxThreads(), mProcessingSettings.nDeviceHelperThreads + 1, mProcessingSettings.nStreams}); + int32_t max = std::max({getOMPMaxThreads(), mProcessingSettings.nStreams}); timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); } if (num == -1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 3522095622ad4..91715fab4f668 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -41,57 +41,6 @@ GPUReconstructionDeviceBase::GPUReconstructionDeviceBase(const GPUSettingsDevice GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() = default; -void* GPUReconstructionDeviceBase::helperWrapper_static(void* arg) -{ - GPUReconstructionHelpers::helperParam* par = (GPUReconstructionHelpers::helperParam*)arg; - GPUReconstructionDeviceBase* cls = par->cls; - return cls->helperWrapper(par); -} - -void* GPUReconstructionDeviceBase::helperWrapper(GPUReconstructionHelpers::helperParam* par) -{ - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("\tHelper thread %d starting", par->num); - } - - // cpu_set_t mask; //TODO add option - // CPU_ZERO(&mask); - // CPU_SET(par->num * 2 + 2, &mask); - // sched_setaffinity(0, sizeof(mask), &mask); - - par->mutex[0].lock(); - while (par->terminate == false) { - for (int32_t i = par->num + 1; i < par->count; i += mProcessingSettings.nDeviceHelperThreads + 1) { - // if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Running, Slice %d+%d, Phase %d", par->num, i, par->phase); - if ((par->functionCls->*par->function)(i, par->num + 1, par)) { - par->error = 1; - } - if (par->reset) { - break; - } - par->done = i + 1; - // if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Finished, Slice %d+%d, Phase %d", par->num, i, par->phase); - } - ResetThisHelperThread(par); - par->mutex[0].lock(); - } - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("\tHelper thread %d terminating", par->num); - } - par->mutex[1].unlock(); - pthread_exit(nullptr); - return (nullptr); -} - -void GPUReconstructionDeviceBase::ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par) -{ - if (par->reset) { - GPUImportant("GPU Helper Thread %d reseting", par->num); - } - par->reset = false; - par->mutex[1].unlock(); -} - int32_t GPUReconstructionDeviceBase::GetGlobalLock(void*& pLock) { #ifdef _WIN32 @@ -138,86 +87,6 @@ void GPUReconstructionDeviceBase::ReleaseGlobalLock(void* sem) #endif } -void GPUReconstructionDeviceBase::ResetHelperThreads(int32_t helpers) -{ - GPUImportant("Error occurred, GPU tracker helper threads will be reset (Number of threads %d (%d))", mProcessingSettings.nDeviceHelperThreads, mNSlaveThreads); - SynchronizeGPU(); - for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) { - mHelperParams[i].reset = true; - if (helpers || i >= mProcessingSettings.nDeviceHelperThreads) { - pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]); - } - } - GPUImportant("GPU Tracker helper threads have ben reset"); -} - -int32_t GPUReconstructionDeviceBase::StartHelperThreads() -{ - int32_t nThreads = mProcessingSettings.nDeviceHelperThreads; - if (nThreads) { - mHelperParams = new GPUReconstructionHelpers::helperParam[nThreads]; - if (mHelperParams == nullptr) { - GPUError("Memory allocation error"); - ExitDevice(); - return (1); - } - for (int32_t i = 0; i < nThreads; i++) { - mHelperParams[i].cls = this; - mHelperParams[i].terminate = false; - mHelperParams[i].reset = false; - mHelperParams[i].num = i; - for (int32_t j = 0; j < 2; j++) { - mHelperParams[i].mutex[j].lock(); - } - - if (pthread_create(&mHelperParams[i].threadId, nullptr, helperWrapper_static, &mHelperParams[i])) { - GPUError("Error starting slave thread"); - ExitDevice(); - return (1); - } - } - } - mNSlaveThreads = nThreads; - return (0); -} - -int32_t GPUReconstructionDeviceBase::StopHelperThreads() -{ - if (mNSlaveThreads) { - for (int32_t i = 0; i < mNSlaveThreads; i++) { - mHelperParams[i].terminate = true; - mHelperParams[i].mutex[0].unlock(); - mHelperParams[i].mutex[1].lock(); - if (pthread_join(mHelperParams[i].threadId, nullptr)) { - GPUError("Error waiting for thread to terminate"); - return (1); - } - } - delete[] mHelperParams; - } - mNSlaveThreads = 0; - return (0); -} - -void GPUReconstructionDeviceBase::WaitForHelperThreads() -{ - for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) { - pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]); - } -} - -void GPUReconstructionDeviceBase::RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t i, int32_t t, GPUReconstructionHelpers::helperParam* p), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) -{ - for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) { - mHelperParams[i].done = 0; - mHelperParams[i].error = 0; - mHelperParams[i].function = function; - mHelperParams[i].functionCls = functionCls; - mHelperParams[i].count = count; - pthread_mutex_unlock(&((pthread_mutex_t*)mHelperParams[i].mutex)[0]); - } -} - int32_t GPUReconstructionDeviceBase::InitDevice() { // cpu_set_t mask; @@ -262,10 +131,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice() mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors"); AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors); - if (StartHelperThreads()) { - return (1); - } - if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) { GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message! } @@ -282,10 +147,6 @@ void* GPUReconstructionDeviceBase::GPUProcessorProcessors::SetPointersDeviceProc int32_t GPUReconstructionDeviceBase::ExitDevice() { - if (StopHelperThreads()) { - return (1); - } - int32_t retVal = ExitDevice_Runtime(); mProcessorsShadow = nullptr; mHostMemoryPool = mHostMemoryBase = mDeviceMemoryPool = mDeviceMemoryBase = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = mHostMemoryPermanent = mDeviceMemoryPermanent = nullptr; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 215615f558442..1381fd0f76981 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -17,7 +17,6 @@ #include "GPUReconstructionCPU.h" #include -#include "GPUReconstructionHelpers.h" #include "GPUChain.h" #include @@ -61,24 +60,10 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override; size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override = 0; - int32_t StartHelperThreads() override; - int32_t StopHelperThreads() override; - void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) override; - int32_t HelperError(int32_t iThread) const override { return mHelperParams[iThread].error; } - int32_t HelperDone(int32_t iThread) const override { return mHelperParams[iThread].done; } - void WaitForHelperThreads() override; - void ResetHelperThreads(int32_t helpers) override; - void ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par); - int32_t GetGlobalLock(void*& pLock); void ReleaseGlobalLock(void* sem); - static void* helperWrapper_static(void* arg); - void* helperWrapper(GPUReconstructionHelpers::helperParam* par); - - int32_t mDeviceId = -1; // Device ID used by backend - GPUReconstructionHelpers::helperParam* mHelperParams = nullptr; // Control Struct for helper threads - int32_t mNSlaveThreads = 0; // Number of slave threads currently active + int32_t mDeviceId = -1; // Device ID used by backend struct DebugEvents { deviceEvent DebugStart, DebugStop; // Debug timer events diff --git a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h b/GPU/GPUTracking/Base/GPUReconstructionHelpers.h deleted file mode 100644 index c55e81905f32f..0000000000000 --- a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionHelpers.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONHELPERS_H -#define GPURECONSTRUCTIONHELPERS_H - -#include - -namespace o2 -{ -namespace gpu -{ -class GPUReconstructionDeviceBase; -class GPUReconstructionHelpers -{ - public: - class helperDelegateBase - { - }; - - struct helperParam { - pthread_t threadId; - GPUReconstructionDeviceBase* cls; - int32_t num; - std::mutex mutex[2]; - int8_t terminate; - helperDelegateBase* functionCls; - int32_t (helperDelegateBase::*function)(int32_t, int32_t, helperParam*); - int32_t phase; - int32_t count; - volatile int32_t done; - volatile int8_t error; - volatile int8_t reset; - }; -}; -} // namespace gpu -} // namespace o2 - -#endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 5dd92d41db29b..6acc7fd1dd537 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -104,7 +104,6 @@ set(HDRS_INSTALL Base/GPUConstantMem.h Base/GPUParam.inc Base/GPUParamRTC.h - Base/GPUReconstructionHelpers.h Base/GPUReconstructionIncludes.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index c10793975453d..ca6f2f370300e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -252,7 +252,6 @@ AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0, AddOption(ompThreads, int32_t, -1, "omp", 't', "Number of OMP threads to run (-1: all)", min(-1), message("Using %s OMP threads")) AddOption(ompKernels, uint8_t, 2, "", 0, "Parallelize with OMP inside kernels instead of over slices, 2 for nested parallelization over TPC sectors and inside kernels") AddOption(ompAutoNThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data") -AddOption(nDeviceHelperThreads, int32_t, 1, "", 0, "Number of CPU helper threads for CPU processing") AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 06650f9d9c733..0981fea43810a 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -16,7 +16,6 @@ #define GPUCHAIN_H #include "GPUReconstructionCPU.h" -#include "GPUReconstructionHelpers.h" namespace o2 { @@ -111,12 +110,6 @@ class GPUChain } } inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); } - template - void RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count); - inline void WaitForHelperThreads() { mRec->WaitForHelperThreads(); } - inline int32_t HelperError(int32_t iThread) const { return mRec->HelperError(iThread); } - inline int32_t HelperDone(int32_t iThread) const { return mRec->HelperDone(iThread); } - inline void ResetHelperThreads(int32_t helpers) { mRec->ResetHelperThreads(helpers); } inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); } // nEvents is forced to 0 if evList == nullptr inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); } @@ -242,12 +235,6 @@ class GPUChain void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args); }; -template -inline void GPUChain::RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) -{ - mRec->RunHelperThreads((int32_t(GPUReconstructionHelpers::helperDelegateBase::*)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*))function, functionCls, count); -} - template inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args) { diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 6d6d82b518097..d827b095773b1 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -16,7 +16,6 @@ #define GPUCHAINTRACKING_H #include "GPUChain.h" -#include "GPUReconstructionHelpers.h" #include "GPUDataTypes.h" #include #include @@ -68,7 +67,7 @@ struct GPUTPCCFChainContext; struct GPUNewCalibValues; struct GPUTriggerOutputs; -class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelegateBase +class GPUChainTracking : public GPUChain { friend class GPUReconstruction; @@ -314,15 +313,11 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); - std::atomic_flag mLockAtomicOutputBuffer = ATOMIC_FLAG_INIT; std::mutex mMutexUpdateCalib; std::unique_ptr mPipelineFinalizationCtx; GPUChainTrackingFinalContext* mPipelineNotifyCtx = nullptr; std::function mWaitForFinalInputs; - int32_t HelperReadEvent(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par); - int32_t HelperOutput(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par); - int32_t OutputStream() const { return mRec->NStreams() - 2; } }; } // namespace gpu diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index 35a8c6c455048..174b3757d3307 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -55,9 +55,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices() if (retVal) { SynchronizeGPU(); } - if (retVal >= 2) { - ResetHelperThreads(retVal >= 3); - } return (retVal != 0); } @@ -114,9 +111,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() processorsShadow()->tpcTrackers[iSlice].SetGPUTextureBase(mRec->DeviceMemoryBase()); } - if (!doSliceDataOnGPU) { - RunHelperThreads(&GPUChainTracking::HelperReadEvent, this, NSLICES); - } if (PrepareTextures()) { return (2); } @@ -183,22 +177,12 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); streamInit[useStream] = true; - } else if (!doGPU || iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) == 0) { + } else { if (ReadEvent(iSlice, 0)) { GPUError("Error reading event"); error = 1; continue; } - } else { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Waiting for helper thread %d", iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1); - } - while (HelperDone(iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1) < (int32_t)iSlice) { - } - if (HelperError(iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1)) { - error = 1; - continue; - } } if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); @@ -297,9 +281,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (doGPU) { ReleaseEvent(mEvents->init); } - if (!doSliceDataOnGPU) { - WaitForHelperThreads(); - } if (!GetProcessingSettings().trackletSelectorInPipeline) { if (GetProcessingSettings().trackletConstructorInPipeline) { @@ -359,7 +340,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (param().rec.tpc.globalTracking) { mWriteOutputDone.fill(0); } - RunHelperThreads(&GPUChainTracking::HelperOutput, this, NSLICES); uint32_t tmpSlice = 0; for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { @@ -402,12 +382,12 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for slice %d, helper thread %d", iSlice, iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1)); + GPUInfo("Data ready for slice %d", iSlice); } mSliceSelectorReady = iSlice; if (param().rec.tpc.globalTracking) { - for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a += GetProcessingSettings().nDeviceHelperThreads + 1) { + for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a++) { uint32_t tmpSlice2 = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(tmpSlice2a); uint32_t sliceLeft, sliceRight; GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); @@ -419,12 +399,9 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } } else { - if (iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) == 0) { - WriteOutput(iSlice, 0); - } + WriteOutput(iSlice, 0); } } - WaitForHelperThreads(); } if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.globalTracking) { std::vector blocking(NSLICES * mRec->NStreams()); @@ -518,43 +495,9 @@ void GPUChainTracking::WriteOutput(int32_t iSlice, int32_t threadId) if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Running WriteOutput for slice %d on thread %d\n", iSlice, threadId); } - if (GetProcessingSettings().nDeviceHelperThreads) { - while (mLockAtomicOutputBuffer.test_and_set(std::memory_order_acquire)) { - } - } processors()->tpcTrackers[iSlice].WriteOutputPrepare(); - if (GetProcessingSettings().nDeviceHelperThreads) { - mLockAtomicOutputBuffer.clear(); - } processors()->tpcTrackers[iSlice].WriteOutput(); if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Finished WriteOutput for slice %d on thread %d\n", iSlice, threadId); } } - -int32_t GPUChainTracking::HelperReadEvent(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par) { return ReadEvent(iSlice, threadId); } - -int32_t GPUChainTracking::HelperOutput(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par) -{ - if (param().rec.tpc.globalTracking) { - uint32_t tmpSlice = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(iSlice); - uint32_t sliceLeft, sliceRight; - GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); - - while (mSliceSelectorReady < (int32_t)tmpSlice || mSliceSelectorReady < (int32_t)sliceLeft || mSliceSelectorReady < (int32_t)sliceRight) { - if (par->reset) { - return 1; - } - } - GlobalTracking(tmpSlice, 0); - WriteOutput(tmpSlice, 0); - } else { - while (mSliceSelectorReady < iSlice) { - if (par->reset) { - return 1; - } - } - WriteOutput(iSlice, threadId); - } - return 0; -}