ITS::gpu: Update track selection logics to the state of the art (#13816…

…) (#13899) Add processNeighbours GPU kernel and handler Update Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt Fix second iteration Move the whole processNeighbours on GPU
AliceO2Group · Jan 25, 2025 · fb7b17c · fb7b17c
1 parent d399bee
commit fb7b17c
Show file tree

Hide file tree

Showing 6 changed files with 157 additions and 177 deletions.
diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h
@@ -160,7 +160,7 @@ class TrackParametrization
   GPUd() value_t getZ() const;
   GPUd() value_t getSnp() const;
   GPUd() value_t getTgl() const;
-  GPUd() value_t getQ2Pt() const;
+  GPUhd() value_t getQ2Pt() const;
   GPUd() value_t getCharge2Pt() const;
   GPUd() int getAbsCharge() const;
   GPUd() PID getPID() const;
@@ -357,7 +357,7 @@ GPUdi() auto TrackParametrization<value_T>::getTgl() const -> value_t
 
 //____________________________________________________________
 template <typename value_T>
-GPUdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
+GPUhdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
 {
   return mP[kQ2Pt];
 }

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
@@ -116,6 +116,7 @@ class TimeFrameGPU : public TimeFrame
   int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
   gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
   gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
+  std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
   int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
   int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; }
   TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
@@ -142,6 +143,7 @@ class TimeFrameGPU : public TimeFrame
   // Host-specific getters
   gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
   gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
+  std::array<int, nLayers - 2>& getArrayNCells() { return mNCells; }
 
   // Host-available device getters
   gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h
@@ -186,19 +186,17 @@ void processNeighboursHandler(const int startLayer,
                               const int startLevel,
                               CellSeed** allCellSeeds,
                               CellSeed* currentCellSeeds,
-                              const unsigned int nCurrentCells,
+                              std::array<int, nLayers - 2>& nCells,
                               const unsigned char** usedClusters,
-                              int* neighbours,
+                              std::array<int*, nLayers - 2>& neighbours,
                               gsl::span<int*> neighboursDeviceLUTs,
                               const TrackingFrameInfo** foundTrackingFrameInfo,
+                              std::vector<CellSeed>& seedsHost,
                               const float bz,
                               const float MaxChi2ClusterAttachment,
+                              const float maxChi2NDF,
                               const o2::base::Propagator* propagator,
                               const o2::base::PropagatorF::MatCorrType matCorrType,
-                              const std::vector<int>& lastCellIdHost,        // temporary host vector
-                              const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
-                              std::vector<int>& updatedCellIdHost,           // temporary host vector
-                              std::vector<CellSeed>& updatedCellSeedHost,    // temporary host vector
                               const int nBlocks,
                               const int nThreads);
 

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt
@@ -13,7 +13,7 @@
 if(CUDA_ENABLED)
 find_package(CUDAToolkit)
 message(STATUS "Building ITS CUDA tracker")
-# add_compile_options(-O0 -g -lineinfo -fPIC)
+add_compile_options(-O0 -g -lineinfo -fPIC)
 # add_compile_definitions(ITS_MEASURE_GPU_TIME)
 o2_add_library(ITStrackingCUDA
                SOURCES ClusterLinesGPU.cu

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
@@ -205,9 +205,6 @@ void TrackerTraitsGPU<nLayers>::computeCellsHybrid(const int iteration)
                         conf.nBlocks,
                         conf.nThreads);
   }
-  // Needed for processNeighbours() which is still on CPU.
-  mTimeFrameGPU->downloadCellsDevice();
-  mTimeFrameGPU->downloadCellsLUTDevice();
 }
 
 template <int nLayers>
@@ -221,11 +218,11 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
     mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].clear();
     mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].resize(nextLayerCellsNum, 0);
 
-    if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
-        mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
-      mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
-      continue;
-    }
+    // if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
+    //     mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
+    //   mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
+    //   continue;
+    // }
 
     mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
     countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(),
@@ -267,7 +264,6 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
                                 cellsNeighboursLayer[iLayer].size());
   }
   mTimeFrameGPU->createNeighboursDeviceArray();
-  mTimeFrameGPU->downloadCellsDevice();
   mTimeFrameGPU->unregisterRest();
 };
 
@@ -289,55 +285,21 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
                                         startLevel,
                                         mTimeFrameGPU->getDeviceArrayCells(),
                                         mTimeFrameGPU->getDeviceCells()[startLayer],
-                                        mTimeFrameGPU->getNCells()[startLayer],
+                                        mTimeFrameGPU->getArrayNCells(),
                                         mTimeFrameGPU->getDeviceArrayUsedClusters(),
-                                        mTimeFrameGPU->getDeviceNeighbours(startLayer - 1),
+                                        mTimeFrameGPU->getDeviceNeighboursAll(),
                                         mTimeFrameGPU->getDeviceNeighboursLUTs(),
                                         mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
+                                        trackSeeds,
                                         mBz,
-                                        mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
+                                        mTrkParams[0].MaxChi2ClusterAttachment,
+                                        mTrkParams[0].MaxChi2NDF,
                                         mTimeFrameGPU->getDevicePropagator(),
                                         mCorrType,
-                                        lastCellId,      // temporary host vector
-                                        lastCellSeed,    // temporary host vector
-                                        updatedCellId,   // temporary host vectors
-                                        updatedCellSeed, // temporary host vectors
                                         conf.nBlocks,
                                         conf.nThreads);
-
-      int level = startLevel;
-      for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
-        lastCellSeed.swap(updatedCellSeed);
-        lastCellId.swap(updatedCellId);
-        std::vector<CellSeed>().swap(updatedCellSeed); /// tame the memory peaks
-        updatedCellId.clear();
-        processNeighboursHandler<nLayers>(iLayer,
-                                          --level,
-                                          mTimeFrameGPU->getDeviceArrayCells(),
-                                          mTimeFrameGPU->getDeviceCells()[iLayer],
-                                          mTimeFrameGPU->getNCells()[iLayer],
-                                          mTimeFrameGPU->getDeviceArrayUsedClusters(),
-                                          mTimeFrameGPU->getDeviceNeighbours(iLayer - 1),
-                                          mTimeFrameGPU->getDeviceNeighboursLUTs(),
-                                          mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
-                                          mBz,
-                                          mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
-                                          mTimeFrameGPU->getDevicePropagator(),
-                                          mCorrType,
-                                          lastCellId,      // temporary host vector
-                                          lastCellSeed,    // temporary host vector
-                                          updatedCellId,   // temporary host vectors
-                                          updatedCellSeed, // temporary host vectors
-                                          conf.nBlocks,
-                                          conf.nThreads);
-      }
-      for (auto& seed : updatedCellSeed) {
-        if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) {
-          continue;
-        }
-        trackSeeds.push_back(seed);
-      }
     }
+    // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
     if (!trackSeeds.size()) {
       LOGP(info, "No track seeds found, skipping track finding");
       continue;
@@ -362,9 +324,6 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
     mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
 
     auto& tracks = mTimeFrameGPU->getTrackITSExt();
-    std::sort(tracks.begin(), tracks.end(), [](const TrackITSExt& a, const TrackITSExt& b) {
-      return a.getChi2() < b.getChi2();
-    });
 
     for (auto& track : tracks) {
       if (!track.getChi2()) {