Skip to content

Commit

Permalink
ITS::gpu: Update track selection logics to the state of the art (#13816
Browse files Browse the repository at this point in the history
…) (#13899)

Add processNeighbours GPU kernel and handler

Update Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt

Fix second iteration

Move the whole processNeighbours on GPU
  • Loading branch information
mconcas authored Jan 25, 2025
1 parent d399bee commit fb7b17c
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 177 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class TrackParametrization
GPUd() value_t getZ() const;
GPUd() value_t getSnp() const;
GPUd() value_t getTgl() const;
GPUd() value_t getQ2Pt() const;
GPUhd() value_t getQ2Pt() const;
GPUd() value_t getCharge2Pt() const;
GPUd() int getAbsCharge() const;
GPUd() PID getPID() const;
Expand Down Expand Up @@ -357,7 +357,7 @@ GPUdi() auto TrackParametrization<value_T>::getTgl() const -> value_t

//____________________________________________________________
template <typename value_T>
GPUdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
GPUhdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
{
return mP[kQ2Pt];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class TimeFrameGPU : public TimeFrame
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; }
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
Expand All @@ -142,6 +143,7 @@ class TimeFrameGPU : public TimeFrame
// Host-specific getters
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
std::array<int, nLayers - 2>& getArrayNCells() { return mNCells; }

// Host-available device getters
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,19 +186,17 @@ void processNeighboursHandler(const int startLayer,
const int startLevel,
CellSeed** allCellSeeds,
CellSeed* currentCellSeeds,
const unsigned int nCurrentCells,
std::array<int, nLayers - 2>& nCells,
const unsigned char** usedClusters,
int* neighbours,
std::array<int*, nLayers - 2>& neighbours,
gsl::span<int*> neighboursDeviceLUTs,
const TrackingFrameInfo** foundTrackingFrameInfo,
std::vector<CellSeed>& seedsHost,
const float bz,
const float MaxChi2ClusterAttachment,
const float maxChi2NDF,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType,
const std::vector<int>& lastCellIdHost, // temporary host vector
const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
std::vector<int>& updatedCellIdHost, // temporary host vector
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vector
const int nBlocks,
const int nThreads);

Expand Down
2 changes: 1 addition & 1 deletion Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
if(CUDA_ENABLED)
find_package(CUDAToolkit)
message(STATUS "Building ITS CUDA tracker")
# add_compile_options(-O0 -g -lineinfo -fPIC)
add_compile_options(-O0 -g -lineinfo -fPIC)
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
o2_add_library(ITStrackingCUDA
SOURCES ClusterLinesGPU.cu
Expand Down
63 changes: 11 additions & 52 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,6 @@ void TrackerTraitsGPU<nLayers>::computeCellsHybrid(const int iteration)
conf.nBlocks,
conf.nThreads);
}
// Needed for processNeighbours() which is still on CPU.
mTimeFrameGPU->downloadCellsDevice();
mTimeFrameGPU->downloadCellsLUTDevice();
}

template <int nLayers>
Expand All @@ -221,11 +218,11 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].clear();
mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].resize(nextLayerCellsNum, 0);

if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
continue;
}
// if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
// mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
// mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
// continue;
// }

mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(),
Expand Down Expand Up @@ -267,7 +264,6 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
cellsNeighboursLayer[iLayer].size());
}
mTimeFrameGPU->createNeighboursDeviceArray();
mTimeFrameGPU->downloadCellsDevice();
mTimeFrameGPU->unregisterRest();
};

Expand All @@ -289,55 +285,21 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
startLevel,
mTimeFrameGPU->getDeviceArrayCells(),
mTimeFrameGPU->getDeviceCells()[startLayer],
mTimeFrameGPU->getNCells()[startLayer],
mTimeFrameGPU->getArrayNCells(),
mTimeFrameGPU->getDeviceArrayUsedClusters(),
mTimeFrameGPU->getDeviceNeighbours(startLayer - 1),
mTimeFrameGPU->getDeviceNeighboursAll(),
mTimeFrameGPU->getDeviceNeighboursLUTs(),
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
trackSeeds,
mBz,
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
mTrkParams[0].MaxChi2ClusterAttachment,
mTrkParams[0].MaxChi2NDF,
mTimeFrameGPU->getDevicePropagator(),
mCorrType,
lastCellId, // temporary host vector
lastCellSeed, // temporary host vector
updatedCellId, // temporary host vectors
updatedCellSeed, // temporary host vectors
conf.nBlocks,
conf.nThreads);

int level = startLevel;
for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
lastCellSeed.swap(updatedCellSeed);
lastCellId.swap(updatedCellId);
std::vector<CellSeed>().swap(updatedCellSeed); /// tame the memory peaks
updatedCellId.clear();
processNeighboursHandler<nLayers>(iLayer,
--level,
mTimeFrameGPU->getDeviceArrayCells(),
mTimeFrameGPU->getDeviceCells()[iLayer],
mTimeFrameGPU->getNCells()[iLayer],
mTimeFrameGPU->getDeviceArrayUsedClusters(),
mTimeFrameGPU->getDeviceNeighbours(iLayer - 1),
mTimeFrameGPU->getDeviceNeighboursLUTs(),
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
mBz,
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
mTimeFrameGPU->getDevicePropagator(),
mCorrType,
lastCellId, // temporary host vector
lastCellSeed, // temporary host vector
updatedCellId, // temporary host vectors
updatedCellSeed, // temporary host vectors
conf.nBlocks,
conf.nThreads);
}
for (auto& seed : updatedCellSeed) {
if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) {
continue;
}
trackSeeds.push_back(seed);
}
}
// fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
if (!trackSeeds.size()) {
LOGP(info, "No track seeds found, skipping track finding");
continue;
Expand All @@ -362,9 +324,6 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);

auto& tracks = mTimeFrameGPU->getTrackITSExt();
std::sort(tracks.begin(), tracks.end(), [](const TrackITSExt& a, const TrackITSExt& b) {
return a.getChi2() < b.getChi2();
});

for (auto& track : tracks) {
if (!track.getChi2()) {
Expand Down
Loading

0 comments on commit fb7b17c

Please sign in to comment.