Skip to content

Commit

Permalink
ITSGPU: Port findNeighbours on GPU (AliceO2Group#13636)
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas authored Oct 30, 2024
1 parent 6a2cc7e commit 03fd79a
Show file tree
Hide file tree
Showing 11 changed files with 893 additions and 558 deletions.
150 changes: 150 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameChunk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
///

#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMECHUNKGPU_H
#define TRACKINGITSGPU_INCLUDE_TIMEFRAMECHUNKGPU_H

#include "ITStracking/Configuration.h"
#include "ITStracking/TimeFrame.h"

#include "ITStrackingGPU/ClusterLinesGPU.h"
#include "ITStrackingGPU/Array.h"
#include "ITStrackingGPU/Vector.h"
#include "ITStrackingGPU/Stream.h"

#include <gsl/gsl>

namespace o2::its::gpu
{
template <int nLayers>
struct StaticTrackingParameters {
StaticTrackingParameters<nLayers>& operator=(const StaticTrackingParameters<nLayers>& t) = default;
void set(const TrackingParameters& pars)
{
ClusterSharing = pars.ClusterSharing;
MinTrackLength = pars.MinTrackLength;
NSigmaCut = pars.NSigmaCut;
PVres = pars.PVres;
DeltaROF = pars.DeltaROF;
ZBins = pars.ZBins;
PhiBins = pars.PhiBins;
CellDeltaTanLambdaSigma = pars.CellDeltaTanLambdaSigma;
}

/// General parameters
int ClusterSharing = 0;
int MinTrackLength = nLayers;
float NSigmaCut = 5;
float PVres = 1.e-2f;
int DeltaROF = 0;
int ZBins{256};
int PhiBins{128};

/// Cell finding cuts
float CellDeltaTanLambdaSigma = 0.007f;
};

template <int nLayers>
class GpuTimeFrameChunk
{
public:
static size_t computeScalingSizeBytes(const int, const TimeFrameGPUParameters&);
static size_t computeFixedSizeBytes(const TimeFrameGPUParameters&);
static size_t computeRofPerChunk(const TimeFrameGPUParameters&, const size_t);

GpuTimeFrameChunk() = delete;
GpuTimeFrameChunk(o2::its::TimeFrame* tf, TimeFrameGPUParameters& conf)
{
mTimeFramePtr = tf;
mTFGPUParams = &conf;
}
~GpuTimeFrameChunk();

/// Most relevant operations
void allocate(const size_t, Stream&);
void reset(const Task, Stream&);
size_t loadDataOnDevice(const size_t, const size_t, const int, Stream&);

/// Interface
Cluster* getDeviceClusters(const int);
int* getDeviceClusterExternalIndices(const int);
int* getDeviceIndexTables(const int);
Tracklet* getDeviceTracklets(const int);
int* getDeviceTrackletsLookupTables(const int);
CellSeed* getDeviceCells(const int);
int* getDeviceCellsLookupTables(const int);
int* getDeviceRoadsLookupTables(const int);
TimeFrameGPUParameters* getTimeFrameGPUParameters() const { return mTFGPUParams; }

int* getDeviceCUBTmpBuffer() { return mCUBTmpBufferDevice; }
int* getDeviceFoundTracklets() { return mFoundTrackletsDevice; }
int* getDeviceNFoundCells() { return mNFoundCellsDevice; }
int* getDeviceCellNeigboursLookupTables(const int);
int* getDeviceCellNeighbours(const int);
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
int** getDeviceArrayNeighboursCell() const { return mNeighboursCellDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLookupTablesDeviceArray; }

/// Vertexer only
int* getDeviceNTrackletCluster(const int combid) { return mNTrackletsPerClusterDevice[combid]; }
Line* getDeviceLines() { return mLinesDevice; };
int* getDeviceNFoundLines() { return mNFoundLinesDevice; }
int* getDeviceNExclusiveFoundLines() { return mNExclusiveFoundLinesDevice; }
unsigned char* getDeviceUsedTracklets() { return mUsedTrackletsDevice; }
int* getDeviceClusteredLines() { return mClusteredLinesDevice; }
size_t getNPopulatedRof() const { return mNPopulatedRof; }

private:
/// Host
std::array<gsl::span<const Cluster>, nLayers> mHostClusters;
std::array<gsl::span<const int>, nLayers> mHostIndexTables;

/// Device
std::array<Cluster*, nLayers> mClustersDevice;
std::array<int*, nLayers> mClusterExternalIndicesDevice;
std::array<int*, nLayers> mIndexTablesDevice;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
std::array<int*, nLayers - 1> mTrackletsLookupTablesDevice;
std::array<CellSeed*, nLayers - 2> mCellsDevice;
// Road<nLayers - 2>* mRoadsDevice;
std::array<int*, nLayers - 2> mCellsLookupTablesDevice;
std::array<int*, nLayers - 3> mNeighboursCellDevice;
std::array<int*, nLayers - 3> mNeighboursCellLookupTablesDevice;
std::array<int*, nLayers - 2> mRoadsLookupTablesDevice;

// These are to make them accessible using layer index
CellSeed** mCellsDeviceArray;
int** mNeighboursCellDeviceArray;
int** mNeighboursCellLookupTablesDeviceArray;

// Small accessory buffers
int* mCUBTmpBufferDevice;
int* mFoundTrackletsDevice;
int* mNFoundCellsDevice;

/// Vertexer only
Line* mLinesDevice;
int* mNFoundLinesDevice;
int* mNExclusiveFoundLinesDevice;
unsigned char* mUsedTrackletsDevice;
std::array<int*, 2> mNTrackletsPerClusterDevice;
int* mClusteredLinesDevice;

/// State and configuration
bool mAllocated = false;
size_t mNRof = 0;
size_t mNPopulatedRof = 0;
o2::its::TimeFrame* mTimeFramePtr = nullptr;
TimeFrameGPUParameters* mTFGPUParams = nullptr;
};
} // namespace o2::its::gpu
#endif
156 changes: 19 additions & 137 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,156 +20,21 @@
#include "ITStrackingGPU/Array.h"
#include "ITStrackingGPU/Vector.h"
#include "ITStrackingGPU/Stream.h"
#include "ITStrackingGPU/TimeFrameChunk.h"

#include <gsl/gsl>

namespace o2
{
namespace gpu
{
class GPUChainITS;
}
namespace its
{
template <typename T1, typename T2>
struct gpuPair {
T1 first;
T2 second;
};

namespace gpu
{

class DefaultGPUAllocator : public ExternalAllocator
{
void* allocate(size_t size) override;
};
template <int nLayers>
struct StaticTrackingParameters {
StaticTrackingParameters<nLayers>& operator=(const StaticTrackingParameters<nLayers>& t) = default;
void set(const TrackingParameters& pars)
{
ClusterSharing = pars.ClusterSharing;
MinTrackLength = pars.MinTrackLength;
NSigmaCut = pars.NSigmaCut;
PVres = pars.PVres;
DeltaROF = pars.DeltaROF;
ZBins = pars.ZBins;
PhiBins = pars.PhiBins;
CellDeltaTanLambdaSigma = pars.CellDeltaTanLambdaSigma;
}

/// General parameters
int ClusterSharing = 0;
int MinTrackLength = nLayers;
float NSigmaCut = 5;
float PVres = 1.e-2f;
int DeltaROF = 0;
int ZBins{256};
int PhiBins{128};

/// Cell finding cuts
float CellDeltaTanLambdaSigma = 0.007f;
};

enum class Task {
Tracker = 0,
Vertexer = 1
};

template <int nLayers>
class GpuTimeFrameChunk
{
public:
static size_t computeScalingSizeBytes(const int, const TimeFrameGPUParameters&);
static size_t computeFixedSizeBytes(const TimeFrameGPUParameters&);
static size_t computeRofPerChunk(const TimeFrameGPUParameters&, const size_t);

GpuTimeFrameChunk() = delete;
GpuTimeFrameChunk(o2::its::TimeFrame* tf, TimeFrameGPUParameters& conf)
{
mTimeFramePtr = tf;
mTFGPUParams = &conf;
}
~GpuTimeFrameChunk();

/// Most relevant operations
void allocate(const size_t, Stream&);
void reset(const Task, Stream&);
size_t loadDataOnDevice(const size_t, const size_t, const int, Stream&);

/// Interface
Cluster* getDeviceClusters(const int);
int* getDeviceClusterExternalIndices(const int);
int* getDeviceIndexTables(const int);
Tracklet* getDeviceTracklets(const int);
int* getDeviceTrackletsLookupTables(const int);
CellSeed* getDeviceCells(const int);
int* getDeviceCellsLookupTables(const int);
int* getDeviceRoadsLookupTables(const int);
TimeFrameGPUParameters* getTimeFrameGPUParameters() const { return mTFGPUParams; }

int* getDeviceCUBTmpBuffer() { return mCUBTmpBufferDevice; }
int* getDeviceFoundTracklets() { return mFoundTrackletsDevice; }
int* getDeviceNFoundCells() { return mNFoundCellsDevice; }
int* getDeviceCellNeigboursLookupTables(const int);
int* getDeviceCellNeighbours(const int);
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
int** getDeviceArrayNeighboursCell() const { return mNeighboursCellDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLookupTablesDeviceArray; }

/// Vertexer only
int* getDeviceNTrackletCluster(const int combid) { return mNTrackletsPerClusterDevice[combid]; }
Line* getDeviceLines() { return mLinesDevice; };
int* getDeviceNFoundLines() { return mNFoundLinesDevice; }
int* getDeviceNExclusiveFoundLines() { return mNExclusiveFoundLinesDevice; }
unsigned char* getDeviceUsedTracklets() { return mUsedTrackletsDevice; }
int* getDeviceClusteredLines() { return mClusteredLinesDevice; }
size_t getNPopulatedRof() const { return mNPopulatedRof; }

private:
/// Host
std::array<gsl::span<const Cluster>, nLayers> mHostClusters;
std::array<gsl::span<const int>, nLayers> mHostIndexTables;

/// Device
std::array<Cluster*, nLayers> mClustersDevice;
std::array<int*, nLayers> mClusterExternalIndicesDevice;
std::array<int*, nLayers> mIndexTablesDevice;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
std::array<int*, nLayers - 1> mTrackletsLookupTablesDevice;
std::array<CellSeed*, nLayers - 2> mCellsDevice;
// Road<nLayers - 2>* mRoadsDevice;
std::array<int*, nLayers - 2> mCellsLookupTablesDevice;
std::array<int*, nLayers - 3> mNeighboursCellDevice;
std::array<int*, nLayers - 3> mNeighboursCellLookupTablesDevice;
std::array<int*, nLayers - 2> mRoadsLookupTablesDevice;

// These are to make them accessible using layer index
CellSeed** mCellsDeviceArray;
int** mNeighboursCellDeviceArray;
int** mNeighboursCellLookupTablesDeviceArray;

// Small accessory buffers
int* mCUBTmpBufferDevice;
int* mFoundTrackletsDevice;
int* mNFoundCellsDevice;

/// Vertexer only
Line* mLinesDevice;
int* mNFoundLinesDevice;
int* mNExclusiveFoundLinesDevice;
unsigned char* mUsedTrackletsDevice;
std::array<int*, 2> mNTrackletsPerClusterDevice;
int* mClusteredLinesDevice;

/// State and configuration
bool mAllocated = false;
size_t mNRof = 0;
size_t mNPopulatedRof = 0;
o2::its::TimeFrame* mTimeFramePtr = nullptr;
TimeFrameGPUParameters* mTFGPUParams = nullptr;
};

template <int nLayers = 7>
class TimeFrameGPU : public TimeFrame
Expand All @@ -191,13 +56,19 @@ class TimeFrameGPU : public TimeFrame
void loadClustersDevice();
void loadTrackletsDevice();
void loadCellsDevice();
void loadCellsLUT();
void loadTrackSeedsDevice();
void loadTrackSeedsChi2Device();
void loadRoadsDevice();
void loadTrackSeedsDevice(std::vector<CellSeed>&);
void createCellNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
void createNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
void createNeighboursLUTDevice(const int, const unsigned int);
void createTrackITSExtDevice(std::vector<CellSeed>&);
void downloadTrackITSExtDevice(std::vector<CellSeed>&);
void downloadCellsNeighbours(std::vector<std::vector<std::pair<int, int>>>&, const int);
void downloadNeighboursLUT(std::vector<int>&, const int);
void downloadCellsDevice(const int);
void unregisterRest();
void initDeviceChunks(const int, const int);
template <Task task>
size_t loadChunkData(const size_t, const size_t, const size_t);
Expand All @@ -224,17 +95,22 @@ class TimeFrameGPU : public TimeFrame
// Hybrid
Road<nLayers - 2>* getDeviceRoads() { return mRoadsDevice; }
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
gpuPair<int, int>* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
// TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() { return mTrackingFrameInfoDeviceArray; }
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }

void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;

// Host-specific getters
Expand Down Expand Up @@ -263,7 +139,13 @@ class TimeFrameGPU : public TimeFrame
Cluster** mUnsortedClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
Tracklet** mTrackletsDeviceArray;
std::array<int*, nLayers - 2> mCellsLUTDevice;
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
int** mCellsLUTDeviceArray;
int** mNeighboursCellDeviceArray;
int** mNeighboursCellLUTDeviceArray;
std::array<CellSeed*, nLayers - 2> mCellsDevice;
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
CellSeed* mTrackSeedsDevice;
CellSeed** mCellsDeviceArray;
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
Expand Down
Loading

0 comments on commit 03fd79a

Please sign in to comment.