GPU TPC: Decoding: Add option to apply timebin cut to CTF cluster decoding on GPUs (#13753)

* GPU: TPC Decoding: add optional timebin cut to CTF cluster decoding

* GPU: TPC Decoding: add missing checks on track model parameters
cima22 authored Dec 1, 2024
1 parent 167b8c0 commit dc760aa
Showing 7 changed files with 163 additions and 17 deletions.
21 changes: 21 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
@@ -84,13 +84,34 @@ void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem)
return mem;
}

void* GPUTPCDecompression::SetPointersTmpClusterNativeAccessForFiltering(void* mem)
{
computePointerWithAlignment(mem, mNativeClustersBuffer, mNClusterNativeBeforeFiltering);
return mem;
}

void* GPUTPCDecompression::SetPointersInputClusterNativeAccess(void* mem)
{
computePointerWithAlignment(mem, mClusterNativeAccess);
return mem;
}

void* GPUTPCDecompression::SetPointersNClusterPerSectorRow(void* mem)
{
computePointerWithAlignment(mem, mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT);
return mem;
}

void GPUTPCDecompression::RegisterMemoryAllocation()
{
AllocateAndInitializeLate();
mMemoryResInputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputGPU, GPUMemoryResource::MEMORY_INPUT_FLAG | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_EXTERNAL | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionInput");
mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersGPU");
mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersOutput");
mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersInput");
mResourceTmpBufferBeforeFiltering = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpClusterNativeAccessForFiltering, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBufferForFiltering");
mResourceClusterNativeAccess = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputClusterNativeAccess, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpClusterAccessForFiltering");
mResourceNClusterPerSectorRow = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersNClusterPerSectorRow, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpClusterCountForFiltering");
}

void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
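The three new SetPointers* methods follow the standard GPUProcessor pointer-carving pattern: each takes a raw memory cursor, places its array at a suitably aligned address via computePointerWithAlignment, and returns the advanced cursor, while RegisterMemoryAllocation records a handle per resource for later allocation and transfer calls. A minimal sketch of that idiom, assuming computePointerWithAlignment behaves roughly as below (the real helper lives in GPUProcessor and is not reproduced here):

#include <cstddef>
#include <cstdint>

// Simplified stand-in for computePointerWithAlignment: align the cursor,
// hand out a typed pointer to n elements, and advance the cursor past them.
template <class T>
void computePointerWithAlignmentSketch(void*& mem, T*& ptr, size_t n = 1)
{
  uintptr_t p = reinterpret_cast<uintptr_t>(mem);
  constexpr size_t alignment = alignof(T);
  p = (p + alignment - 1) / alignment * alignment; // round up to T's alignment
  ptr = reinterpret_cast<T*>(p);
  mem = reinterpret_cast<void*>(p + n * sizeof(T)); // cursor now points past the array
}

Because every setter returns the advanced cursor, the framework can chain them so that one contiguous allocation backs several arrays.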
9 changes: 9 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
@@ -55,6 +55,9 @@ class GPUTPCDecompression : public GPUProcessor
void* SetPointersTmpNativeBuffersGPU(void* mem);
void* SetPointersTmpNativeBuffersOutput(void* mem);
void* SetPointersTmpNativeBuffersInput(void* mem);
void* SetPointersTmpClusterNativeAccessForFiltering(void* mem);
void* SetPointersInputClusterNativeAccess(void* mem);
void* SetPointersNClusterPerSectorRow(void* mem);

#endif

@@ -63,18 +66,24 @@ class GPUTPCDecompression : public GPUProcessor
o2::tpc::CompressedClusters mInputGPU;

uint32_t mMaxNativeClustersPerBuffer;
uint32_t mNClusterNativeBeforeFiltering;
uint32_t* mNativeClustersIndex;
uint32_t* mUnattachedClustersOffsets;
uint32_t* mAttachedClustersOffsets;
uint32_t* mNClusterPerSectorRow;
o2::tpc::ClusterNative* mTmpNativeClusters;
o2::tpc::ClusterNative* mNativeClustersBuffer;
o2::tpc::ClusterNativeAccess* mClusterNativeAccess;

template <class T>
void SetPointersCompressedClusters(void*& mem, T& c, uint32_t nClA, uint32_t nTr, uint32_t nClU, bool reducedClA);

int16_t mMemoryResInputGPU = -1;
int16_t mResourceTmpIndexes = -1;
int16_t mResourceTmpClustersOffsets = -1;
int16_t mResourceTmpBufferBeforeFiltering = -1;
int16_t mResourceClusterNativeAccess = -1;
int16_t mResourceNClusterPerSectorRow = -1;
};
} // namespace GPUCA_NAMESPACE::gpu
#endif // GPUTPCDECOMPRESSION_H
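The new int16_t members hold the handles returned by RegisterMemoryAllocation, with -1 marking a resource that has not been registered yet. A hypothetical sketch of that handle idiom (the real registry is GPUReconstruction's memory-resource table):

#include <cstdint>
#include <functional>
#include <vector>

struct MemoryRegistrySketch {
  std::vector<std::function<void*(void*)>> setters; // one pointer-carving setter per resource

  int16_t registerAllocation(std::function<void*(void*)> setter)
  {
    setters.push_back(std::move(setter));
    return static_cast<int16_t>(setters.size() - 1); // handle used by later allocate/transfer calls
  }
};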
47 changes: 46 additions & 1 deletion GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
@@ -43,7 +43,7 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
const ClusterNativeAccess* outputAccess = decompressor.mClusterNativeAccess;
uint32_t* offsets = decompressor.mUnattachedClustersOffsets;
for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSlices; i += get_global_size(0)) {
uint32_t iRow = i % GPUCA_ROW_COUNT;
@@ -81,6 +81,51 @@ GPUdi() void GPUTPCDecompressionKernels::decompressorMemcpyBasic(T* GPUrestrict(
}
}

GPUdi() bool GPUTPCDecompressionUtilKernels::isClusterKept(const o2::tpc::ClusterNative& cl, const GPUParam& GPUrestrict() param)
{
return param.tpcCutTimeBin > 0 ? cl.getTime() < param.tpcCutTimeBin : true;
}
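
This predicate encodes the whole cut: with tpcCutTimeBin <= 0 filtering is disabled and every cluster passes, otherwise only clusters with a time below the cut survive. A standalone host-side illustration with hypothetical values, assuming getTime() returns the cluster time in timebin units:

#include <cstdint>
#include <cstdio>

static bool isKeptSketch(float clusterTime, int32_t tpcCutTimeBin)
{
  return tpcCutTimeBin > 0 ? clusterTime < tpcCutTimeBin : true;
}

int main()
{
  std::printf("%d\n", isKeptSketch(100.f, 0));   // 1: cut disabled, everything kept
  std::printf("%d\n", isKeptSketch(100.f, 200)); // 1: inside the accepted time window
  std::printf("%d\n", isKeptSketch(300.f, 200)); // 0: beyond the cut, filtered out
}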

template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::countFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
const GPUParam& GPUrestrict() param = processors.param;
GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
for (uint32_t i = get_global_id(0); i < GPUCA_NSLICES * GPUCA_ROW_COUNT; i += get_global_size(0)) {
uint32_t slice = i / GPUCA_ROW_COUNT;
uint32_t row = i % GPUCA_ROW_COUNT;
for (uint32_t k = 0; k < clusterAccess->nClusters[slice][row]; k++) {
ClusterNative cl = clusterAccess->clusters[slice][row][k];
if (isClusterKept(cl, param)) {
decompressor.mNClusterPerSectorRow[i]++;
}
}
}
}

template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::storeFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
const GPUParam& GPUrestrict() param = processors.param;
GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
for (uint32_t i = get_global_id(0); i < GPUCA_NSLICES * GPUCA_ROW_COUNT; i += get_global_size(0)) {
uint32_t slice = i / GPUCA_ROW_COUNT;
uint32_t row = i % GPUCA_ROW_COUNT;
uint32_t count = 0;
for (uint32_t k = 0; k < clusterAccess->nClusters[slice][row]; k++) {
const ClusterNative cl = clusterAccess->clusters[slice][row][k];
if (isClusterKept(cl, param)) {
clusterBuffer[outputAccess->clusterOffset[slice][row] + count] = cl;
count++;
}
}
}
}

template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::sortPerSectorRow>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
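countFilteredClusters and storeFilteredClusters together implement a two-pass stream compaction: the first pass counts survivors per (sector, row) bucket, the host turns those counts into output offsets (via ClusterNativeAccess::setOffsetPtrs in the chain code below), and the second pass rewrites the survivors densely at their bucket offsets. A serial sketch of the same pattern, with plain cluster times standing in for ClusterNative objects:

#include <cstdint>
#include <vector>

std::vector<float> compactSketch(const std::vector<std::vector<float>>& buckets, int32_t cut)
{
  const size_t n = buckets.size();
  std::vector<uint32_t> counts(n, 0);
  for (size_t i = 0; i < n; i++) { // pass 1: count survivors per bucket
    for (float t : buckets[i]) {
      counts[i] += (cut > 0 ? t < cut : true) ? 1u : 0u;
    }
  }
  std::vector<uint32_t> offsets(n, 0); // exclusive prefix sum of the counts
  for (size_t i = 1; i < n; i++) {
    offsets[i] = offsets[i - 1] + counts[i - 1];
  }
  std::vector<float> out(n ? offsets[n - 1] + counts[n - 1] : 0);
  for (size_t i = 0; i < n; i++) { // pass 2: store survivors at their bucket offset
    uint32_t k = 0;
    for (float t : buckets[i]) {
      if (cut > 0 ? t < cut : true) {
        out[offsets[i] + k++] = t;
      }
    }
  }
  return out;
}

No atomics are needed in either GPU pass, because each thread owns whole (sector, row) buckets in the grid-stride loops above, so counts and writes for a bucket never race.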
6 changes: 5 additions & 1 deletion GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
@@ -59,11 +59,15 @@ class GPUTPCDecompressionUtilKernels : public GPUKernelTemplate
{
public:
enum K : int32_t {
sortPerSectorRow = 0,
countFilteredClusters = 0,
storeFilteredClusters = 1,
sortPerSectorRow = 2,
};

template <int32_t iKernel = defaultKernel>
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);

GPUdi() static bool isClusterKept(const o2::tpc::ClusterNative& cl, const GPUParam& GPUrestrict() param);
};

} // namespace GPUCA_NAMESPACE::gpu
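The kernel variants are selected at compile time: K enumerates them, and the templated Thread entry point is specialized once per enum value, the usual GPUKernelTemplate arrangement. A stripped-down sketch of that dispatch pattern (names hypothetical, device qualifiers omitted):

#include <cstdint>

struct UtilKernelsSketch {
  enum K : int32_t { countFilteredClusters = 0,
                     storeFilteredClusters = 1,
                     sortPerSectorRow = 2 };

  template <int32_t iKernel>
  static void Thread(int32_t iBlock, int32_t iThread); // one specialization per K value
};

template <>
void UtilKernelsSketch::Thread<UtilKernelsSketch::countFilteredClusters>(int32_t, int32_t)
{
  // counting pass would run here
}

template <>
void UtilKernelsSketch::Thread<UtilKernelsSketch::storeFilteredClusters>(int32_t, int32_t)
{
  // compaction pass would run here
}

A call like runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::countFilteredClusters>(...) in the chain then resolves to the matching specialization.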
6 changes: 6 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
@@ -344,6 +344,12 @@
#endif
#ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow
#define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256
#endif
#ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters
#define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256
#endif
#ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters
#define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256
#endif
#ifndef GPUCA_LB_GPUTPCCFDecodeZS
#define GPUCA_LB_GPUTPCCFDecodeZS 128, 4
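The #ifndef guards make each launch-bounds default overridable per build: any definition of the macro that is visible before this header wins over the value given here. A hypothetical override for one of the new decompression kernels:

// Hypothetical: define the macro ahead of the header (e.g. via the build line
// -DGPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters=128);
// the #ifndef guard then keeps 128 instead of the default 256.
#define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 128
#include "GPUDefGPUParameters.h"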
89 changes: 74 additions & 15 deletions GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
@@ -246,13 +246,21 @@ int32_t GPUChainTracking::RunTPCDecompression()
mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR"));
RecoStep myStep = RecoStep::TPCDecompression;
bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression;
bool runFiltering = param().tpcCutTimeBin > 0;
GPUTPCDecompression& Decompressor = processors()->tpcDecompressor;
GPUTPCDecompression& DecompressorShadow = doGPU ? processorsShadow()->tpcDecompressor : Decompressor;
const auto& threadContext = GetThreadContext();
CompressedClusters cmprClsHost = *mIOPtrs.tpcCompressedClusters;
CompressedClusters& inputGPU = Decompressor.mInputGPU;
CompressedClusters& inputGPUShadow = DecompressorShadow.mInputGPU;

if (cmprClsHost.nTracks && cmprClsHost.solenoidBz != -1e6f && cmprClsHost.solenoidBz != param().bzkG) {
throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding");
}
if (cmprClsHost.nTracks && cmprClsHost.maxTimeBin != -1e6 && cmprClsHost.maxTimeBin != param().continuousMaxTimeBin) {
throw std::runtime_error("Configured max time bin does not match value used for track model encoding");
}

int32_t inputStream = 0;
int32_t unattachedStream = mRec->NStreams() - 1;
inputGPU = cmprClsHost;
@@ -300,12 +308,6 @@ int32_t GPUChainTracking::RunTPCDecompression()
GPUMemCpy(myStep, inputGPUShadow.sigmaPadU, cmprClsHost.sigmaPadU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.sigmaPadU[0]), unattachedStream, toGPU);
GPUMemCpy(myStep, inputGPUShadow.sigmaTimeU, cmprClsHost.sigmaTimeU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.sigmaTimeU[0]), unattachedStream, toGPU);

mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = cmprClsHost.nAttachedClusters + cmprClsHost.nUnattachedClusters;
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer;
Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;
WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);
TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceTmpIndexes, inputStream, nullptr, mEvents->stream, nStreams);
SynchronizeStream(inputStream);
uint32_t offset = 0;
@@ -324,27 +326,83 @@ int32_t GPUChainTracking::RunTPCDecompression()
if (decodedAttachedClusters != cmprClsHost.nAttachedClusters) {
GPUWarning("%u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters);
}
if (doGPU) {
mClusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
if (runFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context
Decompressor.mNClusterNativeBeforeFiltering = DecompressorShadow.mNClusterNativeBeforeFiltering = decodedAttachedClusters + cmprClsHost.nUnattachedClusters;
AllocateRegisteredMemory(Decompressor.mResourceTmpBufferBeforeFiltering);
AllocateRegisteredMemory(Decompressor.mResourceClusterNativeAccess);
mClusterNativeAccess->clustersLinear = DecompressorShadow.mNativeClustersBuffer;
mClusterNativeAccess->setOffsetPtrs();
*Decompressor.mClusterNativeAccess = *mClusterNativeAccess;
WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);
TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, Decompressor.mResourceClusterNativeAccess, inputStream, &mEvents->single);
} else { // If not filtering, directly allocate the final buffers
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = cmprClsHost.nAttachedClusters + cmprClsHost.nUnattachedClusters;
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer;
Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;
DecompressorShadow.mClusterNativeAccess = mInputsShadow->mPclusterNativeAccess;
Decompressor.mClusterNativeAccess = mInputsHost->mPclusterNativeAccess;
WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);
if (doGPU) {
mClusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
mClusterNativeAccess->setOffsetPtrs();
*mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;
processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), inputStream);
TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, inputStream, &mEvents->single);
}
mIOPtrs.clustersNative = mClusterNativeAccess.get();
mClusterNativeAccess->clustersLinear = mInputsHost->mPclusterNativeOutput;
mClusterNativeAccess->setOffsetPtrs();
*mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;
processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), inputStream);
TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, inputStream, &mEvents->single);
}
mIOPtrs.clustersNative = mClusterNativeAccess.get();
mClusterNativeAccess->clustersLinear = mInputsHost->mPclusterNativeOutput;
mClusterNativeAccess->setOffsetPtrs();

uint32_t batchSize = doGPU ? 6 : NSLICES;
for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice = iSlice + batchSize) {
int32_t iStream = (iSlice / batchSize) % mRec->NStreams();
runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step1unattached>({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSlice, batchSize);
uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSlice, mClusterNativeAccess->nClustersSector + iSlice + batchSize, 0u);
GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSlice][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSlice][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false);
if (!runFiltering) {
GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSlice][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSlice][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false);
}
}
SynchronizeGPU();

if (runFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly
AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow);
WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream);
runKernel<GPUMemClean16>({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0]));
runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::countFilteredClusters>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));
TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceNClusterPerSectorRow, unattachedStream);
SynchronizeStream(unattachedStream);
uint32_t nClustersFinal = std::accumulate(Decompressor.mNClusterPerSectorRow, Decompressor.mNClusterPerSectorRow + inputGPU.nSliceRows, 0u);
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = nClustersFinal;
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer;
Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;
WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream);
for (uint32_t i = 0; i < NSLICES; i++) {
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
mClusterNativeAccess->nClusters[i][j] = Decompressor.mNClusterPerSectorRow[i * GPUCA_ROW_COUNT + j];
}
}
if (doGPU) {
mClusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
mClusterNativeAccess->setOffsetPtrs();
*mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;
processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), unattachedStream);
TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, unattachedStream);
}
mIOPtrs.clustersNative = mClusterNativeAccess.get();
mClusterNativeAccess->clustersLinear = mInputsHost->mPclusterNativeOutput;
mClusterNativeAccess->setOffsetPtrs();
runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::storeFilteredClusters>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));
GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput, DecompressorShadow.mNativeClustersBuffer, sizeof(Decompressor.mNativeClustersBuffer[0]) * nClustersFinal, unattachedStream, false);
SynchronizeStream(unattachedStream);
}
if (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4) {
runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::sortPerSectorRow>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));
const ClusterNativeAccess* decoded = mIOPtrs.clustersNative;
@@ -357,6 +415,7 @@ int32_t GPUChainTracking::RunTPCDecompression()
}
}
}
SynchronizeStream(unattachedStream);
}
mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR"));
}
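Taken together, the filtering path in RunTPCDecompression is: decode everything into a temporary buffer, count survivors per sector and row on the GPU, size the final output from the counts, rebuild the ClusterNativeAccess offsets, then compact. A condensed host-side sketch (names follow the diff above; steps that are pure GPU work are left as comments):

#include <cstdint>
#include <numeric>

uint32_t filteringPathSketch(const uint32_t* nClusterPerSectorRow, uint32_t nSectorRows)
{
  // 1. decode attached + unattached clusters into the temporary buffer
  //    (mNClusterNativeBeforeFiltering elements, mResourceTmpBufferBeforeFiltering)
  // 2. countFilteredClusters kernel fills nClusterPerSectorRow per (sector, row)
  // 3. the total determines the size of the final cluster buffers:
  uint32_t nClustersFinal = std::accumulate(nClusterPerSectorRow, nClusterPerSectorRow + nSectorRows, 0u);
  // 4. allocate final buffers, copy counts into ClusterNativeAccess, setOffsetPtrs()
  // 5. storeFilteredClusters compacts survivors into the final buffer
  // 6. copy the result back to mPclusterNativeOutput on the host
  return nClustersFinal;
}

Note the unsigned initial value 0u passed to std::accumulate, matching the code above: an int literal would make the accumulation run in int and could overflow for large cluster counts.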