Skip to content

Commit

Permalink
Fixed synchronization bug and refactored code
Browse files Browse the repository at this point in the history
  • Loading branch information
cima22 authored and Gabriele Cimador committed Feb 18, 2024
1 parent 45e295d commit f920549
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 160 deletions.
144 changes: 0 additions & 144 deletions CMakeLists.txt.orig

This file was deleted.

5 changes: 2 additions & 3 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,5 @@ void GPUTPCDecompression::RegisterMemoryAllocation()

void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
{
// mMaxNativeClustersPerBuffer = 81760;
mMaxNativeClustersPerBuffer = 12000;
}
mMaxNativeClustersPerBuffer = mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow;
}
11 changes: 6 additions & 5 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
time = cmprClusters.timeA[trackIndex];
pad = cmprClusters.padA[trackIndex];
}
bool stored;
const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, decompressor, stored);
const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, decompressor);
float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
if (clusterIndex == 0) {
Expand All @@ -118,14 +117,16 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
}

GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor, bool& stored)
GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor)
{
unsigned int tmpBufferIndex = computeLinearTmpBufferIndex(slice, row, decompressor.mMaxNativeClustersPerBuffer);
unsigned int currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
stored = currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer;
if (stored) {
if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
} else {
decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
}
return c;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate
template <int iKernel = defaultKernel>
GPUd() static void Thread(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const unsigned int maxTime, const unsigned int trackIndex, unsigned int& clusterOffset, GPUTPCDecompression& decompressor);
GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor, bool& stored);
GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor);
GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int start, const unsigned int end, o2::tpc::ClusterNative* clusterNativeBuffer);

GPUd() static unsigned int computeLinearTmpBufferIndex(unsigned int slice, unsigned int row, unsigned int maxClustersPerBuffer)
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ AddOption(throttleAlarms, bool, false, "", 0, "Throttle rate at which alarms are
AddOption(outputSanityCheck, bool, false, "", 0, "Run some simple sanity checks finding errors in the output")
AddOption(tpcSingleSector, int, -1, "", 0, "Restrict TPC processing to a single sector")
AddOption(tpcDownscaledEdx, unsigned char, 0, "", 0, "If != 0, downscale dEdx processing (if enabled) to x %")
AddOption(tpcMaxAttachedClustersPerSectorRow, unsigned int, 20000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow")
AddVariable(eventDisplay, GPUCA_NAMESPACE::gpu::GPUDisplayFrontendInterface*, nullptr)
AddSubConfig(GPUSettingsProcessingRTC, rtc)
AddSubConfig(GPUSettingsProcessingParam, param)
Expand Down
4 changes: 3 additions & 1 deletion GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,9 @@ int GPUChainTracking::RunTPCDecompression()
processors()->ioPtrs.clustersNative = mInputsHost->mPclusterNativeAccess;

runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step1unattached>(GetGridAutoStep(inputStream, RecoStep::TPCDecompression), krnlRunRangeNone, krnlEventNone);
GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput, mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal, inputStream, false);
SynchronizeStream(inputStream);
GPUMemCpy(RecoStep::TPCDecompression, (void*)mInputsHost->mPclusterNativeOutput, (void*)mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal, inputStream, false);

mIOPtrs.clustersNative = mClusterNativeAccess.get();
#endif
return 0;
Expand Down
11 changes: 6 additions & 5 deletions GPU/GPUTracking/Global/GPUErrorCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ GPUCA_ERROR_CODE(20, ERROR_CF_PEAK_OVERFLOW, Sector, Value, Max)
GPUCA_ERROR_CODE(21, ERROR_CF_CLUSTER_OVERFLOW, Sector, Value, Max)
GPUCA_ERROR_CODE(22, ERROR_CF_ROW_CLUSTER_OVERFLOW, SectorRow, Value, Max)
GPUCA_ERROR_CODE(23, ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, SectorRow, Value, Max)
GPUCA_ERROR_CODE(24, MAX_OVERFLOW_ERROR_NUMBER) // Overflow errors are detected as errno <= MAX_OVERFLOW_ERROR_NUMBER
GPUCA_ERROR_CODE(24, ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, SectorRow, Value, Max)
GPUCA_ERROR_CODE(25, MAX_OVERFLOW_ERROR_NUMBER) // Overflow errors are detected as errno <= MAX_OVERFLOW_ERROR_NUMBER

GPUCA_ERROR_CODE(25, ERROR_TPCZS_INVALID_ROW, SectorRow) // Data from invalid row is skipped
GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded
GPUCA_ERROR_CODE(27, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete
GPUCA_ERROR_CODE(28, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset
GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow) // Data from invalid row is skipped
GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded
GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete
GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset

// #define GPUCA_CHECK_TPCZS_CORRUPTION
1 change: 0 additions & 1 deletion GPU/GPUTracking/Standalone/Benchmark/standalone.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,6 @@ int main(int argc, char** argv)
printf("Error unregistering memory\n");
}
}
// exit(0);
rec->Exit();

if (!configStandalone.noprompt) {
Expand Down

0 comments on commit f920549

Please sign in to comment.