From c49f0748845ea24f436542373eac00c8bd3cff8e Mon Sep 17 00:00:00 2001
From: cima22
Date: Thu, 25 Jan 2024 10:07:21 +0100
Subject: [PATCH] Updated code for new kernel registration and fixed clang
 formats

---
 .../include/DataFormatsTPC/ClusterNative.h         |  13 ++-
 GPU/CMakeLists.txt                                 |   2 +
 .../Base/GPUReconstructionIncludesDevice.h         |   2 +-
 GPU/GPUTracking/CMakeLists.txt                     |   2 +-
 .../DataCompression/GPUTPCDecompression.cxx        |  46 +++-----
 .../DataCompression/GPUTPCDecompression.h          |  10 --
 .../DataCompression/GPUTPCDecompressionKernels.cxx |  69 +++++++-----
 .../DataCompression/GPUTPCDecompressionKernels.h   |   7 +-
 GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h       |   3 +
 .../Global/GPUChainTrackingCompression.cxx         |  83 ++++++++++----
 .../Standalone/Benchmark/standalone.cxx            |  10 +-
 GPU/GPUTracking/kernels.cmake                      |   2 +
 cmake-build-debug/.cmake/api/v1/query/cache-v2     |   0
 .../.cmake/api/v1/query/cmakeFiles-v1              |   0
 .../.cmake/api/v1/query/codemodel-v2               |   0
 .../.cmake/api/v1/query/toolchains-v1              |   0
 cmake-build-debug/DartConfiguration.tcl            | 106 ------------------
 17 files changed, 149 insertions(+), 206 deletions(-)
 delete mode 100644 cmake-build-debug/.cmake/api/v1/query/cache-v2
 delete mode 100644 cmake-build-debug/.cmake/api/v1/query/cmakeFiles-v1
 delete mode 100644 cmake-build-debug/.cmake/api/v1/query/codemodel-v2
 delete mode 100644 cmake-build-debug/.cmake/api/v1/query/toolchains-v1
 delete mode 100644 cmake-build-debug/DartConfiguration.tcl

diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
index 49d61007092a8..a996f59f51f9e 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
@@ -31,7 +31,7 @@ template <typename TruthElement>
 class ConstMCTruthContainer;
 template <typename TruthElement>
 class ConstMCTruthContainerView;
-}
+} // namespace dataformats
 } // namespace o2

 namespace o2
@@ -156,6 +156,17 @@ struct ClusterNative {
       return (this->getFlags() < rhs.getFlags());
     }
   }
+
+  GPUd() bool operator==(const ClusterNative& rhs) const
+  {
+    return this->getTimePacked() == rhs.getTimePacked() &&
+           this->padPacked == rhs.padPacked &&
+           this->sigmaTimePacked == rhs.sigmaTimePacked &&
+           this->sigmaPadPacked == rhs.sigmaPadPacked &&
+           this->qMax == rhs.qMax &&
+           this->qTot == rhs.qTot &&
+           this->getFlags() == rhs.getFlags();
+  }
 };

 // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt
index 6e08e9582761a..0af7d998d2f0c 100644
--- a/GPU/CMakeLists.txt
+++ b/GPU/CMakeLists.txt
@@ -18,6 +18,8 @@
 # HDRS_CINT_O2: Headers for ROOT dictionary (only for O2) HDRS_INSTALL: Headers
 # for installation only

+#set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...
+
 if(NOT DEFINED GPUCA_NO_FAST_MATH)
   set(GPUCA_NO_FAST_MATH 0)
 endif()
diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h
index 81fde1dbfb996..58e32253336de 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h
+++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h
@@ -75,7 +75,7 @@ using namespace GPUCA_NAMESPACE::gpu;
 #include "GPUTPCCompressionKernels.cxx"
 #include "GPUTPCCompressionTrackModel.cxx"

-//Files for TPC Decompression
+// Files for TPC Decompression
 #include "GPUTPCDecompressionKernels.cxx"

 // Files for TPC Cluster Finder
diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt
index 18a187bf5f64d..200ac832a433e 100644
--- a/GPU/GPUTracking/CMakeLists.txt
+++ b/GPU/GPUTracking/CMakeLists.txt
@@ -11,7 +11,7 @@

 set(MODULE GPUTracking)

-set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...
+# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}} -fno-omit-frame-pointer") # to uncomment if needed, tired of typing this...

 include(cmake/helpers.cmake)
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
index 922bfc120d136..83420fa37feb1 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx
@@ -65,45 +65,35 @@ void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, unsign
   computePointerWithAlignment(mem, c.nTrackClusters, nTr);
 }

-void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem){
-  computePointerWithAlignment(mem,mTmpNativeClusters,NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer);
-  //computePointerWithAlignment(mem,mClusterNativeAccess);
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem)
+{
+  computePointerWithAlignment(mem, mTmpNativeClusters, NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer);
   return mem;
 }

-void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem){
-  computePointerWithAlignment(mem,mNativeClustersIndex,NSLICES * GPUCA_ROW_COUNT);
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem)
+{
+  computePointerWithAlignment(mem, mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT);
   return mem;
 }

-void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem){
-  computePointerWithAlignment(mem,mUnattachedClustersOffsets,NSLICES * GPUCA_ROW_COUNT);
+void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem)
+{
+  computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSLICES * GPUCA_ROW_COUNT);
   return mem;
 }

-void GPUTPCDecompression::RegisterMemoryAllocation() {
+void GPUTPCDecompression::RegisterMemoryAllocation()
+{
   AllocateAndInitializeLate();
   mMemoryResInputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputGPU, GPUMemoryResource::MEMORY_INPUT_FLAG | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM, "TPCDecompressionInput");
-  mRec->RegisterMemoryAllocation(this,&GPUTPCDecompression::SetPointersTmpNativeBuffersGPU,GPUMemoryResource::MEMORY_GPU,"TPCDecompressionTmpBuffersGPU");
-  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this,&GPUTPCDecompression::SetPointersTmpNativeBuffersOutput,GPUMemoryResource::MEMORY_OUTPUT,"TPCDecompressionTmpBuffersOutput");
-  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this,&GPUTPCDecompression::SetPointersTmpNativeBuffersInput,GPUMemoryResource::MEMORY_INPUT,"TPCDecompressionTmpBuffersInput");
+  mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_GPU, "TPCDecompressionTmpBuffersGPU");
+  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT, "TPCDecompressionTmpBuffersOutput");
+  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT, "TPCDecompressionTmpBuffersInput");
 }

-void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io){
-  //mMaxNativeClustersPerBuffer = 81760;
-  mMaxNativeClustersPerBuffer = 12000;
-}
-/*
-GPUTPCDecompression::ConcurrentClusterNativeBuffer::ConcurrentClusterNativeBuffer():
-mCmprClsBuffer{new o2::tpc::ClusterNative[mCapacity]},mIndex{0}
-{}
-
-void GPUTPCDecompression::ConcurrentClusterNativeBuffer::push_back(tpc::ClusterNative cluster)
+void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
 {
-  if(mIndex == mCapacity){
-    //reallocate?
-    return;
-  }
-  unsigned int current = CAMath::AtomicAdd(mIndex, 1u);
-  mTmpNativeClusters[current] = cluster;
-}*/
\ No newline at end of file
+  // mMaxNativeClustersPerBuffer = 81760;
+  mMaxNativeClustersPerBuffer = 12000;
+}
\ No newline at end of file
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
index e4b2cca65c007..ac63f698b8226 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
+++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
@@ -66,15 +66,6 @@ class GPUTPCDecompression : public GPUProcessor
   o2::tpc::ClusterNative* mTmpNativeClusters;
   o2::tpc::ClusterNativeAccess* mClusterNativeAccess;
   o2::tpc::ClusterNative* mNativeClustersBuffer;
-/*  class ConcurrentClusterNativeBuffer{
-    size_t mIndex;
-    size_t mCapacity = 10;
-    o2::tpc::ClusterNative* mCmprClsBuffer;
-   public:
-    ConcurrentClusterNativeBuffer();
-    void push_back(ClusterNative cluster);
-  };*/
-//  ConcurrentClusterNativeBuffer* tmpBuffer;

   template <class T>
   void SetPointersCompressedClusters(void*& mem, T& c, unsigned int nClA, unsigned int nTr, unsigned int nClU, bool reducedClA);
@@ -82,7 +73,6 @@ class GPUTPCDecompression : public GPUProcessor
   short mMemoryResInputGPU = -1;
   short mResourceTmpIndexes = -1;
   short mResourceTmpClustersOffsets = -1;
-
 };
 } // namespace GPUCA_NAMESPACE::gpu
 #endif // GPUTPCDECOMPRESSION_H
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
index 033a38391dec7..ac4dc2765fd11 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
@@ -17,12 +17,14 @@
 #include "GPUConstantMem.h"
 #include "GPUTPCCompressionTrackModel.h"
 #include "GPUCommonAlgorithm.h"
+#include <cstring>

 using namespace GPUCA_NAMESPACE::gpu;
 using namespace o2::tpc;

 template <>
-GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step0attached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors){
+GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step0attached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
+{
   GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
   CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
   const GPUParam& GPUrestrict() param = processors.param;
@@ -39,46 +41,47 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step0attached>
       unsigned char tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
       bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
-      if(changeLeg){
+      if (changeLeg) {
         tmpSlice -= GPUCA_NSLICES;
       }
-      if(cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences){
+      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
         slice += tmpSlice;
-        if(slice >= GPUCA_NSLICES){
+        if (slice >= GPUCA_NSLICES) {
           slice -= GPUCA_NSLICES;
         }
-        row += cmprClusters.rowDiffA[clusterOffset -trackIndex -1];
-        if(row >= GPUCA_ROW_COUNT){
+        row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
+        if (row >= GPUCA_ROW_COUNT) {
           row -= GPUCA_ROW_COUNT;
         }
       } else {
         slice = tmpSlice;
-        row = cmprClusters.rowDiffA[clusterOffset -trackIndex -1];
+        row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
       }
       if (changeLeg && track.Mirror()) {
         break;
       }
-      if (track.Propagate(param.tpcGeometry.Row2X(row),param.SliceParam[slice].Alpha)){
+      if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
         break;
       }
-      unsigned int timeTmp = cmprClusters.timeResA[clusterOffset -trackIndex -1];
+      unsigned int timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
       if (timeTmp & 0x800000) {
         timeTmp |= 0xFF000000;
       }
-      time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f,param.tpcGeometry.LinearZ2Time(slice,track.Z() + zOffset)));
+      time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
       float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
-      pad = cmprClusters.padResA[clusterOffset -trackIndex - 1] + ClusterNative::packPad(tmpPad);
+      pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
       time = time & 0xFFFFFF;
       pad = (unsigned short)pad;
       if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
@@ -103,11 +106,11 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
     const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, decompressor, stored);
     float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
     float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
-    if(clusterIndex == 0){
+    if (clusterIndex == 0) {
       zOffset = z;
-      track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex],param);
+      track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
     }
-    if(clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y,z-zOffset,row)){
+    if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
       break;
     }
     clusterOffset++;
@@ -115,28 +118,32 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
   clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
 }

-GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor, bool& stored){
-  unsigned int tmpBufferIndex = computeLinearTmpBufferIndex(slice,row,decompressor.mMaxNativeClustersPerBuffer);
-  unsigned int currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row),1u);
+GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor, bool& stored)
+{
+  unsigned int tmpBufferIndex = computeLinearTmpBufferIndex(slice, row, decompressor.mMaxNativeClustersPerBuffer);
+  unsigned int currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
   const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
   stored = currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer;
-  if(stored){
+  if (stored) {
     decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
   }
   return c;
 }

 template <>
-GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors){
+GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
+{
   GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
   CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
   ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
+  const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
+
   unsigned int* offsets = decompressor.mUnattachedClustersOffsets;
-  for (unsigned int i = get_global_id(0); i < GPUCA_NSLICES * GPUCA_ROW_COUNT; i += get_global_size(0)){
+  for (unsigned int i = get_global_id(0); i < GPUCA_NSLICES * GPUCA_ROW_COUNT; i += get_global_size(0)) {
     unsigned int slice = i / GPUCA_ROW_COUNT;
     unsigned int row = i % GPUCA_ROW_COUNT;
-    unsigned int tmpBufferIndex = computeLinearTmpBufferIndex(slice,row,decompressor.mMaxNativeClustersPerBuffer);
-    ClusterNative* buffer = clusterBuffer + processors.ioPtrs.clustersNative->clusterOffset[slice][row];
+    unsigned int tmpBufferIndex = computeLinearTmpBufferIndex(slice, row, decompressor.mMaxNativeClustersPerBuffer);
+    ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[slice][row];
     if (decompressor.mNativeClustersIndex[i] != 0) {
       memcpy((void*)buffer, (const void*)(decompressor.mTmpNativeClusters + tmpBufferIndex), decompressor.mNativeClustersIndex[i] * sizeof(clusterBuffer[0]));
     }
@@ -144,7 +151,7 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>
     unsigned int end = offsets[i] + ((i >= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[i]);
     decompressHits(cmprClusters, offsets[i], end, clout);
     if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) {
-      for (unsigned int k = 0; k < processors.ioPtrs.clustersNative->nClusters[slice][row]; k++) {
+      for (unsigned int k = 0; k < outputAccess->nClusters[slice][row]; k++) {
         auto& cl = buffer[k];
         float t = cl.getTime() + processors.param.rec.tpc.clustersShiftTimebins;
         if (t < 0) {
@@ -156,12 +163,12 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>
-    GPUCommonAlgorithm::sort(buffer, buffer + processors.ioPtrs.clustersNative->nClusters[slice][row]);
+    GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[slice][row]);
   }
-
 }

-GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int start, const unsigned int end, ClusterNative* clusterNativeBuffer){
+GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int start, const unsigned int end, ClusterNative* clusterNativeBuffer)
+{
   unsigned int time = 0;
   unsigned short pad = 0;
   for (unsigned int k = start; k < end; k++) {
@@ -176,7 +183,7 @@ GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::Compress
       time = cmprClusters.timeDiffU[k];
       pad = cmprClusters.padDiffU[k];
     }
-    *(clusterNativeBuffer++) =  ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
+    *(clusterNativeBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
   }
 }
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
index e70f7486bebbf..ee2046b4ab0f6 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
+++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
@@ -41,7 +41,7 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate
   enum K : int {
     step0attached = 0,
     step1unattached = 1,
-    prepareAccess = 2
+    // prepareAccess = 2
   };

   template <int iKernel = defaultKernel>
@@ -50,10 +51,11 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate
   GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int clusterOffset, unsigned int slice, unsigned int row, unsigned int pad, unsigned int time, GPUTPCDecompression& decompressor, bool& stored);
   GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const unsigned int start, const unsigned int end, o2::tpc::ClusterNative* clusterNativeBuffer);

-  GPUd() static unsigned int computeLinearTmpBufferIndex(unsigned int slice, unsigned int row, unsigned int maxClustersPerBuffer){
+  GPUd() static unsigned int computeLinearTmpBufferIndex(unsigned int slice, unsigned int row, unsigned int maxClustersPerBuffer)
+  {
     return slice * (GPUCA_ROW_COUNT * maxClustersPerBuffer) + row * maxClustersPerBuffer;
   }
 };
-}
+} // namespace GPUCA_NAMESPACE::gpu

 #endif // GPUTPCDECOMPRESSIONKERNELS_H
diff --git a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h b/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h
index 02bb8b9775ad9..9a1db75b11185 100644
--- a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h
+++ b/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h
@@ -116,6 +116,9 @@ class GPUTPCCompression
 public:
   GPUFakeEmpty* mOutput;
 };
+class GPUTPCDecompression
+{
+};
 class GPUTPCClusterFinder
 {
 };
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
index f924602baaa5d..05e8135353f2c 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
@@ -28,8 +28,7 @@ using namespace o2::tpc;

 int GPUChainTracking::RunTPCCompression()
 {
-  LOGP(info, "====== Compression");
-//#ifdef GPUCA_HAVE_O2HEADERS
+#ifdef GPUCA_HAVE_O2HEADERS
   mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR"));
   RecoStep myStep = RecoStep::TPCCompression;
   bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression;
@@ -200,16 +199,20 @@ int GPUChainTracking::RunTPCCompression()
     ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec);
   }
   mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR"));
-//#endif
+#endif
   return 0;
 }

 int GPUChainTracking::RunTPCDecompression()
 {
-  LOGP(info, "====== Decompression");
+#ifdef GPUCA_HAVE_O2HEADERS
+  // mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR"));
+  ClusterNativeAccess* original = new ClusterNativeAccess;
+  original->clustersLinear = new ClusterNative[mIOPtrs.clustersNative->nClustersTotal];
+  memcpy((void*)original->clustersLinear, mIOPtrs.clustersNative->clustersLinear, mIOPtrs.clustersNative->nClustersTotal * sizeof(mIOPtrs.clustersNative->clustersLinear[0]));
+  memcpy((void*)original->nClusters, mIOPtrs.clustersNative->nClusters, NSLICES * GPUCA_ROW_COUNT * sizeof(mIOPtrs.clustersNative->nClusters[0][0]));
+  original->setOffsetPtrs();

-//#ifdef GPUCA_HAVE_O2HEADERS
-  // mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR"));
   RecoStep myStep = RecoStep::TPCDecompression;
   bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression; // with -g gives true
   GPUTPCDecompression& Decompressor = processors()->tpcDecompressor;
@@ -230,7 +233,6 @@ int GPUChainTracking::RunTPCDecompression()
   size_t copySize = AllocateRegisteredMemory(Decompressor.mMemoryResInputGPU);
   WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), 0);
-  //TransferMemoryResourcesToGPU(myStep, &Decompressor, 0);

   int outputStream = 0;
   bool toGPU = true;
@@ -287,12 +289,12 @@ int GPUChainTracking::RunTPCDecompression()
       decodedAttachedClusters += Decompressor.mNativeClustersIndex[linearIndex];
     }
   }
-  LOGP(info,"decoded = {}",decodedAttachedClusters);
-  TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression,Decompressor.mResourceTmpClustersOffsets,0);
+  LOGP(info, "decoded = {}", decodedAttachedClusters);
+  TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, Decompressor.mResourceTmpClustersOffsets, 0);
   if (decodedAttachedClusters != cmprClsHost.nAttachedClusters) {
-     GPUWarning("My version: %u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters);
+    GPUWarning("My version: %u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters);
   } else {
-     GPUInfo("My version: all attached clusters have been decoded");
+    GPUInfo("My version: all attached clusters have been decoded");
   }

   Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;
@@ -313,20 +315,20 @@ int GPUChainTracking::RunTPCDecompression()
   runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step1unattached>(GetGridAutoStep(0, RecoStep::TPCDecompression), krnlRunRangeNone, krnlEventNone);

   ClusterNative* tmpBuffer = new ClusterNative[mInputsHost->mNClusterNative];
-  //GPUMemCpy(RecoStep::TPCDecompression,mInputsHost->mPclusterNativeOutput,mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal,0,false);
-  GPUMemCpy(RecoStep::TPCDecompression,tmpBuffer,mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal,0,false);
+  ClusterNativeAccess gpuBuffer = *mInputsHost->mPclusterNativeAccess;
+  gpuBuffer.clustersLinear = tmpBuffer;
+  // GPUMemCpy(RecoStep::TPCDecompression,mInputsHost->mPclusterNativeOutput,mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal,0,false);
+  GPUMemCpy(RecoStep::TPCDecompression, tmpBuffer, mInputsShadow->mPclusterNativeBuffer, sizeof(mInputsShadow->mPclusterNativeBuffer[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, false);
+  gpuBuffer.setOffsetPtrs();

   TPCClusterDecompressor decomp;
   auto allocator = [this](size_t size) {
     this->mInputsHost->mNClusterNative = this->mInputsShadow->mNClusterNative = size;
-    //this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
+    // this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
     return this->mInputsHost->mPclusterNativeOutput;
   };
   auto& gatherTimer = getTimer<TPCClusterDecompressor>("TPCDecompression", 0);
   gatherTimer.Start();
-
-  LOGP(info,"==== mIOPtrs.compressed.nAttCl = {}, nUnAttCl = {}, nTracks = {}",cmprClsHost.nAttachedClusters,cmprClsHost.nUnattachedClusters,cmprClsHost.nTracks);
-
   if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocator, param())) {
     GPUError("Error decompressing clusters");
     return 1;
@@ -334,7 +336,7 @@ int GPUChainTracking::RunTPCDecompression()
   gatherTimer.Stop();
   mIOPtrs.clustersNative = mClusterNativeAccess.get();
   if (mRec->IsGPU()) {
-    //AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
+    // AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
     processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
     WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
     *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
@@ -345,10 +347,49 @@ int GPUChainTracking::RunTPCDecompression()
     SynchronizeStream(0);
   }

-  LOGP(info,"==== My version/cpu version: {}/{}", tmpBuffer[0].qTot,mInputsHost->mPclusterNativeOutput[0].qTot);
-
+  const ClusterNativeAccess* decoded = &gpuBuffer; // mIOPtrs.clustersNative;
+  // original = (ClusterNativeAccess*)mIOPtrs.clustersNative;
+  unsigned int decodingErrors = 0;
+  std::vector<ClusterNative> tmpClusters;
+  if (param().rec.tpc.rejectionStrategy == GPUSettings::RejectionNone) { // verification does not make sense if we reject clusters during compression
+    for (unsigned int i = 0; i < NSLICES; i++) {
+      for (unsigned int j = 0; j < GPUCA_ROW_COUNT; j++) {
+        if (original->nClusters[i][j] != decoded->nClusters[i][j]) {
+          GPUError("Number of clusters mismatch slice %u row %u: expected %d vs. decoded %d", i, j, original->nClusters[i][j], decoded->nClusters[i][j]);
decoded %d", i, j, original->nClusters[i][j], decoded->nClusters[i][j]); + decodingErrors++; + continue; + } + tmpClusters.resize(original->nClusters[i][j]); + for (unsigned int k = 0; k < original->nClusters[i][j]; k++) { + tmpClusters[k] = original->clusters[i][j][k]; + if (param().rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) { + GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param()); + GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param()); + GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param()); + GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param()); + } + } + std::sort(tmpClusters.begin(), tmpClusters.end()); + for (unsigned int k = 0; k < original->nClusters[i][j]; k++) { + const o2::tpc::ClusterNative& c1 = tmpClusters[k]; + const o2::tpc::ClusterNative& c2 = decoded->clusters[i][j][k]; + if (!(c1 == c2)) { + if (decodingErrors++ < 100) { + // GPUWarning("Cluster mismatch: slice %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int)c1.getTimePacked(), (int)c1.getFlags(), (int)c1.padPacked, (int)c1.sigmaTimePacked, (int)c1.sigmaPadPacked, (int)c1.qMax, (int)c1.qTot); + // GPUWarning("%45s %6d %3d %4d %3d %3d %4d %4d", "", (int)c2.getTimePacked(), (int)c2.getFlags(), (int)c2.padPacked, (int)c2.sigmaTimePacked, (int)c2.sigmaPadPacked, (int)c2.qMax, (int)c2.qTot); + } + } + } + } + } + if (decodingErrors) { + GPUWarning("Errors during cluster decoding %u\n", decodingErrors); + } else { + GPUInfo("Cluster decoding verification: PASSED"); + } + } delete[] tmpBuffer; // mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCDCMPR")); -//#endif +#endif return 0; } \ No newline at end of file diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 04bf75f39bab3..ebb3c3def1679 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -612,7 +612,8 @@ int RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse, if (configStandalone.testSyncAsync) { printf("Running synchronous phase\n"); } - const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) : (iEvent - configStandalone.StartEvent)]; + const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) + : (iEvent - configStandalone.StartEvent)]; chainTrackingUse->mIOPtrs = ioPtrs; if (iteration == (configStandalone.proc.doublePipeline ? 
       if (configStandalone.proc.doublePipeline && timerPipeline) {
@@ -667,7 +668,7 @@ int RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse,
           chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr;
           chainTrackingAsync->mIOPtrs.nRawClusters[i] = 0;
         }
-        chainTrackingAsync->mIOPtrs.clustersNative = nullptr;
+        chainTrackingAsync->mIOPtrs.clustersNative = chainTrackingUse->mIOPtrs.clustersNative; // todo: revert back to nullptr
        recAsync->SetResetTimers(iRun < configStandalone.runsInit);
        tmpRetVal = recAsync->RunChains();
        if (tmpRetVal == 0 || tmpRetVal == 2) {
@@ -864,7 +865,8 @@ int main(int argc, char** argv)
       if (grp.continuousMaxTimeBin == 0) {
         printf("Cannot override max time bin for non-continuous data!\n");
       } else {
-        grp.continuousMaxTimeBin = chainTracking->mIOPtrs.tpcZS ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcZS) : chainTracking->mIOPtrs.tpcPackedDigits ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcPackedDigits) : GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
+        grp.continuousMaxTimeBin = chainTracking->mIOPtrs.tpcZS ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcZS) : chainTracking->mIOPtrs.tpcPackedDigits ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcPackedDigits)
+                                                                                                                                                                                   : GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
         printf("Max time bin set to %d\n", (int)grp.continuousMaxTimeBin);
         rec->UpdateSettings(&grp);
         if (recAsync) {
@@ -948,7 +950,7 @@ int main(int argc, char** argv)
       printf("Error unregistering memory\n");
     }
   }
-  //exit(0);
+  // exit(0);
   rec->Exit();

   if (!configStandalone.noprompt) {
diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake
index b06749010d2d9..d7302f17271d2 100644
--- a/GPU/GPUTracking/kernels.cmake
+++ b/GPU/GPUTracking/kernels.cmake
@@ -73,6 +73,8 @@ o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" LB simple)
 o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" LB simple)
 o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" LB simple)
 o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" LB simple)
+o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" LB simple)
+o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" LB simple)
 o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" LB single)
 o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" LB single)
 o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" LB single)
diff --git a/cmake-build-debug/.cmake/api/v1/query/cache-v2 b/cmake-build-debug/.cmake/api/v1/query/cache-v2
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/cmake-build-debug/.cmake/api/v1/query/cmakeFiles-v1 b/cmake-build-debug/.cmake/api/v1/query/cmakeFiles-v1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/cmake-build-debug/.cmake/api/v1/query/codemodel-v2 b/cmake-build-debug/.cmake/api/v1/query/codemodel-v2
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/cmake-build-debug/.cmake/api/v1/query/toolchains-v1 b/cmake-build-debug/.cmake/api/v1/query/toolchains-v1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/cmake-build-debug/DartConfiguration.tcl b/cmake-build-debug/DartConfiguration.tcl
deleted file mode 100644
index 09b3136cfee5f..0000000000000
--- a/cmake-build-debug/DartConfiguration.tcl
+++ /dev/null
@@ -1,106 +0,0 @@
-# This file is configured by CMake automatically as DartConfiguration.tcl
-# If you choose not to use CMake, this file may be hand configured, by
-# filling in the required variables.
-
-
-# Configuration directories and files
-SourceDirectory: /experiments/alice/cimag/o2/O2
-BuildDirectory: /experiments/alice/cimag/o2/O2/cmake-build-debug
-
-# Where to place the cost data store
-CostDataFile:
-
-# Site is something like machine.domain, i.e. pragmatic.crd
-Site: gr3srv.ts.infn.it
-
-# Build name is osname-revision-compiler, i.e. Linux-2.4.2-2smp-c++
-BuildName: Linux-c++
-
-# Subprojects
-LabelsForSubprojects:
-
-# Submission information
-SubmitURL: http://
-SubmitInactivityTimeout:
-
-# Dashboard start time
-NightlyStartTime: 00:00:00 EDT
-
-# Commands for the build/test/submit cycle
-ConfigureCommand: "/home/cimag/.cache/JetBrains/RemoteDev/dist/b21c2b5ff1f19_CLion-2023.2.2/bin/cmake/linux/x64/bin/cmake" "/experiments/alice/cimag/o2/O2"
-MakeCommand: /home/cimag/.cache/JetBrains/RemoteDev/dist/b21c2b5ff1f19_CLion-2023.2.2/bin/cmake/linux/x64/bin/cmake --build . --config "${CTEST_CONFIGURATION_TYPE}"
-DefaultCTestConfigurationType: Release
-
-# version control
-UpdateVersionOnly:
-
-# CVS options
-# Default is "-d -P -A"
-CVSCommand:
-CVSUpdateOptions:
-
-# Subversion options
-SVNCommand:
-SVNOptions:
-SVNUpdateOptions:
-
-# Git options
-GITCommand: /usr/bin/git
-GITInitSubmodules:
-GITUpdateOptions:
-GITUpdateCustom:
-
-# Perforce options
-P4Command:
-P4Client:
-P4Options:
-P4UpdateOptions:
-P4UpdateCustom:
-
-# Generic update command
-UpdateCommand: /usr/bin/git
-UpdateOptions:
-UpdateType: git
-
-# Compiler info
-Compiler: /usr/bin/c++
-CompilerVersion: 4.8.5
-
-# Dynamic analysis (MemCheck)
-PurifyCommand:
-ValgrindCommand:
-ValgrindCommandOptions:
-DrMemoryCommand:
-DrMemoryCommandOptions:
-CudaSanitizerCommand:
-CudaSanitizerCommandOptions:
-MemoryCheckType:
-MemoryCheckSanitizerOptions:
-MemoryCheckCommand: /usr/bin/valgrind
-MemoryCheckCommandOptions:
-MemoryCheckSuppressionFile:
-
-# Coverage
-CoverageCommand: /usr/bin/gcov
-CoverageExtraFlags: -l
-
-# Testing options
-# TimeOut is the amount of time in seconds to wait for processes
-# to complete during testing.  After TimeOut seconds, the
-# process will be summarily terminated.
-# Currently set to 25 minutes
-TimeOut: 1500
-
-# During parallel testing CTest will not start a new test if doing
-# so would cause the system load to exceed this value.
-TestLoad:
-
-UseLaunchers:
-CurlOptions:
-# warning, if you add new options here that have to do with submit,
-# you have to update cmCTestSubmitCommand.cxx
-
-# For CTest submissions that timeout, these options
-# specify behavior for retrying the submission
-CTestSubmitRetryDelay: 5
-CTestSubmitRetryCount: 3
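
The operator== added to ClusterNative above makes equality mean bit-for-bit identity of every packed member, which is exactly what the decoding check in RunTPCDecompression needs: after truncation and sorting, each decoded cluster must reproduce the original field by field. A minimal standalone sketch of the same semantics, using a stand-in struct (the 24-bit time / 8-bit flags packing follows the O2 header; the widths of the other fields are assumptions here):

```cpp
#include <cassert>
#include <cstdint>

// Stand-in for o2::tpc::ClusterNative, reduced to the members compared above.
struct ClusterSketch {
  uint32_t timeFlagsPacked; // time in the lower 24 bits, flags in the upper 8
  uint16_t padPacked;
  uint8_t sigmaTimePacked;
  uint8_t sigmaPadPacked;
  uint16_t qMax;
  uint16_t qTot;

  uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; }
  uint8_t getFlags() const { return timeFlagsPacked >> 24; }

  // Mirrors the patch: equal only if every packed field matches exactly.
  bool operator==(const ClusterSketch& rhs) const
  {
    return getTimePacked() == rhs.getTimePacked() && padPacked == rhs.padPacked &&
           sigmaTimePacked == rhs.sigmaTimePacked && sigmaPadPacked == rhs.sigmaPadPacked &&
           qMax == rhs.qMax && qTot == rhs.qTot && getFlags() == rhs.getFlags();
  }
};

int main()
{
  ClusterSketch a{0x01000123u, 42, 3, 4, 100, 250};
  ClusterSketch b = a;
  assert(a == b);
  b.qTot++; // any single-field difference breaks equality
  assert(!(a == b));
  return 0;
}
```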
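In the attached-cluster decoder, timeTmp holds a residual packed into 24 bits in two's complement, so bit 23 is the sign bit and has to be propagated into the top byte before the residual is added to the track-model prediction. Note that the mask must be the hex literal 0x800000; the decimal literal 800000 tests an unrelated bit pattern. A self-contained sketch of the sign extension:

```cpp
#include <cstdint>
#include <cstdio>

// Sign-extend a 24-bit two's-complement residual to 32 bits, as done for
// cmprClusters.timeResA values in decompressTrack above.
static int32_t signExtend24(uint32_t timeTmp)
{
  if (timeTmp & 0x800000) { // bit 23: sign bit of the 24-bit residual
    timeTmp |= 0xFF000000u; // propagate the sign into the upper byte
  }
  return (int32_t)timeTmp;
}

int main()
{
  printf("%d\n", (int)signExtend24(0x000005u)); // prints 5
  printf("%d\n", (int)signExtend24(0xFFFFFBu)); // prints -5
  return 0;
}
```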
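decompressTrackStore appends each decoded attached cluster to a fixed-capacity segment of one linear temporary buffer, one segment per (slice, row), claiming slots with an atomic counter so that concurrent GPU threads never collide; clusters beyond the capacity are counted but dropped, which is what the stored flag reports back. A host-side sketch of the same pattern with std::atomic, for a single slice (the capacity is the 12000 from SetMaxData; the Cluster type is a placeholder):

```cpp
#include <atomic>
#include <cstdio>
#include <vector>

struct Cluster { unsigned int time, pad; }; // placeholder payload

constexpr unsigned int kRows = 152;        // GPUCA_ROW_COUNT
constexpr unsigned int kMaxPerRow = 12000; // mMaxNativeClustersPerBuffer in the patch

std::vector<Cluster> tmpClusters(kRows * kMaxPerRow); // linear temporary buffer
std::atomic<unsigned int> rowIndex[kRows];            // zero-initialized (static storage)

// Mirrors decompressTrackStore: claim a slot atomically, drop on overflow.
bool store(unsigned int row, const Cluster& c)
{
  unsigned int slot = rowIndex[row].fetch_add(1); // CAMath::AtomicAdd in the kernel
  if (slot >= kMaxPerRow) {
    return false; // segment full: the cluster is counted but not stored
  }
  tmpClusters[row * kMaxPerRow + slot] = c; // computeLinearTmpBufferIndex, one slice
  return true;
}

int main()
{
  store(10, Cluster{123, 45});
  printf("row 10 holds %u cluster(s)\n", rowIndex[10].load());
  return 0;
}
```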
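The verification block added to RunTPCDecompression only runs when no clusters were rejected during compression, because rejection makes a one-to-one comparison meaningless. Per (slice, row) it checks the cluster count, applies the same significant-bit truncation the compressor would, sorts the originals (the decoded rows come out of step1unattached already sorted), and compares element-wise. A condensed host-only sketch of that per-row check, with stand-in types and the truncation step left as a comment:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct Cl {
  unsigned int timePacked;
  unsigned short padPacked, qMax, qTot;
  bool operator<(const Cl& o) const
  {
    return timePacked < o.timePacked || (timePacked == o.timePacked && padPacked < o.padPacked);
  }
  bool operator==(const Cl& o) const
  {
    return timePacked == o.timePacked && padPacked == o.padPacked && qMax == o.qMax && qTot == o.qTot;
  }
};

// Returns the number of mismatches for one (slice, row) partition.
unsigned int verifyRow(const std::vector<Cl>& original, const std::vector<Cl>& decoded)
{
  if (original.size() != decoded.size()) {
    return (unsigned int)std::max(original.size(), decoded.size()); // whole row counts as bad
  }
  std::vector<Cl> tmp = original; // copy so truncation never touches the input
  // (truncateSignificantBits{ChargeMax,Charge,Width} would be applied to tmp here
  //  when CompressionTruncate is enabled)
  std::sort(tmp.begin(), tmp.end()); // decoded rows are already sorted
  unsigned int errors = 0;
  for (size_t k = 0; k < tmp.size(); k++) {
    if (!(tmp[k] == decoded[k])) {
      errors++;
    }
  }
  return errors;
}

int main()
{
  std::vector<Cl> orig = {{5, 1, 10, 20}, {3, 2, 11, 22}};
  std::vector<Cl> dec = {{3, 2, 11, 22}, {5, 1, 10, 20}}; // sorted by (time, pad)
  printf("errors: %u\n", verifyRow(orig, dec));           // prints 0
  return 0;
}
```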
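The two o2_gpu_add_kernel entries register step0attached and step1unattached with the framework's generated launch boilerplate; call sites then select the variant at compile time through the kernel class and its K enum, as in the runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step1unattached>(...) call in the chain above (a matching step0attached launch is presumed in the unchanged part of the chain). A toy model of that compile-time dispatch; the names only imitate the O2 pattern, and nothing here is the real generated code:

```cpp
#include <cstdio>

// Toy kernel class: an enum of variants plus a templated Thread entry point,
// shaped like GPUTPCDecompressionKernels in the patch.
struct DecompressionKernelsToy {
  enum K : int { step0attached = 0,
                 step1unattached = 1 };
  template <int iKernel>
  static void Thread(int iThread)
  {
    if (iKernel == step0attached) {
      printf("thread %d: decode attached clusters via the track model\n", iThread);
    } else {
      printf("thread %d: decode unattached clusters and gather rows\n", iThread);
    }
  }
};

// Toy runKernel: the real framework launches this on the GPU with a grid
// chosen by GetGridAutoStep; here it is a plain host loop.
template <class Kernel, int iKernel>
void runKernelToy(int nThreads)
{
  for (int t = 0; t < nThreads; t++) {
    Kernel::template Thread<iKernel>(t);
  }
}

int main()
{
  runKernelToy<DecompressionKernelsToy, DecompressionKernelsToy::step0attached>(2);
  runKernelToy<DecompressionKernelsToy, DecompressionKernelsToy::step1unattached>(2);
  return 0;
}
```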