Skip to content

Commit

Permalink
GPU TPC: Provide local occupancy info to error estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Feb 24, 2024
1 parent 4dab8cf commit 7b690c1
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 41 deletions.
8 changes: 4 additions & 4 deletions GPU/GPUTracking/Base/GPUParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct GPUParam_t {
float ParamErrors[2][4][4]; // cluster error parameterization used during seeding and fit
#else
float ParamErrorsSeeding0[2][3][4]; // cluster error parameterization used during seeding
float ParamS0Par[2][3][6]; // cluster error parameterization used during track fit
float ParamS0Par[2][3][6]; // cluster error parameterization used during track fit
#endif
};
} // namespace internal
Expand Down Expand Up @@ -98,13 +98,13 @@ struct GPUParam : public internal::GPUParam_t<GPUSettingsRec, GPUSettingsParam>
return 0.174533f + par.dAlpha * iSlice;
}
GPUd() float GetClusterErrorSeeding(int yz, int type, float z, float angle2) const;
GPUd() void GetClusterErrorsSeeding2(char sector, int row, float z, float sinPhi, float DzDs, float time, float avgCharge, float& ErrY2, float& ErrZ2) const;
GPUd() void GetClusterErrorsSeeding2(char sector, int row, float z, float sinPhi, float DzDs, float time, float avgCharge, float& ErrY2, float& ErrZ2) const;
GPUd() float GetSystematicClusterErrorIFC2(float x, float y, float z, bool sideC) const;

GPUd() float GetClusterError2(int yz, int type, float z, float angle2, float time, float avgCharge) const;
GPUd() float GetClusterError2(int yz, int type, float z, float angle2, float scaledMult, float scaledAvgCharge) const;
GPUd() void GetClusterErrors2(char sector, int row, float z, float sinPhi, float DzDs, float time, float avgCharge, float& ErrY2, float& ErrZ2) const;
GPUd() void UpdateClusterError2ByState(short clusterState, float& ErrY2, float& ErrZ2) const;
GPUd() float GetScaledMult(int iSlice, int iRow, float t) const;
GPUd() float GetScaledMult(int iSlice, int iRow, float time) const;

GPUd() void Slice2Global(int iSlice, float x, float y, float z, float* X, float* Y, float* Z) const;
GPUd() void Global2Slice(int iSlice, float x, float y, float z, float* X, float* Y, float* Z) const;
Expand Down
27 changes: 13 additions & 14 deletions GPU/GPUTracking/Base/GPUParam.inc
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(char sector, int iRow, f
}

MEM_CLASS_PRE()
GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int yz, int type, float z, float angle2, float time, float avgCharge) const
GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int yz, int type, float z, float angle2, float scaledMult, float scaledAvgCharge) const
{
MakeType(const float*) c = ParamErrors[yz][type];
float v = c[0] + c[1] * angle2 + c[2] * z + c[3] * 0;
float v = c[0] + c[1] * angle2 + c[2] * z + c[3] * scaledMult;
v = CAMath::Abs(v);
v *= yz ? rec.tpc.clusterError2CorrectionZ : rec.tpc.clusterError2CorrectionY;
v += yz ? rec.tpc.clusterError2AdditionalZ : rec.tpc.clusterError2AdditionalY;
Expand All @@ -69,27 +69,26 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y,
float sysErr = 0.f;
const float kMaxExpArg = 9.f; // limit r-dumped error to this exp. argument

const float rIFC=83.5;
const float rIFC = 83.5;
const float r = CAMath::Sqrt(x * x + y * y);
if (r-rIFC < rec.tpc.sysClusErrorMinDist) {
if (r - rIFC < rec.tpc.sysClusErrorMinDist) {
return rec.tpc.sysClusErrorMaskError;
}

if (sideC && rec.tpc.sysClusErrorNormIFCCE)
{
if (sideC && rec.tpc.sysClusErrorNormIFCCE) {
float dr = CAMath::Abs(x - 85.f);
float argExpIFCCE = dr * rec.tpc.sysClusErrorSlopeIFCCE;
float dz = CAMath::Abs((rec.tpc.sysClusErrorIFCCEZRegion - z) * rec.tpc.sysClusErrorslopeIFCCEZ);
argExpIFCCE += 0.5f * dz * dz;
if (argExpIFCCE < kMaxExpArg) {
float tmp = rec.tpc.sysClusErrorNormIFCCE * CAMath::Exp(-argExpIFCCE);
sysErr += tmp * tmp;
float tmp = rec.tpc.sysClusErrorNormIFCCE * CAMath::Exp(-argExpIFCCE);
sysErr += tmp * tmp;
}
}

if (rec.tpc.sysClusErrorNormIFC) {
float argExpIFC = (r - rIFC) * rec.tpc.sysClusErrorSlopeIFC;
if (argExpIFC<kMaxExpArg) {
if (argExpIFC < kMaxExpArg) {
float tmp = rec.tpc.sysClusErrorNormIFC * CAMath::Exp(-argExpIFC);
sysErr += tmp * tmp;
}
Expand Down Expand Up @@ -163,9 +162,9 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(char sector, int iRow, float z,
float sec2 = 1.f / (1.f - s2);
float angleY2 = s2 * sec2; // dy/dx
float angleZ2 = DzDs * DzDs * sec2; // dz/dx
float mult = time >= 0.f ? GetScaledMult(sector, iRow, time) : 0.f;

float mult = time >= 0.f ? GetScaledMult(sector, iRow, time) / tpcGeometry.Row2X(iRow) : 0.f;

ErrY2 = GetClusterError2(0, rowType, z, angleY2, mult, avgCharge);
ErrZ2 = GetClusterError2(1, rowType, z, angleZ2, mult, avgCharge);
}
Expand All @@ -192,13 +191,13 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(short clusterState, fl
}

MEM_CLASS_PRE()
GPUdi() float MEM_LG(GPUParam)::GetScaledMult(int iSlice, int iRow, float t) const
GPUdi() float MEM_LG(GPUParam)::GetScaledMult(int iSlice, int iRow, float time) const
{
#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
if (!occupancyMap) {
return 0.f;
}
const unsigned int bin = CAMath::Max(0.f, t / rec.tpc.occupancyMapTimeBins);
const unsigned int bin = CAMath::Max(0.f, time / rec.tpc.occupancyMapTimeBins);
return occupancyMap[bin].bin[iSlice][iRow] * rec.tpc.clusterErrorOccupancyScaler;
#else
return 0.f;
Expand Down
9 changes: 9 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -374,3 +374,12 @@ unsigned int GPUReconstructionCPU::SetAndGetNestedLoopOmpFactor(bool condition,
}
return mNestedLoopOmpFactor;
}

void GPUReconstructionCPU::UpdateParamOccupancyMap(const GPUTPCClusterOccupancyMapBin* mapHost, const GPUTPCClusterOccupancyMapBin* mapGPU, int stream)
{
param().occupancyMap = mapHost;
if (IsGPU()) {
const auto threadContext = GetThreadContext();
WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &mapGPU, sizeof(mapGPU), stream);
}
}
4 changes: 3 additions & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
void SetNestedLoopOmpFactor(unsigned int f) { mNestedLoopOmpFactor = f; }
unsigned int SetAndGetNestedLoopOmpFactor(bool condition, unsigned int max);

void UpdateParamOccupancyMap(const GPUTPCClusterOccupancyMapBin* mapHost, const GPUTPCClusterOccupancyMapBin* mapGPU, int stream = -1);

protected:
struct GPUProcessorProcessors : public GPUProcessor {
GPUConstantMem* mProcessorsProc = nullptr;
Expand Down Expand Up @@ -192,7 +194,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
size_t TransferMemoryResourceLinkToHost(short res, int stream = -1, deviceEvent ev = nullptr, deviceEvent* evList = nullptr, int nEvents = 1) { return TransferMemoryResourceToHost(&mMemoryResources[res], stream, ev, evList, nEvents); }
virtual size_t GPUMemCpy(void* dst, const void* src, size_t size, int stream, int toGPU, deviceEvent ev = nullptr, deviceEvent* evList = nullptr, int nEvents = 1);
virtual size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int stream, int toGPU, deviceEvent ev = nullptr, deviceEvent* evList = nullptr, int nEvents = 1);
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int stream, deviceEvent ev) override;
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int stream = -1, deviceEvent ev = nullptr) override;
virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int stream, deviceEvent ev, deviceEvent* evList, int nEvents, bool toGPU, const void* src, void* dst);

int InitDevice() override;
Expand Down
8 changes: 5 additions & 3 deletions GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,16 @@ int GPUChainTracking::RunTPCTrackingSlices_internal()
AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]);
ReleaseEvent(mEvents->init);
auto* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap;
runKernel<GPUMemClean16>(GetGridAutoStep(mRec->NStreams() - 1, RecoStep::TPCSliceTracking), krnlRunRangeNone, {}, ptr, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, mRec->NStreams() - 1), krnlRunRangeNone, krnlEventNone, ptr);
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, mRec->NStreams() - 1), krnlRunRangeNone, {&mEvents->init}, ptr);
int streamOccMap = mRec->NStreams() - 1;
runKernel<GPUMemClean16>(GetGridAutoStep(streamOccMap, RecoStep::TPCSliceTracking), krnlRunRangeNone, {}, ptr, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), krnlRunRangeNone, krnlEventNone, ptr);
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), krnlRunRangeNone, {&mEvents->init}, ptr);
if (doGPU) {
TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, mInputsHost->mResourceOccupancyMap);
} else {
TransferMemoryResourceLinkToGPU(RecoStep::TPCSliceTracking, mInputsHost->mResourceOccupancyMap);
}
mRec->UpdateParamOccupancyMap(mInputsHost->mTPCClusterOccupancyMap, mInputsShadow->mTPCClusterOccupancyMap, streamOccMap);
}

int streamMap[NSLICES];
Expand Down
21 changes: 11 additions & 10 deletions GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,8 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int iT
tup.Fill((float)clusters[ihit].row, xx, yy, zz, clAlpha, mX, ImP0, ImP1, mP[2], mP[3], mP[4], ImC0, ImC2, mC[14]);
}
#endif
retVal = prop.Update(yy, zz, clusters[ihit].row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, clusters[ihit].slice, -1.f, 0.f GPUCA_DEBUG_STREAMER_CHECK(, iTrk)); // TODO: Use correct time, avgCharge
float time = merger->Param().par.earlyTpcTransform ? -1.f : merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num].getTime();
retVal = prop.Update(yy, zz, clusters[ihit].row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, clusters[ihit].slice, time, 0.f GPUCA_DEBUG_STREAMER_CHECK(, iTrk)); // TODO: Use correct time, avgCharge
GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) {
o2::utils::DebugStreamer::instance()->getStreamer("debug_update_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_update_track").data() << "iTrk=" << iTrk << "ihit=" << ihit << "yy=" << yy << "zz=" << zz << "cluster=" << clusters[ihit] << "track=" << this << "rejectChi2=" << rejectChi2 << "interpolationhit=" << interpolation.hit[ihit] << "refit=" << refit << "retVal=" << retVal << "\n";
})
Expand Down Expand Up @@ -558,25 +559,25 @@ GPUd() void GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict
int bin, ny, nz;

float err2Y, err2Z;
Merger->Param().GetClusterErrors2(slice, iRow, Z, mP[2], mP[3], -1.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge
Merger->Param().GetClusterErrors2(slice, iRow, Z, mP[2], mP[3], -1.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge
const float sy2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Y + CAMath::Abs(mC[0]))); // Cov can be bogus when following circle
const float sz2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Z + CAMath::Abs(mC[2]))); // In that case we should provide the track error externally
const float tubeY = CAMath::Sqrt(sy2);
const float tubeZ = CAMath::Sqrt(sz2);
const float sy21 = 1.f / sy2;
const float sz21 = 1.f / sz2;
float nY, nZ;
float uncorrectedY, uncorrectedZ;
if (Merger->Param().par.earlyTpcTransform) {
nY = Y;
nZ = Z;
uncorrectedY = Y;
uncorrectedZ = Z;
} else {
Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, Y, Z, nY, nZ);
Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, Y, Z, uncorrectedY, uncorrectedZ);
}

if (CAMath::Abs(nY) > row.getTPCMaxY()) {
if (CAMath::Abs(uncorrectedY) > row.getTPCMaxY()) {
return;
}
row.Grid().GetBinArea(nY, nZ + zOffset, tubeY, tubeZ, bin, ny, nz);
row.Grid().GetBinArea(uncorrectedY, uncorrectedZ + zOffset, tubeY, tubeZ, bin, ny, nz);

const int nBinsY = row.Grid().Ny();
const int idOffset = tracker.Data().ClusterIdOffset();
Expand All @@ -601,8 +602,8 @@ GPUd() void GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict
const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih);
const float y = y0 + hh.x * stepY;
const float z = z0 + hh.y * stepZ;
const float dy = y - nY;
const float dz = z - nZ;
const float dy = y - uncorrectedY;
const float dz = z - uncorrectedZ;
if (dy * dy * sy21 + dz * dz * sz21 <= CAMath::Sqrt(2.f)) {
// CADEBUG(printf("Found Y %f Z %f\n", y, z));
CAMath::AtomicMax(weight, myWeight);
Expand Down
24 changes: 16 additions & 8 deletions GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,26 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread<GPUTPCCreateOccupancyMap::fold>(i
if (iSliceRow > GPUCA_ROW_COUNT * GPUCA_NSLICES) {
return;
}
static constexpr unsigned int FOLD_BINS_BEEFORE_AFTER = 2;
static constexpr unsigned int FOLD_BINS = FOLD_BINS_BEEFORE_AFTER * 2 + 1;
const unsigned int iSlice = iSliceRow / GPUCA_ROW_COUNT;
const unsigned int iRow = iSliceRow % GPUCA_ROW_COUNT;
const unsigned int nBins = GPUTPCClusterOccupancyMapBin::getNBins(param);
const unsigned int nFoldBins = CAMath::Min(5u, nBins);
unsigned int sum = 0;
for (unsigned int i = 0; i < nFoldBins; i++) {
sum += map[i].bin[iSlice][iRow];
if (nBins < FOLD_BINS) {
return;
}
unsigned short lastVal[FOLD_BINS_BEEFORE_AFTER];
unsigned int sum = (FOLD_BINS_BEEFORE_AFTER + 1) * map[0].bin[iSlice][iRow];
for (unsigned int i = 0; i < FOLD_BINS_BEEFORE_AFTER; i++) {
sum += map[i + 1].bin[iSlice][iRow];
lastVal[i] = map[0].bin[iSlice][iRow];
}
unsigned short lastVal;
unsigned int lastValIndex = 0;
for (unsigned int i = 0; i < nBins; i++) {
lastVal = map[i].bin[iSlice][iRow];
map[i].bin[iSlice][iRow] = sum / nFoldBins;
sum += map[CAMath::Min(i + nFoldBins, nBins - 1)].bin[iSlice][iRow] - lastVal;
unsigned short useLastVal = lastVal[lastValIndex];
lastVal[lastValIndex] = map[i].bin[iSlice][iRow];
map[i].bin[iSlice][iRow] = sum / FOLD_BINS;
sum += map[CAMath::Min(i + FOLD_BINS_BEEFORE_AFTER + 1, nBins - 1)].bin[iSlice][iRow] - useLastVal;
lastValIndex = lastValIndex < FOLD_BINS_BEEFORE_AFTER - 1 ? lastValIndex + 1 : 0;
}
}
2 changes: 1 addition & 1 deletion GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int /*nBlocks*/, int
#endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR
#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected);

#endif
calink best = CALINK_INVAL;

Expand Down

0 comments on commit 7b690c1

Please sign in to comment.