Skip to content

Commit f4c88b9

Browse files
committed
GPU/TPC: Simplify pad indexing in noisy-pad filter
1 parent decf573 commit f4c88b9

File tree

11 files changed

+40
-78
lines changed

11 files changed

+40
-78
lines changed

GPU/GPUTracking/DataTypes/TPCPadBitMap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ struct TPCPadBitMap {
6868
{
6969
public:
7070
using T = uint32_t;
71-
static constexpr int32_t NWORDS = (TPC_PADS_IN_SECTOR + sizeof(T) * 8 - 1) / sizeof(T);
71+
static constexpr int32_t NWORDS = (TPC_REAL_PADS_IN_SECTOR + sizeof(T) * 8 - 1) / sizeof(T);
7272
GPUdi() SectorBitMap()
7373
{
7474
reset();

GPU/GPUTracking/DataTypes/TPCPadGainCalib.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,14 @@ struct TPCPadGainCalib {
120120

121121
GPUd() void reset()
122122
{
123-
for (uint16_t p = 0; p < TPC_PADS_IN_SECTOR; p++) {
123+
for (uint16_t p = 0; p < TPC_REAL_PADS_IN_SECTOR; p++) {
124124
set(p, 1.0f);
125125
}
126126
}
127127

128128
private:
129+
T mGainCorrection[TPC_REAL_PADS_IN_SECTOR];
130+
129131
GPUd() T pack(float f) const
130132
{
131133
f = CAMath::Clamp(f, mMinCorrectionFactor, mMaxCorrectionFactor);
@@ -140,8 +142,6 @@ struct TPCPadGainCalib {
140142
return mMinCorrectionFactor + (mMaxCorrectionFactor - mMinCorrectionFactor) * float(c) / float(NumOfSteps);
141143
}
142144

143-
T mGainCorrection[TPC_PADS_IN_SECTOR];
144-
145145
GPUdi() T& at(uint16_t globalPad)
146146
{
147147
return mGainCorrection[globalPad];

GPU/GPUTracking/Definitions/clusterFinderDefs.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,19 @@
3232
#endif
3333

3434
// Padding of 2 and 3 respectively would be enough. But this ensures that
35-
// rows are always aligned along cache lines. Likewise for TPC_PADS_PER_ROW.
35+
// rows are always aligned along cache lines. Likewise for TPC_CLUSTERER_ROW_PAD_CAPACITY.
3636
#define GPUCF_PADDING_PAD 8
3737
#define GPUCF_PADDING_TIME 4
38-
#define TPC_PADS_PER_ROW 144
38+
// Largest possible number of pads in a TPC row
39+
#define TPC_CLUSTERER_ROW_PAD_CAPACITY 144
3940

40-
#define TPC_ROWS_PER_CRU 18
41-
#define TPC_PADS_PER_ROW_PADDED (TPC_PADS_PER_ROW + GPUCF_PADDING_PAD)
42-
#define TPC_NUM_OF_PADS (GPUCA_ROW_COUNT * TPC_PADS_PER_ROW_PADDED + GPUCF_PADDING_PAD)
43-
#define TPC_PADS_IN_SECTOR 14560
41+
// Stride between rows as stored internally by the clusterizer
42+
#define TPC_CLUSTERER_ROW_STRIDE (TPC_CLUSTERER_ROW_PAD_CAPACITY + GPUCF_PADDING_PAD)
43+
// Number of pads in a sector as stored internally by the clusterizer.
44+
// This includes fake pads for constant strides between rows
45+
#define TPC_CLUSTERER_STRIDED_PAD_COUNT (GPUCA_ROW_COUNT * TPC_CLUSTERER_ROW_STRIDE + GPUCF_PADDING_PAD)
46+
// Real number of pads in a sector
47+
#define TPC_REAL_PADS_IN_SECTOR 14560
4448
#define TPC_FEC_IDS_IN_SECTOR 23296
4549
#define TPC_MAX_FRAGMENT_LEN_GPU 4000
4650
#define TPC_MAX_FRAGMENT_LEN_HOST 1000

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
861861
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout<ChargeMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType));
862862
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout<PeakMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType));
863863
if (fragment.index == 0) {
864-
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy));
864+
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_CLUSTERER_STRIDED_PAD_COUNT * sizeof(*clustererShadow.mPpadIsNoisy));
865865
}
866866
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges");
867867

@@ -965,7 +965,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
965965
const int32_t nBlocks = GPUTPCCFCheckPadBaseline::GetNBlocks(doGPU);
966966

967967
runKernel<GPUTPCCFCheckPadBaseline>({GetGridBlk(nBlocks, lane), {iSector}});
968-
getKernelTimer<GPUTPCCFCheckPadBaseline>(RecoStep::TPCClusterFinding, iSector, TPC_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false);
968+
getKernelTimer<GPUTPCCFCheckPadBaseline>(RecoStep::TPCClusterFinding, iSector, TPC_REAL_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false);
969969
}
970970

971971
runKernel<GPUTPCCFPeakFinder>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});

GPU/GPUTracking/TPCClusterFinder/CfArray2D.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class TilingLayout
4949
enum {
5050
Height = Grid::Height,
5151
Width = Grid::Width,
52-
WidthInTiles = (TPC_NUM_OF_PADS + Width - 1) / Width,
52+
WidthInTiles = (TPC_CLUSTERER_STRIDED_PAD_COUNT + Width - 1) / Width,
5353
};
5454

5555
GPUdi() static tpccf::SizeT idx(const CfChargePos& p)
@@ -65,7 +65,7 @@ class TilingLayout
6565

6666
GPUd() static size_t items(size_t fragmentLen)
6767
{
68-
return (TPC_NUM_OF_PADS + Width - 1) / Width * Width * (TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen) + Height - 1) / Height * Height;
68+
return (TPC_CLUSTERER_STRIDED_PAD_COUNT + Width - 1) / Width * Width * (TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen) + Height - 1) / Height * Height;
6969
}
7070
};
7171

@@ -74,12 +74,12 @@ class LinearLayout
7474
public:
7575
GPUdi() static tpccf::SizeT idx(const CfChargePos& p)
7676
{
77-
return TPC_NUM_OF_PADS * p.timePadded + p.gpad;
77+
return TPC_CLUSTERER_STRIDED_PAD_COUNT * p.timePadded + p.gpad;
7878
}
7979

8080
GPUd() static size_t items(size_t fragmentLen)
8181
{
82-
return TPC_NUM_OF_PADS * TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen);
82+
return TPC_CLUSTERER_STRIDED_PAD_COUNT * TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen);
8383
}
8484
};
8585

GPU/GPUTracking/TPCClusterFinder/CfChargePos.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ struct CfChargePos {
4242

4343
GPUdi() bool valid() const { return timePadded >= 0; }
4444

45-
GPUdi() tpccf::Row row() const { return gpad / TPC_PADS_PER_ROW_PADDED; }
46-
GPUdi() tpccf::Pad pad() const { return gpad % TPC_PADS_PER_ROW_PADDED - GPUCF_PADDING_PAD; }
45+
GPUdi() tpccf::Row row() const { return gpad / TPC_CLUSTERER_ROW_STRIDE; }
46+
GPUdi() tpccf::Pad pad() const { return gpad % TPC_CLUSTERER_ROW_STRIDE - GPUCF_PADDING_PAD; }
4747
GPUdi() tpccf::TPCFragmentTime time() const { return timePadded - GPUCF_PADDING_TIME; }
4848
GPUdi() tpccf::TPCFragmentTime globalTime() const { return timePadded; }
4949

@@ -52,7 +52,7 @@ struct CfChargePos {
5252
// index between 0 and TPC_CLUSTERER_STRIDED_PAD_COUNT.
5353
static constexpr GPUdi() tpccf::GlobalPad tpcGlobalPadIdx(tpccf::Row row, tpccf::Pad pad)
5454
{
55-
return TPC_PADS_PER_ROW_PADDED * row + pad + GPUCF_PADDING_PAD;
55+
return TPC_CLUSTERER_ROW_STRIDE * row + pad + GPUCF_PADDING_PAD;
5656
}
5757
};
5858

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
5050
const CfFragment& fragment = clusterer.mPmemory->fragment;
5151
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
5252

53+
constexpr GPUTPCGeometry geo;
54+
5355
const auto iRow = iBlock;
54-
const auto rowinfo = GetRowInfo(iRow);
56+
const auto nPads = geo.NPads(iRow);
5557
const CfChargePos basePos{(Row)iRow, 0, 0};
5658

5759
int32_t totalCharges = 0;
@@ -62,7 +64,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
6264
const int16_t iPadOffset = iThread % MaxNPadsPerRow;
6365
const int16_t iTimeOffset = iThread / MaxNPadsPerRow;
6466
const int16_t iPadHandle = iThread;
65-
const bool handlePad = iPadHandle < rowinfo.nPads;
67+
const bool handlePad = iPadHandle < nPads;
6668

6769
const auto firstTB = fragment.firstNonOverlapTimeBin();
6870
const auto lastTB = fragment.lastNonOverlapTimeBin();
@@ -73,7 +75,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
7375

7476
const CfChargePos pos = basePos.delta({iPadOffset, iTime});
7577

76-
smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < rowinfo.nPads ? chargeMap[pos].unpack() : 0;
78+
smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0;
7779

7880
GPUbarrier();
7981

@@ -91,7 +93,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
9193
}
9294

9395
if (handlePad) {
94-
updatePadBaseline(rowinfo.globalPadOffset + iPadOffset, clusterer, totalCharges, maxConsecCharges, maxCharge);
96+
updatePadBaseline(basePos.gpad + iPadHandle, clusterer, totalCharges, maxConsecCharges, maxCharge);
9597
}
9698
#endif
9799
}
@@ -102,11 +104,10 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t
102104
const CfFragment& fragment = clusterer.mPmemory->fragment;
103105
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
104106

105-
int32_t basePad = iBlock * PadsPerCacheline;
106-
int32_t padsPerRow;
107-
CfChargePos basePos = padToCfChargePos<PadsPerCacheline>(basePad, clusterer, padsPerRow);
107+
CfChargePos basePos(iBlock * PadsPerCacheline, 0);
108108

109-
if (not basePos.valid()) {
109+
constexpr GPUTPCGeometry geo;
110+
if (basePos.pad() >= geo.NPads(basePos.row())) {
110111
return;
111112
}
112113

@@ -153,45 +154,11 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t
153154
}
154155

155156
for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
156-
updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
157+
updatePadBaseline(basePos.gpad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
157158
}
158159
#endif
159160
}
160161

161-
template <int32_t PadsPerBlock>
162-
GPUd() CfChargePos GPUTPCCFCheckPadBaseline::padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer, int32_t& padsPerRow)
163-
{
164-
constexpr GPUTPCGeometry geo;
165-
166-
int32_t padOffset = 0;
167-
for (Row r = 0; r < GPUCA_ROW_COUNT; r++) {
168-
int32_t npads = geo.NPads(r);
169-
int32_t padInRow = pad - padOffset;
170-
if (0 <= padInRow && padInRow < npads) {
171-
int32_t cachelineOffset = padInRow % PadsPerBlock;
172-
pad -= cachelineOffset;
173-
padsPerRow = npads;
174-
return CfChargePos{r, Pad(padInRow - cachelineOffset), 0};
175-
}
176-
padOffset += npads;
177-
}
178-
179-
padsPerRow = 0;
180-
return CfChargePos{0, 0, INVALID_TIME_BIN};
181-
}
182-
183-
GPUd() GPUTPCCFCheckPadBaseline::RowInfo GPUTPCCFCheckPadBaseline::GetRowInfo(int16_t row)
184-
{
185-
constexpr GPUTPCGeometry geo;
186-
187-
int16_t padOffset = 0;
188-
for (int16_t r = 0; r < row; r++) {
189-
padOffset += geo.NPads(r);
190-
}
191-
192-
return RowInfo{padOffset, geo.NPads(row)};
193-
}
194-
195162
GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTPCClusterFinder& clusterer, int32_t totalCharges, int32_t consecCharges, Charge maxCharge)
196163
{
197164
const CfFragment& fragment = clusterer.mPmemory->fragment;

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
6363

6464
static int32_t GetNBlocks(bool isGPU)
6565
{
66-
const int32_t nBlocks = TPC_PADS_IN_SECTOR / PadsPerCacheline;
66+
const int32_t nBlocks = TPC_CLUSTERER_STRIDED_PAD_COUNT / PadsPerCacheline;
6767
return isGPU ? GPUCA_ROW_COUNT : nBlocks;
6868
}
6969

@@ -74,15 +74,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
7474
GPUd() static void CheckBaselineGPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
7575
GPUd() static void CheckBaselineCPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
7676

77-
template <int32_t PadsPerBlock>
78-
GPUd() static CfChargePos padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder&, int32_t& padsPerRow);
79-
80-
struct RowInfo {
81-
int16_t globalPadOffset;
82-
int16_t nPads;
83-
};
84-
GPUd() static RowInfo GetRowInfo(int16_t row);
85-
8677
GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge);
8778
};
8879

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ GPUd() void GPUTPCCFPeakFinder::findPeaksImpl(int32_t nBlocks, int32_t nThreads,
105105
// For certain configurations dummy work items are added, so the total
106106
// number of work items is divisible by 64.
107107
// These dummy items also compute the last digit but discard the result.
108-
CfChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))];
108+
CfChargePos pos = positions[CAMath::Min<SizeT>(idx, digitnum - 1)];
109109
Charge charge = pos.valid() ? chargeMap[pos].unpack() : Charge(0);
110110

111-
bool hasLostBaseline = padHasLostBaseline[gainCorrection.globalPad(pos.row(), pos.pad())];
112-
charge = (hasLostBaseline) ? 0.f : charge;
111+
bool hasLostBaseline = padHasLostBaseline[pos.gpad];
112+
charge = hasLostBaseline ? 0.f : charge;
113113

114114
uint8_t peak = isPeak(smem, charge, pos, SCRATCH_PAD_SEARCH_N, chargeMap, calib, smem.posBcast, smem.buf);
115115

GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void* GPUTPCClusterFinder::SetPointersOutput(void* mem)
7979

8080
void* GPUTPCClusterFinder::SetPointersScratch(void* mem)
8181
{
82-
computePointerWithAlignment(mem, mPpadIsNoisy, TPC_PADS_IN_SECTOR);
82+
computePointerWithAlignment(mem, mPpadIsNoisy, TPC_CLUSTERER_STRIDED_PAD_COUNT);
8383
computePointerWithAlignment(mem, mPpositions, mNMaxDigitsFragment);
8484
computePointerWithAlignment(mem, mPpeakPositions, mNMaxPeaks);
8585
computePointerWithAlignment(mem, mPfilteredPeakPositions, mNMaxClusters);

0 commit comments

Comments
 (0)