Skip to content

Commit 7984605

Browse files
committed
ITS: new round of comments
1 parent d5d30e7 commit 7984605

8 files changed

Lines changed: 86 additions & 88 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,6 @@
2121

2222
namespace o2::its
2323
{
24-
namespace
25-
{
26-
constexpr int trackExtensionLaunchThreads = 60 * 256;
27-
}
28-
2924
template <int NLayers>
3025
void TrackerTraitsGPU<NLayers>::initialiseTimeFrame(const int iteration)
3126
{
@@ -371,7 +366,7 @@ void TrackerTraitsGPU<NLayers>::findRoads(const int iteration)
371366
mTimeFrameGPU->getFrameworkAllocator());
372367
mTimeFrameGPU->createTrackITSExtDevice(trackSeeds.size());
373368
if (extendTracks) {
374-
mTimeFrameGPU->createTrackExtensionScratchDevice(trackExtensionLaunchThreads, this->mTrkParams[iteration].TrackFollowerMaxHypotheses);
369+
mTimeFrameGPU->createTrackExtensionScratchDevice(constants::GPUThreadsTotal, this->mTrkParams[iteration].TrackFollowerMaxHypotheses);
375370
}
376371
computeTrackSeedHandler(mTimeFrameGPU->getDeviceTrackSeeds(),
377372
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 50 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ GPUdi() void finaliseTrackExtensionTrial(const uint32_t backupPattern,
138138
}
139139

140140
template <int NLayers>
141-
GPUg() void __launch_bounds__(256, 1) countTrackSeedsKernel(
141+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) countTrackSeedsKernel(
142142
TrackSeed<NLayers>* trackSeeds,
143143
const TrackingFrameInfo** foundTrackingFrameInfo,
144144
const Cluster** unsortedClusters,
@@ -158,28 +158,28 @@ GPUg() void __launch_bounds__(256, 1) countTrackSeedsKernel(
158158
{
159159
for (int iCurrentTrackSeedIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentTrackSeedIndex < nSeeds; iCurrentTrackSeedIndex += blockDim.x * gridDim.x) {
160160
TrackITSInternal<NLayers> temporaryTrack;
161-
if (o2::its::track::refitTrack(trackSeeds[iCurrentTrackSeedIndex],
162-
temporaryTrack,
163-
maxChi2ClusterAttachment,
164-
maxChi2NDF,
165-
bz,
166-
foundTrackingFrameInfo,
167-
unsortedClusters,
168-
layerxX0,
169-
layerRadii,
170-
minPts,
171-
propagator,
172-
matCorrType,
173-
reseedIfShorter,
174-
shiftRefToCluster,
175-
repeatRefitOut)) {
161+
if (o2::its::track::refitTrackSeed(trackSeeds[iCurrentTrackSeedIndex],
162+
temporaryTrack,
163+
maxChi2ClusterAttachment,
164+
maxChi2NDF,
165+
bz,
166+
foundTrackingFrameInfo,
167+
unsortedClusters,
168+
layerxX0,
169+
layerRadii,
170+
minPts,
171+
propagator,
172+
matCorrType,
173+
reseedIfShorter,
174+
shiftRefToCluster,
175+
repeatRefitOut)) {
176176
seedLUT[iCurrentTrackSeedIndex] = 1;
177177
}
178178
}
179179
}
180180

181181
template <int NLayers>
182-
GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
182+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) fitTrackSeedsKernel(
183183
TrackSeed<NLayers>* trackSeeds,
184184
const TrackingFrameInfo** foundTrackingFrameInfo,
185185
const Cluster** unsortedClusters,
@@ -219,21 +219,21 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
219219
continue;
220220
}
221221
TrackITSInternal<NLayers> temporaryTrack;
222-
bool refitSuccess = o2::its::track::refitTrack(trackSeeds[iCurrentTrackSeedIndex],
223-
temporaryTrack,
224-
maxChi2ClusterAttachment,
225-
maxChi2NDF,
226-
bz,
227-
foundTrackingFrameInfo,
228-
unsortedClusters,
229-
layerxX0,
230-
layerRadii,
231-
minPts,
232-
propagator,
233-
matCorrType,
234-
reseedIfShorter,
235-
shiftRefToCluster,
236-
repeatRefitOut);
222+
bool refitSuccess = o2::its::track::refitTrackSeed(trackSeeds[iCurrentTrackSeedIndex],
223+
temporaryTrack,
224+
maxChi2ClusterAttachment,
225+
maxChi2NDF,
226+
bz,
227+
foundTrackingFrameInfo,
228+
unsortedClusters,
229+
layerxX0,
230+
layerRadii,
231+
minPts,
232+
propagator,
233+
matCorrType,
234+
reseedIfShorter,
235+
shiftRefToCluster,
236+
repeatRefitOut);
237237
if (refitSuccess) {
238238
if ((extendTop || extendBot) && activeHypothesesScratch && nextHypothesesScratch) {
239239
const int maxHypotheses = o2::gpu::CAMath::Max(maxHypothesesConfig, 1);
@@ -400,7 +400,7 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
400400
}
401401

402402
template <bool initRun, int NLayers>
403-
GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
403+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) computeLayerCellNeighboursKernel(
404404
CellSeed** cellSeedArray,
405405
int* neighboursCursor,
406406
int** cellsLUTs,
@@ -447,7 +447,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
447447
}
448448

449449
template <bool initRun, int NLayers>
450-
GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
450+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) computeLayerCellsKernel(
451451
const Cluster** sortedClusters,
452452
const Cluster** unsortedClusters,
453453
const TrackingFrameInfo** tfInfo,
@@ -545,7 +545,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
545545
}
546546

547547
template <bool initRun, int NLayers>
548-
GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
548+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) computeLayerTrackletsMultiROFKernel(
549549
const IndexTableUtils<NLayers>* utils,
550550
const typename ROFMaskTable<NLayers>::View rofMask,
551551
const int transitionId,
@@ -692,7 +692,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
692692
}
693693
}
694694

695-
GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel(
695+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) compileTrackletsLookupTableKernel(
696696
const Tracklet* tracklets,
697697
int* trackletsLookUpTable,
698698
const int nTracklets)
@@ -703,7 +703,7 @@ GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel(
703703
}
704704

705705
template <bool dryRun, int NLayers, typename CurrentSeed>
706-
GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
706+
GPUg() void __launch_bounds__(constants::GPUThreads, 1) processNeighboursKernel(
707707
const int defaultCellTopologyId,
708708
const int level,
709709
CellSeed** allCellSeeds,
@@ -852,7 +852,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
852852
o2::its::ExternalAllocator* alloc,
853853
gpu::Streams& streams)
854854
{
855-
gpu::computeLayerTrackletsMultiROFKernel<true><<<60, 256, 0, streams[transitionId].get()>>>(
855+
gpu::computeLayerTrackletsMultiROFKernel<true><<<constants::GPUBlocks, constants::GPUThreads, 0, streams[transitionId].get()>>>(
856856
utils,
857857
rofMask,
858858
transitionId,
@@ -915,7 +915,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
915915
o2::its::ExternalAllocator* alloc,
916916
gpu::Streams& streams)
917917
{
918-
gpu::computeLayerTrackletsMultiROFKernel<false><<<60, 256, 0, streams[transitionId].get()>>>(
918+
gpu::computeLayerTrackletsMultiROFKernel<false><<<constants::GPUBlocks, constants::GPUThreads, 0, streams[transitionId].get()>>>(
919919
utils,
920920
rofMask,
921921
transitionId,
@@ -947,7 +947,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
947947
nTracklets[transitionId] = unique_end - tracklets_ptr;
948948
if (fromLayer > 0) {
949949
GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[transitionId], 0, (nClusters[fromLayer] + 1) * sizeof(int), streams[transitionId].get()));
950-
gpu::compileTrackletsLookupTableKernel<<<60, 256, 0, streams[transitionId].get()>>>(
950+
gpu::compileTrackletsLookupTableKernel<<<constants::GPUBlocks, constants::GPUThreads, 0, streams[transitionId].get()>>>(
951951
spanTracklets[transitionId],
952952
trackletsLUTsHost[transitionId],
953953
nTracklets[transitionId]);
@@ -977,7 +977,7 @@ void countCellsHandler(
977977
gpu::Streams& streams)
978978
{
979979
thrust::device_vector<float> layerxX0(layerxX0Host);
980-
gpu::computeLayerCellsKernel<true, NLayers><<<60, 256, 0, streams[cellTopologyId].get()>>>(
980+
gpu::computeLayerCellsKernel<true, NLayers><<<constants::GPUBlocks, constants::GPUThreads, 0, streams[cellTopologyId].get()>>>(
981981
sortedClusters, // const Cluster**
982982
unsortedClusters, // const Cluster**
983983
tfInfo, // const TrackingFrameInfo**
@@ -1018,7 +1018,7 @@ void computeCellsHandler(
10181018
gpu::Streams& streams)
10191019
{
10201020
thrust::device_vector<float> layerxX0(layerxX0Host);
1021-
gpu::computeLayerCellsKernel<false, NLayers><<<60, 256, 0, streams[cellTopologyId].get()>>>(
1021+
gpu::computeLayerCellsKernel<false, NLayers><<<constants::GPUBlocks, constants::GPUThreads, 0, streams[cellTopologyId].get()>>>(
10221022
sortedClusters, // const Cluster**
10231023
unsortedClusters, // const Cluster**
10241024
tfInfo, // const TrackingFrameInfo**
@@ -1047,7 +1047,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10471047
const unsigned int nCells,
10481048
gpu::Stream& stream)
10491049
{
1050-
gpu::computeLayerCellNeighboursKernel<true, NLayers><<<60, 256, 0, stream.get()>>>(
1050+
gpu::computeLayerCellNeighboursKernel<true, NLayers><<<constants::GPUBlocks, constants::GPUThreads, 0, stream.get()>>>(
10511051
cellsLayersDevice,
10521052
neighboursCursor,
10531053
cellsLUTs,
@@ -1082,7 +1082,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
10821082
const unsigned int nCells,
10831083
gpu::Stream& stream)
10841084
{
1085-
gpu::computeLayerCellNeighboursKernel<false, NLayers><<<60, 256, 0, stream.get()>>>(
1085+
gpu::computeLayerCellNeighboursKernel<false, NLayers><<<constants::GPUBlocks, constants::GPUThreads, 0, stream.get()>>>(
10861086
cellsLayersDevice,
10871087
neighboursCursor,
10881088
cellsLUTs,
@@ -1142,7 +1142,7 @@ void processNeighboursHandler(const int startLevel,
11421142
thrust::device_vector<int, gpu::TypedAllocator<int>> foundSeedsTable(nCells[defaultCellTopologyId] + 1, 0, allocInt);
11431143
auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator<char>(alloc)).on(gpu::Stream::DefaultStream);
11441144

1145-
gpu::processNeighboursKernel<true, NLayers, CellSeed><<<60, 256>>>(
1145+
gpu::processNeighboursKernel<true, NLayers, CellSeed><<<constants::GPUBlocks, constants::GPUThreads>>>(
11461146
defaultCellTopologyId,
11471147
startLevel,
11481148
allCellSeeds,
@@ -1168,7 +1168,7 @@ void processNeighboursHandler(const int startLevel,
11681168
thrust::device_vector<int, gpu::TypedAllocator<int>> updatedCellId(foundSeedsTable.back(), 0, allocInt);
11691169
thrust::device_vector<int, gpu::TypedAllocator<int>> updatedCellTopologyId(foundSeedsTable.back(), 0, allocInt);
11701170
thrust::device_vector<TrackSeed<NLayers>, gpu::TypedAllocator<TrackSeed<NLayers>>> updatedCellSeed(foundSeedsTable.back(), allocTrackSeed);
1171-
gpu::processNeighboursKernel<false, NLayers, CellSeed><<<60, 256>>>(
1171+
gpu::processNeighboursKernel<false, NLayers, CellSeed><<<constants::GPUBlocks, constants::GPUThreads>>>(
11721172
defaultCellTopologyId,
11731173
startLevel,
11741174
allCellSeeds,
@@ -1207,7 +1207,7 @@ void processNeighboursHandler(const int startLevel,
12071207
thrust::fill(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), 0);
12081208

12091209
--level;
1210-
gpu::processNeighboursKernel<true, NLayers, TrackSeed<NLayers>><<<60, 256>>>(
1210+
gpu::processNeighboursKernel<true, NLayers, TrackSeed<NLayers>><<<constants::GPUBlocks, constants::GPUThreads>>>(
12111211
constants::UnusedIndex,
12121212
level,
12131213
allCellSeeds,
@@ -1238,7 +1238,7 @@ void processNeighboursHandler(const int startLevel,
12381238
updatedCellSeed.resize(foundSeeds);
12391239
thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), TrackSeed<NLayers>());
12401240

1241-
gpu::processNeighboursKernel<false, NLayers, TrackSeed<NLayers>><<<60, 256>>>(
1241+
gpu::processNeighboursKernel<false, NLayers, TrackSeed<NLayers>><<<constants::GPUBlocks, constants::GPUThreads>>>(
12421242
constants::UnusedIndex,
12431243
level,
12441244
allCellSeeds,
@@ -1294,7 +1294,7 @@ void countTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
12941294
thrust::device_vector<float> minPts(minPtsHost);
12951295
thrust::device_vector<float> layerRadii(layerRadiiHost);
12961296
thrust::device_vector<float> layerxX0(layerxX0Host);
1297-
gpu::countTrackSeedsKernel<NLayers><<<60, 256>>>(
1297+
gpu::countTrackSeedsKernel<NLayers><<<constants::GPUBlocks, constants::GPUThreads>>>(
12981298
trackSeeds, // CellSeed*
12991299
foundTrackingFrameInfo, // TrackingFrameInfo**
13001300
unsortedClusters, // Cluster**
@@ -1355,7 +1355,7 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
13551355
thrust::device_vector<float> minPts(minPtsHost);
13561356
thrust::device_vector<float> layerRadii(layerRadiiHost);
13571357
thrust::device_vector<float> layerxX0(layerxX0Host);
1358-
gpu::fitTrackSeedsKernel<NLayers><<<60, 256>>>(
1358+
gpu::fitTrackSeedsKernel<NLayers><<<constants::GPUBlocks, constants::GPUThreads>>>(
13591359
trackSeeds, // CellSeed*
13601360
foundTrackingFrameInfo, // TrackingFrameInfo**
13611361
unsortedClusters, // Cluster**

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ constexpr float Radl = 9.36f; // Radiation length of Si [c
3535
constexpr float Rho = 2.33f; // Density of Si [g/cm^3]
3636
constexpr int MaxIter = 4; // Max. supported iterations
3737
constexpr int MaxSelectedTrackletsPerCluster = 100; // vertexer: max lines per cluster
38+
constexpr int GPUBlocks = 60; // default CUDA/HIP launch blocks
39+
constexpr int GPUThreads = 256; // default CUDA/HIP launch threads
40+
constexpr int GPUThreadsTotal = GPUBlocks * GPUThreads;
3841

3942
namespace helpers
4043
{

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackExtensionHypothesis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
1+
// Copyright 2019-2026 CERN and copyright holders of ALICE O2.
22
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
33
// All rights not expressly granted are reserved.
44
//

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -310,21 +310,21 @@ GPUdi() bool refitTrack(TrackITSInternal<NLayers>& track,
310310
}
311311

312312
template <int NLayers>
313-
GPUdi() bool refitTrack(const TrackSeed<NLayers>& trackSeed,
314-
TrackITSInternal<NLayers>& temporaryTrack,
315-
float chi2clcut,
316-
float chi2ndfcut,
317-
const float bz,
318-
const TrackingFrameInfo* const* tfInfos,
319-
const Cluster* const* clusters,
320-
const float* layerxX0,
321-
const float* layerRadii,
322-
const float* minPt,
323-
const o2::base::Propagator* propagator,
324-
const o2::base::PropagatorF::MatCorrType matCorrType,
325-
const int reseedIfShorter,
326-
const bool shiftRefToCluster,
327-
const bool repeatRefitOut)
313+
GPUdi() bool refitTrackSeed(const TrackSeed<NLayers>& trackSeed,
314+
TrackITSInternal<NLayers>& temporaryTrack,
315+
float chi2clcut,
316+
float chi2ndfcut,
317+
const float bz,
318+
const TrackingFrameInfo* const* tfInfos,
319+
const Cluster* const* clusters,
320+
const float* layerxX0,
321+
const float* layerRadii,
322+
const float* minPt,
323+
const o2::base::Propagator* propagator,
324+
const o2::base::PropagatorF::MatCorrType matCorrType,
325+
const int reseedIfShorter,
326+
const bool shiftRefToCluster,
327+
const bool repeatRefitOut)
328328
{
329329
temporaryTrack = seedTrackForRefit(trackSeed,
330330
tfInfos,

Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ void TimeFrame<NLayers>::initTrackerTopologies(gsl::span<const TrackingParameter
265265
template <int NLayers>
266266
void TimeFrame<NLayers>::initialise(const TrackingParameters& trkParam, const int maxLayers, const int iteration)
267267
{
268+
resetTrackExtensionCounters();
268269
mTrackingTopologyView = iteration != constants::UnusedIndex ? mTrackerTopologies[iteration].getView() : (maxLayers == 3 ? mVertexingTopology.getView() : mDefaultTrackingTopology.getView());
269270

270271
if (trkParam.PassFlags[IterationStep::FirstPass]) {

Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ float Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& e
7575
float timeFrame{0.}, timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.};
7676
size_t nTracklets{0}, nCells{0}, nNeighbours{0};
7777
int nTracks{-static_cast<int>(mTimeFrame->getNumberOfTracks())};
78-
mTimeFrame->resetTrackExtensionCounters();
7978
iVertex = std::min(maxNvertices, 0);
8079
logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration));
8180
total += timeFrame = evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurStep = TFInit], iteration, evalLog, iteration);

0 commit comments

Comments
 (0)