@@ -138,7 +138,7 @@ GPUdi() void finaliseTrackExtensionTrial(const uint32_t backupPattern,
138138}
139139
140140template <int NLayers>
141- GPUg () void __launch_bounds__ (256 , 1 ) countTrackSeedsKernel(
141+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) countTrackSeedsKernel(
142142 TrackSeed<NLayers>* trackSeeds,
143143 const TrackingFrameInfo** foundTrackingFrameInfo,
144144 const Cluster** unsortedClusters,
@@ -158,28 +158,28 @@ GPUg() void __launch_bounds__(256, 1) countTrackSeedsKernel(
158158{
159159 for (int iCurrentTrackSeedIndex = blockIdx .x * blockDim .x + threadIdx .x ; iCurrentTrackSeedIndex < nSeeds; iCurrentTrackSeedIndex += blockDim .x * gridDim .x ) {
160160 TrackITSInternal<NLayers> temporaryTrack;
161- if (o2::its::track::refitTrack (trackSeeds[iCurrentTrackSeedIndex],
162- temporaryTrack,
163- maxChi2ClusterAttachment,
164- maxChi2NDF,
165- bz,
166- foundTrackingFrameInfo,
167- unsortedClusters,
168- layerxX0,
169- layerRadii,
170- minPts,
171- propagator,
172- matCorrType,
173- reseedIfShorter,
174- shiftRefToCluster,
175- repeatRefitOut)) {
161+ if (o2::its::track::refitTrackSeed (trackSeeds[iCurrentTrackSeedIndex],
162+ temporaryTrack,
163+ maxChi2ClusterAttachment,
164+ maxChi2NDF,
165+ bz,
166+ foundTrackingFrameInfo,
167+ unsortedClusters,
168+ layerxX0,
169+ layerRadii,
170+ minPts,
171+ propagator,
172+ matCorrType,
173+ reseedIfShorter,
174+ shiftRefToCluster,
175+ repeatRefitOut)) {
176176 seedLUT[iCurrentTrackSeedIndex] = 1 ;
177177 }
178178 }
179179}
180180
181181template <int NLayers>
182- GPUg () void __launch_bounds__ (256 , 1 ) fitTrackSeedsKernel(
182+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) fitTrackSeedsKernel(
183183 TrackSeed<NLayers>* trackSeeds,
184184 const TrackingFrameInfo** foundTrackingFrameInfo,
185185 const Cluster** unsortedClusters,
@@ -219,21 +219,21 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
219219 continue ;
220220 }
221221 TrackITSInternal<NLayers> temporaryTrack;
222- bool refitSuccess = o2::its::track::refitTrack (trackSeeds[iCurrentTrackSeedIndex],
223- temporaryTrack,
224- maxChi2ClusterAttachment,
225- maxChi2NDF,
226- bz,
227- foundTrackingFrameInfo,
228- unsortedClusters,
229- layerxX0,
230- layerRadii,
231- minPts,
232- propagator,
233- matCorrType,
234- reseedIfShorter,
235- shiftRefToCluster,
236- repeatRefitOut);
222+ bool refitSuccess = o2::its::track::refitTrackSeed (trackSeeds[iCurrentTrackSeedIndex],
223+ temporaryTrack,
224+ maxChi2ClusterAttachment,
225+ maxChi2NDF,
226+ bz,
227+ foundTrackingFrameInfo,
228+ unsortedClusters,
229+ layerxX0,
230+ layerRadii,
231+ minPts,
232+ propagator,
233+ matCorrType,
234+ reseedIfShorter,
235+ shiftRefToCluster,
236+ repeatRefitOut);
237237 if (refitSuccess) {
238238 if ((extendTop || extendBot) && activeHypothesesScratch && nextHypothesesScratch) {
239239 const int maxHypotheses = o2::gpu::CAMath::Max (maxHypothesesConfig, 1 );
@@ -400,7 +400,7 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
400400}
401401
402402template <bool initRun, int NLayers>
403- GPUg () void __launch_bounds__ (256 , 1 ) computeLayerCellNeighboursKernel(
403+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) computeLayerCellNeighboursKernel(
404404 CellSeed** cellSeedArray,
405405 int * neighboursCursor,
406406 int ** cellsLUTs,
@@ -447,7 +447,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
447447}
448448
449449template <bool initRun, int NLayers>
450- GPUg () void __launch_bounds__ (256 , 1 ) computeLayerCellsKernel(
450+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) computeLayerCellsKernel(
451451 const Cluster** sortedClusters,
452452 const Cluster** unsortedClusters,
453453 const TrackingFrameInfo** tfInfo,
@@ -545,7 +545,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
545545}
546546
547547template <bool initRun, int NLayers>
548- GPUg () void __launch_bounds__ (256 , 1 ) computeLayerTrackletsMultiROFKernel(
548+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) computeLayerTrackletsMultiROFKernel(
549549 const IndexTableUtils<NLayers>* utils,
550550 const typename ROFMaskTable<NLayers>::View rofMask,
551551 const int transitionId,
@@ -692,7 +692,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
692692 }
693693}
694694
695- GPUg () void __launch_bounds__ (256 , 1 ) compileTrackletsLookupTableKernel(
695+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) compileTrackletsLookupTableKernel(
696696 const Tracklet* tracklets,
697697 int * trackletsLookUpTable,
698698 const int nTracklets)
@@ -703,7 +703,7 @@ GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel(
703703}
704704
705705template <bool dryRun, int NLayers, typename CurrentSeed>
706- GPUg () void __launch_bounds__ (256 , 1 ) processNeighboursKernel(
706+ GPUg () void __launch_bounds__ (constants::GPUThreads , 1 ) processNeighboursKernel(
707707 const int defaultCellTopologyId,
708708 const int level,
709709 CellSeed** allCellSeeds,
@@ -852,7 +852,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
852852 o2::its::ExternalAllocator* alloc,
853853 gpu::Streams& streams)
854854{
855- gpu::computeLayerTrackletsMultiROFKernel<true ><<<60 , 256 , 0 , streams[transitionId].get()>>> (
855+ gpu::computeLayerTrackletsMultiROFKernel<true ><<<constants::GPUBlocks, constants::GPUThreads , 0 , streams[transitionId].get()>>> (
856856 utils,
857857 rofMask,
858858 transitionId,
@@ -915,7 +915,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
915915 o2::its::ExternalAllocator* alloc,
916916 gpu::Streams& streams)
917917{
918- gpu::computeLayerTrackletsMultiROFKernel<false ><<<60 , 256 , 0 , streams[transitionId].get()>>> (
918+ gpu::computeLayerTrackletsMultiROFKernel<false ><<<constants::GPUBlocks, constants::GPUThreads , 0 , streams[transitionId].get()>>> (
919919 utils,
920920 rofMask,
921921 transitionId,
@@ -947,7 +947,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
947947 nTracklets[transitionId] = unique_end - tracklets_ptr;
948948 if (fromLayer > 0 ) {
949949 GPUChkErrS (cudaMemsetAsync (trackletsLUTsHost[transitionId], 0 , (nClusters[fromLayer] + 1 ) * sizeof (int ), streams[transitionId].get ()));
950- gpu::compileTrackletsLookupTableKernel<<<60 , 256 , 0 , streams[transitionId].get()>>> (
950+ gpu::compileTrackletsLookupTableKernel<<<constants::GPUBlocks, constants::GPUThreads , 0 , streams[transitionId].get()>>> (
951951 spanTracklets[transitionId],
952952 trackletsLUTsHost[transitionId],
953953 nTracklets[transitionId]);
@@ -977,7 +977,7 @@ void countCellsHandler(
977977 gpu::Streams& streams)
978978{
979979 thrust::device_vector<float > layerxX0 (layerxX0Host);
980- gpu::computeLayerCellsKernel<true , NLayers><<<60 , 256 , 0 , streams[cellTopologyId].get()>>> (
980+ gpu::computeLayerCellsKernel<true , NLayers><<<constants::GPUBlocks, constants::GPUThreads , 0 , streams[cellTopologyId].get()>>> (
981981 sortedClusters, // const Cluster**
982982 unsortedClusters, // const Cluster**
983983 tfInfo, // const TrackingFrameInfo**
@@ -1018,7 +1018,7 @@ void computeCellsHandler(
10181018 gpu::Streams& streams)
10191019{
10201020 thrust::device_vector<float > layerxX0 (layerxX0Host);
1021- gpu::computeLayerCellsKernel<false , NLayers><<<60 , 256 , 0 , streams[cellTopologyId].get()>>> (
1021+ gpu::computeLayerCellsKernel<false , NLayers><<<constants::GPUBlocks, constants::GPUThreads , 0 , streams[cellTopologyId].get()>>> (
10221022 sortedClusters, // const Cluster**
10231023 unsortedClusters, // const Cluster**
10241024 tfInfo, // const TrackingFrameInfo**
@@ -1047,7 +1047,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10471047 const unsigned int nCells,
10481048 gpu::Stream& stream)
10491049{
1050- gpu::computeLayerCellNeighboursKernel<true , NLayers><<<60 , 256 , 0 , stream.get()>>> (
1050+ gpu::computeLayerCellNeighboursKernel<true , NLayers><<<constants::GPUBlocks, constants::GPUThreads , 0 , stream.get()>>> (
10511051 cellsLayersDevice,
10521052 neighboursCursor,
10531053 cellsLUTs,
@@ -1082,7 +1082,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
10821082 const unsigned int nCells,
10831083 gpu::Stream& stream)
10841084{
1085- gpu::computeLayerCellNeighboursKernel<false , NLayers><<<60 , 256 , 0 , stream.get()>>> (
1085+ gpu::computeLayerCellNeighboursKernel<false , NLayers><<<constants::GPUBlocks, constants::GPUThreads , 0 , stream.get()>>> (
10861086 cellsLayersDevice,
10871087 neighboursCursor,
10881088 cellsLUTs,
@@ -1142,7 +1142,7 @@ void processNeighboursHandler(const int startLevel,
11421142 thrust::device_vector<int , gpu::TypedAllocator<int >> foundSeedsTable (nCells[defaultCellTopologyId] + 1 , 0 , allocInt);
11431143 auto nosync_policy = THRUST_NAMESPACE::par_nosync (gpu::TypedAllocator<char >(alloc)).on (gpu::Stream::DefaultStream);
11441144
1145- gpu::processNeighboursKernel<true , NLayers, CellSeed><<<60 , 256 >>> (
1145+ gpu::processNeighboursKernel<true , NLayers, CellSeed><<<constants::GPUBlocks, constants::GPUThreads >>> (
11461146 defaultCellTopologyId,
11471147 startLevel,
11481148 allCellSeeds,
@@ -1168,7 +1168,7 @@ void processNeighboursHandler(const int startLevel,
11681168 thrust::device_vector<int , gpu::TypedAllocator<int >> updatedCellId (foundSeedsTable.back (), 0 , allocInt);
11691169 thrust::device_vector<int , gpu::TypedAllocator<int >> updatedCellTopologyId (foundSeedsTable.back (), 0 , allocInt);
11701170 thrust::device_vector<TrackSeed<NLayers>, gpu::TypedAllocator<TrackSeed<NLayers>>> updatedCellSeed (foundSeedsTable.back (), allocTrackSeed);
1171- gpu::processNeighboursKernel<false , NLayers, CellSeed><<<60 , 256 >>> (
1171+ gpu::processNeighboursKernel<false , NLayers, CellSeed><<<constants::GPUBlocks, constants::GPUThreads >>> (
11721172 defaultCellTopologyId,
11731173 startLevel,
11741174 allCellSeeds,
@@ -1207,7 +1207,7 @@ void processNeighboursHandler(const int startLevel,
12071207 thrust::fill (nosync_policy, foundSeedsTable.begin (), foundSeedsTable.end (), 0 );
12081208
12091209 --level;
1210- gpu::processNeighboursKernel<true , NLayers, TrackSeed<NLayers>><<<60 , 256 >>> (
1210+ gpu::processNeighboursKernel<true , NLayers, TrackSeed<NLayers>><<<constants::GPUBlocks, constants::GPUThreads >>> (
12111211 constants::UnusedIndex,
12121212 level,
12131213 allCellSeeds,
@@ -1238,7 +1238,7 @@ void processNeighboursHandler(const int startLevel,
12381238 updatedCellSeed.resize (foundSeeds);
12391239 thrust::fill (nosync_policy, updatedCellSeed.begin (), updatedCellSeed.end (), TrackSeed<NLayers>());
12401240
1241- gpu::processNeighboursKernel<false , NLayers, TrackSeed<NLayers>><<<60 , 256 >>> (
1241+ gpu::processNeighboursKernel<false , NLayers, TrackSeed<NLayers>><<<constants::GPUBlocks, constants::GPUThreads >>> (
12421242 constants::UnusedIndex,
12431243 level,
12441244 allCellSeeds,
@@ -1294,7 +1294,7 @@ void countTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
12941294 thrust::device_vector<float > minPts (minPtsHost);
12951295 thrust::device_vector<float > layerRadii (layerRadiiHost);
12961296 thrust::device_vector<float > layerxX0 (layerxX0Host);
1297- gpu::countTrackSeedsKernel<NLayers><<<60 , 256 >>> (
1297+ gpu::countTrackSeedsKernel<NLayers><<<constants::GPUBlocks, constants::GPUThreads >>> (
12981298 trackSeeds, // CellSeed*
12991299 foundTrackingFrameInfo, // TrackingFrameInfo**
13001300 unsortedClusters, // Cluster**
@@ -1355,7 +1355,7 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
13551355 thrust::device_vector<float > minPts (minPtsHost);
13561356 thrust::device_vector<float > layerRadii (layerRadiiHost);
13571357 thrust::device_vector<float > layerxX0 (layerxX0Host);
1358- gpu::fitTrackSeedsKernel<NLayers><<<60 , 256 >>> (
1358+ gpu::fitTrackSeedsKernel<NLayers><<<constants::GPUBlocks, constants::GPUThreads >>> (
13591359 trackSeeds, // CellSeed*
13601360 foundTrackingFrameInfo, // TrackingFrameInfo**
13611361 unsortedClusters, // Cluster**
0 commit comments