File indexing completed on 2024-11-14 04:15:52
0001
0002 #ifdef DUMP_GPU_TK_TUPLES
0003 #include <mutex>
0004 #endif
0005
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009
0010 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014
0015
0016 #include "CAFishbone.h"
0017 #include "CAHitNtupletGeneratorKernels.h"
0018 #include "CAHitNtupletGeneratorKernelsImpl.h"
0019
0020
0021
0022
0023 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0024
0025 template <typename TrackerTraits>
0026 CAHitNtupletGeneratorKernels<TrackerTraits>::CAHitNtupletGeneratorKernels(Params const ¶ms,
0027 uint32_t nhits,
0028 uint32_t offsetBPIX2,
0029 Queue &queue)
0030 : m_params(params),
0031
0032
0033
0034 counters_{cms::alpakatools::make_device_buffer<Counters>(queue)},
0035
0036
0037 device_hitToTuple_{cms::alpakatools::make_device_buffer<HitToTuple>(queue)},
0038 device_hitToTupleStorage_{
0039 cms::alpakatools::make_device_buffer<typename HitToTuple::Counter[]>(queue, nhits + 1)},
0040 device_tupleMultiplicity_{cms::alpakatools::make_device_buffer<TupleMultiplicity>(queue)},
0041
0042
0043 device_theCells_{
0044 cms::alpakatools::make_device_buffer<CACell[]>(queue, m_params.caParams_.maxNumberOfDoublets_)},
0045
0046 device_isOuterHitOfCell_{
0047 cms::alpakatools::make_device_buffer<OuterHitOfCellContainer[]>(queue, std::max(1u, nhits - offsetBPIX2))},
0048 isOuterHitOfCell_{cms::alpakatools::make_device_buffer<OuterHitOfCell>(queue)},
0049
0050 device_theCellNeighbors_{cms::alpakatools::make_device_buffer<CellNeighborsVector>(queue)},
0051 device_theCellTracks_{cms::alpakatools::make_device_buffer<CellTracksVector>(queue)},
0052
0053 cellStorage_{cms::alpakatools::make_device_buffer<unsigned char[]>(
0054 queue,
0055 TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) +
0056 TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks))},
0057 device_cellCuts_{cms::alpakatools::make_device_buffer<CellCuts>(queue)},
0058 device_theCellNeighborsContainer_{reinterpret_cast<CellNeighbors *>(cellStorage_.data())},
0059 device_theCellTracksContainer_{reinterpret_cast<CellTracks *>(
0060 cellStorage_.data() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors))},
0061
0062
0063 device_storage_{
0064 cms::alpakatools::make_device_buffer<cms::alpakatools::AtomicPairCounter::DoubleWord[]>(queue, 3u)},
0065 device_hitTuple_apc_{reinterpret_cast<cms::alpakatools::AtomicPairCounter *>(device_storage_.data())},
0066 device_hitToTuple_apc_{reinterpret_cast<cms::alpakatools::AtomicPairCounter *>(device_storage_.data() + 1)},
0067 device_nCells_{
0068 cms::alpakatools::make_device_view(queue, *reinterpret_cast<uint32_t *>(device_storage_.data() + 2))} {
0069 #ifdef GPU_DEBUG
0070 std::cout << "Allocation for tuple building. N hits " << nhits << std::endl;
0071 #endif
0072
0073 alpaka::memset(queue, counters_, 0);
0074 alpaka::memset(queue, device_nCells_, 0);
0075 alpaka::memset(queue, cellStorage_, 0);
0076
0077 auto cellCuts_h = cms::alpakatools::make_host_view(m_params.cellCuts_);
0078 alpaka::memcpy(queue, device_cellCuts_, cellCuts_h);
0079
0080 [[maybe_unused]] TupleMultiplicity *tupleMultiplicityDeviceData = device_tupleMultiplicity_.data();
0081 using TM = cms::alpakatools::OneToManyAssocRandomAccess<typename TrackerTraits::tindex_type,
0082 TrackerTraits::maxHitsOnTrack + 1,
0083 TrackerTraits::maxNumberOfTuples>;
0084 TM *tm = device_tupleMultiplicity_.data();
0085 TM::template launchZero<Acc1D>(tm, queue);
0086 TupleMultiplicity::template launchZero<Acc1D>(tupleMultiplicityDeviceData, queue);
0087
0088 device_hitToTupleView_.assoc = device_hitToTuple_.data();
0089 device_hitToTupleView_.offStorage = device_hitToTupleStorage_.data();
0090 device_hitToTupleView_.offSize = nhits + 1;
0091
0092 HitToTuple::template launchZero<Acc1D>(device_hitToTupleView_, queue);
0093 #ifdef GPU_DEBUG
0094 std::cout << "Allocations for CAHitNtupletGeneratorKernels: done!" << std::endl;
0095 #endif
0096 }
0097
0098 template <typename TrackerTraits>
0099 void CAHitNtupletGeneratorKernels<TrackerTraits>::launchKernels(const HitsConstView &hh,
0100 uint32_t offsetBPIX2,
0101 TkSoAView &tracks_view,
0102 Queue &queue) {
0103 using namespace caPixelDoublets;
0104 using namespace caHitNtupletGeneratorKernels;
0105
0106
0107 HitContainer::template launchZero<Acc1D>(&(tracks_view.hitIndices()), queue);
0108
0109 uint32_t nhits = hh.metadata().size();
0110
0111 #ifdef NTUPLE_DEBUG
0112 std::cout << "start tuple building. N hits " << nhits << std::endl;
0113 if (nhits < 2)
0114 std::cout << "too few hits " << nhits << std::endl;
0115 #endif
0116
0117
0118
0119
0120
0121 const auto nthTot = 64;
0122 const auto stride = 4;
0123 auto blockSize = nthTot / stride;
0124 auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0125 const auto rescale = numberOfBlocks / 65536;
0126 blockSize *= (rescale + 1);
0127 numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0128 assert(numberOfBlocks < 65536);
0129 assert(blockSize > 0 && 0 == blockSize % 16);
0130 const Vec2D blks{numberOfBlocks, 1u};
0131 const Vec2D thrs{blockSize, stride};
0132 const auto kernelConnectWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
0133
0134 alpaka::exec<Acc2D>(queue,
0135 kernelConnectWorkDiv,
0136 Kernel_connect<TrackerTraits>{},
0137 this->device_hitTuple_apc_,
0138 this->device_hitToTuple_apc_,
0139 hh,
0140 this->device_theCells_.data(),
0141 this->device_nCells_.data(),
0142 this->device_theCellNeighbors_.data(),
0143 this->isOuterHitOfCell_.data(),
0144 this->m_params.caParams_);
0145
0146
0147 if (this->m_params.earlyFishbone_ and nhits > offsetBPIX2) {
0148 const auto nthTot = 128;
0149 const auto stride = 16;
0150 const auto blockSize = nthTot / stride;
0151 const auto numberOfBlocks = cms::alpakatools::divide_up_by(nhits - offsetBPIX2, blockSize);
0152 const Vec2D blks{numberOfBlocks, 1u};
0153 const Vec2D thrs{blockSize, stride};
0154 const auto fishboneWorkDiv = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
0155 alpaka::exec<Acc2D>(queue,
0156 fishboneWorkDiv,
0157 CAFishbone<TrackerTraits>{},
0158 hh,
0159 this->device_theCells_.data(),
0160 this->device_nCells_.data(),
0161 this->isOuterHitOfCell_.data(),
0162 nhits,
0163 false);
0164 }
0165 blockSize = 64;
0166 numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0167 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0168 alpaka::exec<Acc1D>(queue,
0169 workDiv1D,
0170 Kernel_find_ntuplets<TrackerTraits>{},
0171 hh,
0172 tracks_view,
0173 this->device_theCells_.data(),
0174 this->device_nCells_.data(),
0175 this->device_theCellTracks_.data(),
0176 this->device_hitTuple_apc_,
0177 this->m_params.caParams_);
0178 #ifdef GPU_DEBUG
0179 alpaka::wait(queue);
0180 #endif
0181
0182 if (this->m_params.doStats_)
0183 alpaka::exec<Acc1D>(queue,
0184 workDiv1D,
0185 Kernel_mark_used<TrackerTraits>{},
0186 this->device_theCells_.data(),
0187 this->device_nCells_.data());
0188
0189 #ifdef GPU_DEBUG
0190 alpaka::wait(queue);
0191 #endif
0192
0193 blockSize = 128;
0194 numberOfBlocks = cms::alpakatools::divide_up_by(HitContainer{}.totOnes(), blockSize);
0195 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0196
0197 alpaka::exec<Acc1D>(
0198 queue, workDiv1D, typename HitContainer::finalizeBulk{}, this->device_hitTuple_apc_, &tracks_view.hitIndices());
0199
0200 #ifdef GPU_DEBUG
0201 alpaka::wait(queue);
0202 #endif
0203
0204 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_fillHitDetIndices<TrackerTraits>{}, tracks_view, hh);
0205
0206 #ifdef GPU_DEBUG
0207 alpaka::wait(queue);
0208 #endif
0209 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_fillNLayers<TrackerTraits>{}, tracks_view, this->device_hitTuple_apc_);
0210
0211 #ifdef GPU_DEBUG
0212 alpaka::wait(queue);
0213 #endif
0214
0215
0216 numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0217 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0218
0219 alpaka::exec<Acc1D>(queue,
0220 workDiv1D,
0221 Kernel_earlyDuplicateRemover<TrackerTraits>{},
0222 this->device_theCells_.data(),
0223 this->device_nCells_.data(),
0224 tracks_view,
0225 this->m_params.dupPassThrough_);
0226 #ifdef GPU_DEBUG
0227 alpaka::wait(queue);
0228 #endif
0229
0230 blockSize = 128;
0231 numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfTuples / 4, blockSize);
0232 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0233
0234 alpaka::exec<Acc1D>(queue,
0235 workDiv1D,
0236 Kernel_countMultiplicity<TrackerTraits>{},
0237 tracks_view,
0238 this->device_tupleMultiplicity_.data());
0239 TupleMultiplicity::template launchFinalize<Acc1D>(this->device_tupleMultiplicity_.data(), queue);
0240
0241 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0242 alpaka::exec<Acc1D>(
0243 queue, workDiv1D, Kernel_fillMultiplicity<TrackerTraits>{}, tracks_view, this->device_tupleMultiplicity_.data());
0244 #ifdef GPU_DEBUG
0245 alpaka::wait(queue);
0246 #endif
0247
0248 if (this->m_params.lateFishbone_ and nhits > offsetBPIX2) {
0249 const auto nthTot = 128;
0250 const auto stride = 16;
0251 const auto blockSize = nthTot / stride;
0252 const auto numberOfBlocks = cms::alpakatools::divide_up_by(nhits - offsetBPIX2, blockSize);
0253 const Vec2D blks{numberOfBlocks, 1u};
0254 const Vec2D thrs{blockSize, stride};
0255 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
0256
0257 alpaka::exec<Acc2D>(queue,
0258 workDiv2D,
0259 CAFishbone<TrackerTraits>{},
0260 hh,
0261 this->device_theCells_.data(),
0262 this->device_nCells_.data(),
0263 this->isOuterHitOfCell_.data(),
0264 nhits,
0265 true);
0266 }
0267
0268 #ifdef GPU_DEBUG
0269 alpaka::wait(queue);
0270 #endif
0271 }
0272
0273 template <typename TrackerTraits>
0274 void CAHitNtupletGeneratorKernels<TrackerTraits>::buildDoublets(const HitsConstView &hh,
0275 uint32_t offsetBPIX2,
0276 Queue &queue) {
0277 using namespace caPixelDoublets;
0278 using CACell = CACellT<TrackerTraits>;
0279 using OuterHitOfCell = typename CACell::OuterHitOfCell;
0280 using CellNeighbors = typename CACell::CellNeighbors;
0281 using CellTracks = typename CACell::CellTracks;
0282 using OuterHitOfCellContainer = typename CACell::OuterHitOfCellContainer;
0283
0284 auto nhits = hh.metadata().size();
0285 #ifdef NTUPLE_DEBUG
0286 std::cout << "building Doublets out of " << nhits << " Hits" << std::endl;
0287 #endif
0288
0289 #ifdef GPU_DEBUG
0290 alpaka::wait(queue);
0291 #endif
0292
0293
0294 ALPAKA_ASSERT_ACC(this->device_isOuterHitOfCell_.data());
0295
0296 alpaka::exec<Acc1D>(
0297 queue,
0298 cms::alpakatools::make_workdiv<Acc1D>(1, 1),
0299 [] ALPAKA_FN_ACC(Acc1D const &acc,
0300 OuterHitOfCell *isOuterHitOfCell,
0301 OuterHitOfCellContainer *container,
0302 int32_t const *offset) {
0303
0304 isOuterHitOfCell->container = container;
0305 isOuterHitOfCell->offset = *offset;
0306 },
0307 this->isOuterHitOfCell_.data(),
0308 this->device_isOuterHitOfCell_.data(),
0309 &hh.offsetBPIX2());
0310
0311 {
0312 int threadsPerBlock = 128;
0313
0314 int blocks = std::max(1u, cms::alpakatools::divide_up_by(nhits - offsetBPIX2, threadsPerBlock));
0315 const auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlock);
0316
0317 alpaka::exec<Acc1D>(queue,
0318 workDiv1D,
0319 InitDoublets<TrackerTraits>{},
0320 this->isOuterHitOfCell_.data(),
0321 nhits,
0322 this->device_theCellNeighbors_.data(),
0323 this->device_theCellNeighborsContainer_,
0324 this->device_theCellTracks_.data(),
0325 this->device_theCellTracksContainer_);
0326 }
0327
0328 #ifdef GPU_DEBUG
0329 alpaka::wait(queue);
0330 #endif
0331
0332 if (0 == nhits)
0333 return;
0334
0335
0336 auto nActualPairs = this->m_params.nPairs();
0337
0338 const int stride = 4;
0339 const int threadsPerBlock = TrackerTraits::getDoubletsFromHistoMaxBlockSize / stride;
0340 int blocks = (4 * nhits + threadsPerBlock - 1) / threadsPerBlock;
0341 const Vec2D blks{blocks, 1u};
0342 const Vec2D thrs{threadsPerBlock, stride};
0343 const auto workDiv2D = cms::alpakatools::make_workdiv<Acc2D>(blks, thrs);
0344
0345 alpaka::exec<Acc2D>(queue,
0346 workDiv2D,
0347 GetDoubletsFromHisto<TrackerTraits>{},
0348 this->device_theCells_.data(),
0349 this->device_nCells_.data(),
0350 this->device_theCellNeighbors_.data(),
0351 this->device_theCellTracks_.data(),
0352 hh,
0353 this->isOuterHitOfCell_.data(),
0354 nActualPairs,
0355 this->m_params.caParams_.maxNumberOfDoublets_,
0356 this->m_params.cellCuts_);
0357
0358 #ifdef GPU_DEBUG
0359 alpaka::wait(queue);
0360 #endif
0361 }
0362
0363 template <typename TrackerTraits>
0364 void CAHitNtupletGeneratorKernels<TrackerTraits>::classifyTuples(const HitsConstView &hh,
0365 TkSoAView &tracks_view,
0366 Queue &queue) {
0367 using namespace caHitNtupletGeneratorKernels;
0368
0369 uint32_t nhits = hh.metadata().size();
0370
0371 auto blockSize = 64;
0372
0373
0374 auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize);
0375 auto workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0376 alpaka::exec<Acc1D>(
0377 queue, workDiv1D, Kernel_classifyTracks<TrackerTraits>{}, tracks_view, this->m_params.qualityCuts_);
0378
0379 if (this->m_params.lateFishbone_) {
0380
0381 numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0382 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0383 alpaka::exec<Acc1D>(queue,
0384 workDiv1D,
0385 Kernel_fishboneCleaner<TrackerTraits>{},
0386 this->device_theCells_.data(),
0387 this->device_nCells_.data(),
0388 tracks_view);
0389 }
0390
0391
0392 numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize);
0393 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0394 alpaka::exec<Acc1D>(queue,
0395 workDiv1D,
0396 Kernel_fastDuplicateRemover<TrackerTraits>{},
0397 this->device_theCells_.data(),
0398 this->device_nCells_.data(),
0399 tracks_view,
0400 this->m_params.dupPassThrough_);
0401 #ifdef GPU_DEBUG
0402 alpaka::wait(queue);
0403 #endif
0404
0405 if (this->m_params.doSharedHitCut_ || this->m_params.doStats_) {
0406
0407 numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize);
0408 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0409 alpaka::exec<Acc1D>(queue,
0410 workDiv1D,
0411 Kernel_countHitInTracks<TrackerTraits>{},
0412 tracks_view,
0413 this->device_hitToTuple_.data());
0414
0415 HitToTuple::template launchFinalize<Acc1D>(this->device_hitToTupleView_, queue);
0416 alpaka::exec<Acc1D>(
0417 queue, workDiv1D, Kernel_fillHitInTracks<TrackerTraits>{}, tracks_view, this->device_hitToTuple_.data());
0418 #ifdef GPU_DEBUG
0419 alpaka::wait(queue);
0420 #endif
0421 }
0422
0423 if (this->m_params.doSharedHitCut_) {
0424
0425 numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4,
0426 blockSize);
0427 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0428 alpaka::exec<Acc1D>(queue,
0429 workDiv1D,
0430 Kernel_rejectDuplicate<TrackerTraits>{},
0431 tracks_view,
0432 this->m_params.minHitsForSharingCut_,
0433 this->m_params.dupPassThrough_,
0434 this->device_hitToTuple_.data());
0435
0436 alpaka::exec<Acc1D>(queue,
0437 workDiv1D,
0438 Kernel_sharedHitCleaner<TrackerTraits>{},
0439 hh,
0440 tracks_view,
0441 this->m_params.minHitsForSharingCut_,
0442 this->m_params.dupPassThrough_,
0443 this->device_hitToTuple_.data());
0444
0445 if (this->m_params.useSimpleTripletCleaner_) {
0446
0447 numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize);
0448 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0449 alpaka::exec<Acc1D>(queue,
0450 workDiv1D,
0451 Kernel_simpleTripletCleaner<TrackerTraits>{},
0452 tracks_view,
0453 this->m_params.minHitsForSharingCut_,
0454 this->m_params.dupPassThrough_,
0455 this->device_hitToTuple_.data());
0456 } else {
0457 numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize);
0458 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0459 alpaka::exec<Acc1D>(queue,
0460 workDiv1D,
0461 Kernel_tripletCleaner<TrackerTraits>{},
0462 tracks_view,
0463 this->m_params.minHitsForSharingCut_,
0464 this->m_params.dupPassThrough_,
0465 this->device_hitToTuple_.data());
0466 }
0467 #ifdef GPU_DEBUG
0468 alpaka::wait(queue);
0469 #endif
0470 }
0471
0472 if (this->m_params.doStats_) {
0473 numberOfBlocks =
0474 cms::alpakatools::divide_up_by(std::max(nhits, m_params.caParams_.maxNumberOfDoublets_), blockSize);
0475 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0476
0477 alpaka::exec<Acc1D>(queue,
0478 workDiv1D,
0479 Kernel_checkOverflows<TrackerTraits>{},
0480 tracks_view,
0481 this->device_tupleMultiplicity_.data(),
0482 this->device_hitToTuple_.data(),
0483 this->device_hitTuple_apc_,
0484 this->device_theCells_.data(),
0485 this->device_nCells_.data(),
0486 this->device_theCellNeighbors_.data(),
0487 this->device_theCellTracks_.data(),
0488 this->isOuterHitOfCell_.data(),
0489 nhits,
0490 this->m_params.caParams_.maxNumberOfDoublets_,
0491 this->counters_.data());
0492 }
0493
0494 if (this->m_params.doStats_) {
0495
0496
0497 numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize);
0498 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0499 alpaka::exec<Acc1D>(queue,
0500 workDiv1D,
0501 Kernel_doStatsForHitInTracks<TrackerTraits>{},
0502 this->device_hitToTuple_.data(),
0503 this->counters_.data());
0504
0505 numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize);
0506 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0507 alpaka::exec<Acc1D>(
0508 queue, workDiv1D, Kernel_doStatsForTracks<TrackerTraits>{}, tracks_view, this->counters_.data());
0509 }
0510 #ifdef GPU_DEBUG
0511 alpaka::wait(queue);
0512 #endif
0513
0514 #ifdef DUMP_GPU_TK_TUPLES
0515 static std::atomic<int> iev(0);
0516 static std::mutex lock;
0517 workDiv1D = cms::alpakatools::make_workdiv<Acc1D>(1u, 32u);
0518 {
0519 std::lock_guard<std::mutex> guard(lock);
0520 ++iev;
0521 for (int k = 0; k < 20000; k += 500) {
0522 alpaka::exec<Acc1D>(queue,
0523 workDiv1D,
0524 Kernel_print_found_ntuplets<TrackerTraits>{},
0525 hh,
0526 tracks_view,
0527 this->device_hitToTuple_.data(),
0528 k,
0529 k + 500,
0530 iev);
0531 alpaka::wait(queue);
0532 }
0533 alpaka::exec<Acc1D>(queue,
0534 workDiv1D,
0535 Kernel_print_found_ntuplets<TrackerTraits>{},
0536 hh,
0537 tracks_view,
0538 this->device_hitToTuple_.data(),
0539 20000,
0540 1000000,
0541 iev);
0542
0543 alpaka::wait(queue);
0544 }
0545 #endif
0546 }
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557 template class CAHitNtupletGeneratorKernels<pixelTopology::Phase1>;
0558 template class CAHitNtupletGeneratorKernels<pixelTopology::Phase2>;
0559 template class CAHitNtupletGeneratorKernels<pixelTopology::HIonPhase1>;
0560
0561 }