File indexing completed on 2025-07-03 00:42:36
0001
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007
0008
0009 #include <alpaka/alpaka.hpp>
0010
0011
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0030 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0031 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0032
0033
0034 #include "CalibPixel.h"
0035 #include "ClusterChargeCut.h"
0036 #include "PixelClustering.h"
0037 #include "SiPixelRawToClusterKernel.h"
0038
0039
0040
0041 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0042 namespace pixelDetails {
0043
0044 ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0045 return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0046 }
0047
0048 ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0049 uint8_t fed,
0050 uint32_t link,
0051 uint32_t roc) {
0052 using namespace ::pixelDetails;
0053 uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0054 DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0055 return detId;
0056 }
0057
0058
0059
0060
0061 ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0062 bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0063 int slopeRow = 0, slopeCol = 0;
0064 int rowOffset = 0, colOffset = 0;
0065
0066 if (bpix) {
0067 if (side == -1 && layer != 1) {
0068 if (rocIdInDetUnit < 8) {
0069 slopeRow = 1;
0070 slopeCol = -1;
0071 rowOffset = 0;
0072 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0073 } else {
0074 slopeRow = -1;
0075 slopeCol = 1;
0076 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0077 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0078 }
0079 } else {
0080 if (rocIdInDetUnit < 8) {
0081 slopeRow = -1;
0082 slopeCol = 1;
0083 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0084 colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0085 } else {
0086 slopeRow = 1;
0087 slopeCol = -1;
0088 rowOffset = 0;
0089 colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0090 }
0091 }
0092
0093 } else {
0094 if (side == -1) {
0095 if (rocIdInDetUnit < 8) {
0096 slopeRow = 1;
0097 slopeCol = -1;
0098 rowOffset = 0;
0099 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0100 } else {
0101 slopeRow = -1;
0102 slopeCol = 1;
0103 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0104 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0105 }
0106 } else {
0107 if (rocIdInDetUnit < 8) {
0108 slopeRow = 1;
0109 slopeCol = -1;
0110 rowOffset = 0;
0111 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0112 } else {
0113 slopeRow = -1;
0114 slopeCol = 1;
0115 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0116 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0117 }
0118
0119 }
0120 }
0121
0122 uint32_t gRow = rowOffset + slopeRow * local.row;
0123 uint32_t gCol = colOffset + slopeCol * local.col;
0124
0125 ::pixelDetails::Pixel global = {gRow, gCol};
0126 return global;
0127 }
0128
0129
0130 template <bool debug = false>
0131 ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0132 uint8_t errorType = 0;
0133
0134 switch (status) {
0135 case 1: {
0136 if constexpr (debug)
0137 printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0138 errorType = 35;
0139 break;
0140 }
0141 case 2: {
0142 if constexpr (debug)
0143 printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0144 errorType = 36;
0145 break;
0146 }
0147 case 3: {
0148 if constexpr (debug)
0149 printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0150 errorType = 37;
0151 break;
0152 }
0153 case 4: {
0154 if constexpr (debug)
0155 printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0156 errorType = 38;
0157 break;
0158 }
0159 default:
0160 if constexpr (debug)
0161 printf("Cabling check returned unexpected result, status = %i\n", status);
0162 };
0163
0164 return errorType;
0165 }
0166
0167 ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0168
0169 return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0170 }
0171
0172 ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0173
0174
0175 template <bool debug = false>
0176 ALPAKA_FN_ACC uint8_t
0177 checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0178 uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0179 if (errorType < 25)
0180 return 0;
0181 bool errorFound = false;
0182
0183 switch (errorType) {
0184 case 25: {
0185 errorFound = true;
0186 uint32_t index =
0187 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0188 if (index > 1 && index <= cablingMap.size()) {
0189 if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0190 errorFound = false;
0191 }
0192 if constexpr (debug)
0193 if (errorFound)
0194 printf("Invalid ROC = 25 found (errorType = 25)\n");
0195 break;
0196 }
0197 case 26: {
0198 if constexpr (debug)
0199 printf("Gap word found (errorType = 26)\n");
0200 break;
0201 }
0202 case 27: {
0203 if constexpr (debug)
0204 printf("Dummy word found (errorType = 27)\n");
0205 break;
0206 }
0207 case 28: {
0208 if constexpr (debug)
0209 printf("Error fifo nearly full (errorType = 28)\n");
0210 errorFound = true;
0211 break;
0212 }
0213 case 29: {
0214 if constexpr (debug)
0215 printf("Timeout on a channel (errorType = 29)\n");
0216 if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0217 if constexpr (debug)
0218 printf("...2nd errorType=29 error, skip\n");
0219 break;
0220 }
0221 errorFound = true;
0222 break;
0223 }
0224 case 30: {
0225 if constexpr (debug)
0226 printf("TBM error trailer (errorType = 30)\n");
0227 int stateMatch_bits = 4;
0228 int stateMatch_shift = 8;
0229 uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0230 int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0231 if (stateMatch != 1 && stateMatch != 8) {
0232 if constexpr (debug)
0233 printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0234 break;
0235 }
0236 if (stateMatch == 1)
0237 errorType = 40;
0238 errorFound = true;
0239 break;
0240 }
0241 case 31: {
0242 if constexpr (debug)
0243 printf("Event number error (errorType = 31)\n");
0244 errorFound = true;
0245 break;
0246 }
0247 default:
0248 errorFound = false;
0249 };
0250
0251 return errorFound ? errorType : 0;
0252 }
0253
0254
0255 template <bool debug = false>
0256 ALPAKA_FN_ACC uint32_t
0257 getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0258 uint32_t rID = 0xffffffff;
0259
0260 switch (errorType) {
0261 case 25:
0262 case 29:
0263 case 30:
0264 case 31:
0265 case 36:
0266 case 40: {
0267 uint32_t roc = 1;
0268 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0269 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0270 if (rID_temp != ::pixelClustering::invalidModuleId)
0271 rID = rID_temp;
0272 break;
0273 }
0274 case 37:
0275 case 38: {
0276 uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0277 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0278 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0279 if (rID_temp != ::pixelClustering::invalidModuleId)
0280 rID = rID_temp;
0281 break;
0282 }
0283 default:
0284 break;
0285 };
0286
0287 return rID;
0288 }
0289
0290
0291 template <bool debug = false>
0292 struct RawToDigi_kernel {
0293 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0294 const SiPixelMappingSoAConstView &cablingMap,
0295 const unsigned char *modToUnp,
0296 const uint32_t wordCounter,
0297 const uint32_t *word,
0298 const uint8_t *fedIds,
0299 SiPixelDigisSoAView digisView,
0300 SiPixelDigiErrorsSoAView err,
0301 bool useQualityInfo,
0302 bool includeErrors) const {
0303
0304 if (cms::alpakatools::once_per_grid(acc))
0305 err.size() = 0;
0306
0307 for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308 auto dvgi = digisView[gIndex];
0309 dvgi.xx() = 0;
0310 dvgi.yy() = 0;
0311 dvgi.adc() = 0;
0312
0313
0314 err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315
0316 uint8_t fedId = fedIds[gIndex / 2];
0317
0318
0319 dvgi.pdigi() = 0;
0320 dvgi.rawIdArr() = 0;
0321 dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322
0323 uint32_t ww = word[gIndex];
0324 if (ww == 0) {
0325
0326 continue;
0327 }
0328
0329 uint32_t link = sipixelconstants::getLink(ww);
0330 uint32_t roc = sipixelconstants::getROC(ww);
0331
0332 uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333 bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334 if (includeErrors and skipROC) {
0335 uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336 if (rawId != 0xffffffff)
0337 {
0338 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340 }
0341 continue;
0342 }
0343
0344
0345 if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346 uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347 if constexpr (debug) {
0348 printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349 }
0350 if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351 uint8_t error = conversionError<debug>(fedId, 2);
0352 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354 }
0355 continue;
0356 }
0357
0358 uint32_t index =
0359 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360 if (useQualityInfo) {
0361 skipROC = cablingMap.badRocs()[index];
0362 if (skipROC)
0363 continue;
0364 }
0365 skipROC = modToUnp[index];
0366 if (skipROC)
0367 continue;
0368
0369 ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370 uint32_t rawId = detId.rawId;
0371 uint32_t layer = 0;
0372 int side = 0, panel = 0, module = 0;
0373 bool barrel = isBarrel(rawId);
0374
0375 if (barrel) {
0376 layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377 module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378 side = (module < 5) ? -1 : 1;
0379 } else {
0380
0381 layer = 0;
0382 panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383 side = (panel == 1) ? -1 : 1;
0384 }
0385
0386 ::pixelDetails::Pixel localPix;
0387 if (layer == 1) {
0388
0389 uint32_t col = sipixelconstants::getCol(ww);
0390 uint32_t row = sipixelconstants::getRow(ww);
0391 localPix.row = row;
0392 localPix.col = col;
0393 if (includeErrors and not rocRowColIsValid(row, col)) {
0394 uint8_t error = conversionError<debug>(fedId, 3);
0395 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397 if constexpr (debug)
0398 printf("BPIX1 Error status: %i\n", error);
0399 continue;
0400 }
0401 } else {
0402
0403 uint32_t dcol = sipixelconstants::getDCol(ww);
0404 uint32_t pxid = sipixelconstants::getPxId(ww);
0405 uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406 uint32_t col = dcol * 2 + pxid % 2;
0407 localPix.row = row;
0408 localPix.col = col;
0409 if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410 uint8_t error = conversionError<debug>(fedId, 3);
0411 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413 if constexpr (debug)
0414 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415 continue;
0416 }
0417 }
0418
0419 ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420 dvgi.xx() = globalPix.row;
0421 dvgi.yy() = globalPix.col;
0422 dvgi.adc() = sipixelconstants::getADC(ww);
0423 dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424 dvgi.moduleId() = detId.moduleId;
0425 dvgi.rawIdArr() = rawId;
0426 }
0427
0428 }
0429 };
0430
0431
0432 template <typename TrackerTraits>
0433 struct ShowHitsModuleStart {
0434 template <typename TAcc>
0435 ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
0436 if (cms::alpakatools::once_per_grid(acc)) {
0437 for (int i = 0; i < TrackerTraits::numberOfModules; i++)
0438 printf("%d \n", clus_view[i].clusModuleStart());
0439 }
0440 }
0441 };
0442
0443
0444 template <typename TrackerTraits>
0445 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0446 Queue &queue,
0447 const SiPixelClusterThresholds clusterThresholds,
0448 const SiPixelMappingSoAConstView &cablingMap,
0449 const unsigned char *modToUnp,
0450 const SiPixelGainCalibrationForHLTSoAConstView &gains,
0451 const WordFedAppender &wordFed,
0452 const uint32_t wordCounter,
0453 const uint32_t fedCounter,
0454 bool useQualityInfo,
0455 bool includeErrors,
0456 bool debug) {
0457 nDigis = wordCounter;
0458
0459 #ifdef GPU_DEBUG
0460 std::cout << "decoding " << wordCounter << " digis." << std::endl;
0461 #endif
0462 constexpr int numberOfModules = TrackerTraits::numberOfModules;
0463 digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0464 if (includeErrors) {
0465 digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0466 }
0467 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0468
0469 if (wordCounter) {
0470 const int threadsPerBlockOrElementsPerThread =
0471 cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0472
0473 const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0474 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0475 assert(0 == wordCounter % 2);
0476
0477 auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0478
0479
0480
0481 auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0482 alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0483 alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0484
0485 if (debug) {
0486 alpaka::exec<Acc1D>(queue,
0487 workDiv,
0488 RawToDigi_kernel<true>{},
0489 cablingMap,
0490 modToUnp,
0491 wordCounter,
0492 word_d.data(),
0493 fedId_d.data(),
0494 digis_d->view(),
0495 digiErrors_d->view(),
0496 useQualityInfo,
0497 includeErrors);
0498 } else {
0499 alpaka::exec<Acc1D>(queue,
0500 workDiv,
0501 RawToDigi_kernel<false>{},
0502 cablingMap,
0503 modToUnp,
0504 wordCounter,
0505 word_d.data(),
0506 fedId_d.data(),
0507 digis_d->view(),
0508 digiErrors_d->view(),
0509 useQualityInfo,
0510 includeErrors);
0511 }
0512
0513 #ifdef GPU_DEBUG
0514 alpaka::wait(queue);
0515 std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0516 #endif
0517 }
0518
0519
0520 {
0521
0522 using namespace pixelClustering;
0523
0524 using namespace calibPixel;
0525 const int threadsPerBlockOrElementsPerThread = []() {
0526 if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0527
0528 return 32;
0529 } else {
0530 return 256;
0531 }
0532 }();
0533 const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0534 threadsPerBlockOrElementsPerThread);
0535 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0536
0537 if (debug) {
0538 alpaka::exec<Acc1D>(queue,
0539 workDiv,
0540 CalibDigis<true>{},
0541 clusterThresholds,
0542 digis_d->view(),
0543 clusters_d->view(),
0544 gains,
0545 wordCounter);
0546 } else {
0547 alpaka::exec<Acc1D>(queue,
0548 workDiv,
0549 CalibDigis<false>{},
0550 clusterThresholds,
0551 digis_d->view(),
0552 clusters_d->view(),
0553 gains,
0554 wordCounter);
0555 }
0556 #ifdef GPU_DEBUG
0557 alpaka::wait(queue);
0558 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0559 << " threadsPerBlockOrElementsPerThread\n";
0560 #endif
0561
0562 alpaka::exec<Acc1D>(
0563 queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0564
0565 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0566 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0567
0568 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0569 const auto workDivMaxNumModules =
0570 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0571 #ifdef GPU_DEBUG
0572 std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0573 << " threadsPerBlockOrElementsPerThread\n";
0574 #endif
0575 alpaka::exec<Acc1D>(
0576 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0577 #ifdef GPU_DEBUG
0578 alpaka::wait(queue);
0579 #endif
0580
0581 constexpr auto threadsPerBlockChargeCut = 256;
0582 const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0583
0584 alpaka::exec<Acc1D>(queue,
0585 workDivChargeCut,
0586 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0587 digis_d->view(),
0588 clusters_d->view(),
0589 clusterThresholds,
0590 wordCounter);
0591
0592
0593
0594
0595
0596 constexpr auto threadsPrefixScan = 1024;
0597 constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan;
0598 auto workDivPrefixScan = cms::alpakatools::make_workdiv<Acc1D>(blocksPrefixScan, threadsPrefixScan);
0599 auto bCounter = cms::alpakatools::make_device_buffer<int32_t>(queue);
0600 alpaka::memset(queue, bCounter, 0);
0601
0602 alpaka::exec<Acc1D>(queue,
0603 workDivPrefixScan,
0604 cms::alpakatools::multiBlockPrefixScan<uint32_t>(),
0605 clusters_d->view().clusInModule(),
0606 clusters_d->view().clusModuleStart() + 1,
0607 TrackerTraits::numberOfModules,
0608 blocksPrefixScan,
0609 bCounter.data(),
0610 alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
0611
0612
0613 const auto clusModuleStartLastElement =
0614 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0615 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0616
0617 const auto bpix2ClusterStart =
0618 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0619 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0620 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0621
0622 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0623 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0624
0625 #ifdef GPU_DEBUG
0626 alpaka::wait(queue);
0627 std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0628 << " > no. of digis: " << nDigis << std::endl
0629 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0630 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0631 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0632 #endif
0633
0634 }
0635 }
0636
0637 template <typename TrackerTraits>
0638 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0639 Queue &queue,
0640 const SiPixelClusterThresholds clusterThresholds,
0641 SiPixelDigisSoAView &digis_view,
0642 const uint32_t numDigis) {
0643 using namespace pixelClustering;
0644 using pixelTopology::Phase2;
0645 nDigis = numDigis;
0646 constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0647 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0648 const auto threadsPerBlockOrElementsPerThread = 512;
0649 const auto blocks =
0650 cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0651 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0652
0653 alpaka::exec<Acc1D>(
0654 queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0655
0656 #ifdef GPU_DEBUG
0657 alpaka::wait(queue);
0658 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0659 << " threadsPerBlockOrElementsPerThread\n";
0660 #endif
0661 alpaka::exec<Acc1D>(
0662 queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0663
0664 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0665 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0666
0667 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0668 const auto workDivMaxNumModules =
0669 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0670 #ifdef GPU_DEBUG
0671 alpaka::wait(queue);
0672 std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0673 << " threadsPerBlockOrElementsPerThread\n";
0674 #endif
0675 alpaka::exec<Acc1D>(
0676 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0677 #ifdef GPU_DEBUG
0678 alpaka::wait(queue);
0679 #endif
0680
0681
0682 alpaka::exec<Acc1D>(queue,
0683 workDivMaxNumModules,
0684 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0685 digis_view,
0686 clusters_d->view(),
0687 clusterThresholds,
0688 numDigis);
0689
0690
0691
0692
0693
0694
0695 constexpr auto threadsPrefixScan = 1024;
0696 constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan;
0697 auto workDivPrefixScan = cms::alpakatools::make_workdiv<Acc1D>(blocksPrefixScan, threadsPrefixScan);
0698 auto bCounter = cms::alpakatools::make_device_buffer<int32_t>(queue);
0699 alpaka::memset(queue, bCounter, 0);
0700
0701 alpaka::exec<Acc1D>(queue,
0702 workDivPrefixScan,
0703 cms::alpakatools::multiBlockPrefixScan<uint32_t>(),
0704 clusters_d->view().clusInModule(),
0705 clusters_d->view().clusModuleStart() + 1,
0706 TrackerTraits::numberOfModules,
0707 blocksPrefixScan,
0708 bCounter.data(),
0709 alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
0710
0711
0712 const auto clusModuleStartLastElement =
0713 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0714 constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0715
0716 const auto bpix2ClusterStart =
0717 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0718 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0719 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0720
0721 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0722 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0723
0724 #ifdef GPU_DEBUG
0725 alpaka::wait(queue);
0726 std::cout << "SiPixelPhase2DigiToCluster: results \n"
0727 << " > no. of digis: " << numDigis << std::endl
0728 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0729 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0730 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0731 #endif
0732 }
0733
0734 template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0735 template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0736 template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0737
0738 }
0739
0740 }