File indexing completed on 2025-02-14 03:16:54
0001
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007
0008
0009 #include <alpaka/alpaka.hpp>
0010
0011
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0030 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0031 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0032
0033
0034 #include "CalibPixel.h"
0035 #include "ClusterChargeCut.h"
0036 #include "PixelClustering.h"
0037 #include "SiPixelRawToClusterKernel.h"
0038
0039
0040
0041 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0042 namespace pixelDetails {
0043
0044 ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0045 return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0046 }
0047
0048 ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0049 uint8_t fed,
0050 uint32_t link,
0051 uint32_t roc) {
0052 using namespace ::pixelDetails;
0053 uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0054 DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0055 return detId;
0056 }
0057
0058
0059
0060
0061 ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0062 bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0063 int slopeRow = 0, slopeCol = 0;
0064 int rowOffset = 0, colOffset = 0;
0065
0066 if (bpix) {
0067 if (side == -1 && layer != 1) {
0068 if (rocIdInDetUnit < 8) {
0069 slopeRow = 1;
0070 slopeCol = -1;
0071 rowOffset = 0;
0072 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0073 } else {
0074 slopeRow = -1;
0075 slopeCol = 1;
0076 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0077 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0078 }
0079 } else {
0080 if (rocIdInDetUnit < 8) {
0081 slopeRow = -1;
0082 slopeCol = 1;
0083 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0084 colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0085 } else {
0086 slopeRow = 1;
0087 slopeCol = -1;
0088 rowOffset = 0;
0089 colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0090 }
0091 }
0092
0093 } else {
0094 if (side == -1) {
0095 if (rocIdInDetUnit < 8) {
0096 slopeRow = 1;
0097 slopeCol = -1;
0098 rowOffset = 0;
0099 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0100 } else {
0101 slopeRow = -1;
0102 slopeCol = 1;
0103 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0104 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0105 }
0106 } else {
0107 if (rocIdInDetUnit < 8) {
0108 slopeRow = 1;
0109 slopeCol = -1;
0110 rowOffset = 0;
0111 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0112 } else {
0113 slopeRow = -1;
0114 slopeCol = 1;
0115 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0116 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0117 }
0118
0119 }
0120 }
0121
0122 uint32_t gRow = rowOffset + slopeRow * local.row;
0123 uint32_t gCol = colOffset + slopeCol * local.col;
0124
0125 ::pixelDetails::Pixel global = {gRow, gCol};
0126 return global;
0127 }
0128
0129
0130 template <bool debug = false>
0131 ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0132 uint8_t errorType = 0;
0133
0134 switch (status) {
0135 case 1: {
0136 if constexpr (debug)
0137 printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0138 errorType = 35;
0139 break;
0140 }
0141 case 2: {
0142 if constexpr (debug)
0143 printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0144 errorType = 36;
0145 break;
0146 }
0147 case 3: {
0148 if constexpr (debug)
0149 printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0150 errorType = 37;
0151 break;
0152 }
0153 case 4: {
0154 if constexpr (debug)
0155 printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0156 errorType = 38;
0157 break;
0158 }
0159 default:
0160 if constexpr (debug)
0161 printf("Cabling check returned unexpected result, status = %i\n", status);
0162 };
0163
0164 return errorType;
0165 }
0166
0167 ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0168
0169 return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0170 }
0171
0172 ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0173
0174
0175 template <bool debug = false>
0176 ALPAKA_FN_ACC uint8_t
0177 checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0178 uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0179 if (errorType < 25)
0180 return 0;
0181 bool errorFound = false;
0182
0183 switch (errorType) {
0184 case 25: {
0185 errorFound = true;
0186 uint32_t index =
0187 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0188 if (index > 1 && index <= cablingMap.size()) {
0189 if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0190 errorFound = false;
0191 }
0192 if constexpr (debug)
0193 if (errorFound)
0194 printf("Invalid ROC = 25 found (errorType = 25)\n");
0195 break;
0196 }
0197 case 26: {
0198 if constexpr (debug)
0199 printf("Gap word found (errorType = 26)\n");
0200 break;
0201 }
0202 case 27: {
0203 if constexpr (debug)
0204 printf("Dummy word found (errorType = 27)\n");
0205 break;
0206 }
0207 case 28: {
0208 if constexpr (debug)
0209 printf("Error fifo nearly full (errorType = 28)\n");
0210 errorFound = true;
0211 break;
0212 }
0213 case 29: {
0214 if constexpr (debug)
0215 printf("Timeout on a channel (errorType = 29)\n");
0216 if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0217 if constexpr (debug)
0218 printf("...2nd errorType=29 error, skip\n");
0219 break;
0220 }
0221 errorFound = true;
0222 break;
0223 }
0224 case 30: {
0225 if constexpr (debug)
0226 printf("TBM error trailer (errorType = 30)\n");
0227 int stateMatch_bits = 4;
0228 int stateMatch_shift = 8;
0229 uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0230 int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0231 if (stateMatch != 1 && stateMatch != 8) {
0232 if constexpr (debug)
0233 printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0234 break;
0235 }
0236 if (stateMatch == 1)
0237 errorType = 40;
0238 errorFound = true;
0239 break;
0240 }
0241 case 31: {
0242 if constexpr (debug)
0243 printf("Event number error (errorType = 31)\n");
0244 errorFound = true;
0245 break;
0246 }
0247 default:
0248 errorFound = false;
0249 };
0250
0251 return errorFound ? errorType : 0;
0252 }
0253
0254
0255 template <bool debug = false>
0256 ALPAKA_FN_ACC uint32_t
0257 getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0258 uint32_t rID = 0xffffffff;
0259
0260 switch (errorType) {
0261 case 25:
0262 case 29:
0263 case 30:
0264 case 31:
0265 case 36:
0266 case 40: {
0267 uint32_t roc = 1;
0268 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0269 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0270 if (rID_temp != ::pixelClustering::invalidModuleId)
0271 rID = rID_temp;
0272 break;
0273 }
0274 case 37:
0275 case 38: {
0276 uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0277 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0278 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0279 if (rID_temp != ::pixelClustering::invalidModuleId)
0280 rID = rID_temp;
0281 break;
0282 }
0283 default:
0284 break;
0285 };
0286
0287 return rID;
0288 }
0289
0290
0291 template <bool debug = false>
0292 struct RawToDigi_kernel {
0293 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0294 const SiPixelMappingSoAConstView &cablingMap,
0295 const unsigned char *modToUnp,
0296 const uint32_t wordCounter,
0297 const uint32_t *word,
0298 const uint8_t *fedIds,
0299 SiPixelDigisSoAView digisView,
0300 SiPixelDigiErrorsSoAView err,
0301 bool useQualityInfo,
0302 bool includeErrors) const {
0303
0304 if (cms::alpakatools::once_per_grid(acc))
0305 err.size() = 0;
0306
0307 for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308 auto dvgi = digisView[gIndex];
0309 dvgi.xx() = 0;
0310 dvgi.yy() = 0;
0311 dvgi.adc() = 0;
0312
0313
0314 err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315
0316 uint8_t fedId = fedIds[gIndex / 2];
0317
0318
0319 dvgi.pdigi() = 0;
0320 dvgi.rawIdArr() = 0;
0321 dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322
0323 uint32_t ww = word[gIndex];
0324 if (ww == 0) {
0325
0326 continue;
0327 }
0328
0329 uint32_t link = sipixelconstants::getLink(ww);
0330 uint32_t roc = sipixelconstants::getROC(ww);
0331
0332 uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333 bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334 if (includeErrors and skipROC) {
0335 uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336 if (rawId != 0xffffffff)
0337 {
0338 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340 }
0341 continue;
0342 }
0343
0344
0345 if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346 uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347 if constexpr (debug) {
0348 printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349 }
0350 if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351 uint8_t error = conversionError<debug>(fedId, 2);
0352 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354 }
0355 continue;
0356 }
0357
0358 uint32_t index =
0359 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360 if (useQualityInfo) {
0361 skipROC = cablingMap.badRocs()[index];
0362 if (skipROC)
0363 continue;
0364 }
0365 skipROC = modToUnp[index];
0366 if (skipROC)
0367 continue;
0368
0369 ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370 uint32_t rawId = detId.rawId;
0371 uint32_t layer = 0;
0372 int side = 0, panel = 0, module = 0;
0373 bool barrel = isBarrel(rawId);
0374
0375 if (barrel) {
0376 layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377 module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378 side = (module < 5) ? -1 : 1;
0379 } else {
0380
0381 layer = 0;
0382 panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383 side = (panel == 1) ? -1 : 1;
0384 }
0385
0386 ::pixelDetails::Pixel localPix;
0387 if (layer == 1) {
0388
0389 uint32_t col = sipixelconstants::getCol(ww);
0390 uint32_t row = sipixelconstants::getRow(ww);
0391 localPix.row = row;
0392 localPix.col = col;
0393 if (includeErrors and not rocRowColIsValid(row, col)) {
0394 uint8_t error = conversionError<debug>(fedId, 3);
0395 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397 if constexpr (debug)
0398 printf("BPIX1 Error status: %i\n", error);
0399 continue;
0400 }
0401 } else {
0402
0403 uint32_t dcol = sipixelconstants::getDCol(ww);
0404 uint32_t pxid = sipixelconstants::getPxId(ww);
0405 uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406 uint32_t col = dcol * 2 + pxid % 2;
0407 localPix.row = row;
0408 localPix.col = col;
0409 if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410 uint8_t error = conversionError<debug>(fedId, 3);
0411 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413 if constexpr (debug)
0414 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415 continue;
0416 }
0417 }
0418
0419 ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420 dvgi.xx() = globalPix.row;
0421 dvgi.yy() = globalPix.col;
0422 dvgi.adc() = sipixelconstants::getADC(ww);
0423 dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424 dvgi.moduleId() = detId.moduleId;
0425 dvgi.rawIdArr() = rawId;
0426 }
0427
0428 }
0429 };
0430
0431 template <typename TrackerTraits>
0432 struct FillHitsModuleStart {
0433 ALPAKA_FN_ACC void operator()(Acc1D const &acc, SiPixelClustersSoAView clus_view) const {
0434
0435 [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0436 ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
0437 [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0438 ALPAKA_ASSERT_ACC(1 == gridDimension);
0439
0440
0441 constexpr int warpSize = cms::alpakatools::warpSize;
0442 constexpr int blockSize = warpSize * warpSize;
0443
0444
0445
0446 constexpr uint16_t numberOfModules = TrackerTraits::numberOfModules;
0447 constexpr uint16_t prefixScanUpperLimit = ((numberOfModules / blockSize) + 1) * blockSize;
0448 ALPAKA_ASSERT_ACC(numberOfModules < prefixScanUpperLimit);
0449
0450
0451 constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
0452 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0453 clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
0454 }
0455
0456
0457 auto &ws = alpaka::declareSharedVar<uint32_t[warpSize], __COUNTER__>(acc);
0458 uint32_t *clusModuleStart = clus_view.clusModuleStart() + 1;
0459 uint16_t leftModules = numberOfModules;
0460 while (leftModules > blockSize) {
0461 cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, blockSize, ws);
0462 clusModuleStart += blockSize;
0463 leftModules -= blockSize;
0464 }
0465 cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, leftModules, ws);
0466
0467
0468
0469 for (uint16_t doneModules = blockSize; doneModules < numberOfModules; doneModules += blockSize) {
0470 uint16_t first = doneModules + 1;
0471 uint16_t last = std::min<uint16_t>(doneModules + blockSize, numberOfModules);
0472 for (uint16_t i : cms::alpakatools::independent_group_elements(acc, first, last + 1)) {
0473 clus_view[i].clusModuleStart() += clus_view[doneModules].clusModuleStart();
0474 }
0475 alpaka::syncBlockThreads(acc);
0476 }
0477
0478 #ifdef GPU_DEBUG
0479 ALPAKA_ASSERT_ACC(0 == clus_view[0].clusModuleStart());
0480 auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
0481 ALPAKA_ASSERT_ACC(c0 == clus_view[1].clusModuleStart());
0482 ALPAKA_ASSERT_ACC(clus_view[1024].clusModuleStart() >= clus_view[1023].clusModuleStart());
0483 ALPAKA_ASSERT_ACC(clus_view[1025].clusModuleStart() >= clus_view[1024].clusModuleStart());
0484 ALPAKA_ASSERT_ACC(clus_view[numberOfModules].clusModuleStart() >= clus_view[1025].clusModuleStart());
0485
0486 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0487 ALPAKA_ASSERT_ACC(clus_view[i + 1].clusModuleStart() >= clus_view[i].clusModuleStart());
0488
0489 constexpr auto bpix2 = TrackerTraits::layerStart[1];
0490 constexpr auto fpix1 = TrackerTraits::layerStart[4];
0491 if (i == bpix2 || i == fpix1)
0492 printf("moduleStart %d %d\n", i, clus_view[i].clusModuleStart());
0493 }
0494 #endif
0495
0496 }
0497 };
0498
0499
0500 template <typename TrackerTraits>
0501 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0502 Queue &queue,
0503 const SiPixelClusterThresholds clusterThresholds,
0504 const SiPixelMappingSoAConstView &cablingMap,
0505 const unsigned char *modToUnp,
0506 const SiPixelGainCalibrationForHLTSoAConstView &gains,
0507 const WordFedAppender &wordFed,
0508 const uint32_t wordCounter,
0509 const uint32_t fedCounter,
0510 bool useQualityInfo,
0511 bool includeErrors,
0512 bool debug) {
0513 nDigis = wordCounter;
0514
0515 #ifdef GPU_DEBUG
0516 std::cout << "decoding " << wordCounter << " digis." << std::endl;
0517 #endif
0518 constexpr int numberOfModules = TrackerTraits::numberOfModules;
0519 digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0520 if (includeErrors) {
0521 digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0522 }
0523 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0524
0525 if (wordCounter) {
0526 const int threadsPerBlockOrElementsPerThread =
0527 cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0528
0529 const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0530 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0531 assert(0 == wordCounter % 2);
0532
0533 auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0534
0535
0536
0537 auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0538 alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0539 alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0540
0541 if (debug) {
0542 alpaka::exec<Acc1D>(queue,
0543 workDiv,
0544 RawToDigi_kernel<true>{},
0545 cablingMap,
0546 modToUnp,
0547 wordCounter,
0548 word_d.data(),
0549 fedId_d.data(),
0550 digis_d->view(),
0551 digiErrors_d->view(),
0552 useQualityInfo,
0553 includeErrors);
0554 } else {
0555 alpaka::exec<Acc1D>(queue,
0556 workDiv,
0557 RawToDigi_kernel<false>{},
0558 cablingMap,
0559 modToUnp,
0560 wordCounter,
0561 word_d.data(),
0562 fedId_d.data(),
0563 digis_d->view(),
0564 digiErrors_d->view(),
0565 useQualityInfo,
0566 includeErrors);
0567 }
0568
0569 #ifdef GPU_DEBUG
0570 alpaka::wait(queue);
0571 std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0572 #endif
0573 }
0574
0575
0576 {
0577
0578 using namespace pixelClustering;
0579
0580 using namespace calibPixel;
0581 const int threadsPerBlockOrElementsPerThread = []() {
0582 if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0583
0584 return 32;
0585 } else {
0586 return 256;
0587 }
0588 }();
0589 const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0590 threadsPerBlockOrElementsPerThread);
0591 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0592
0593 if (debug) {
0594 alpaka::exec<Acc1D>(queue,
0595 workDiv,
0596 CalibDigis<true>{},
0597 clusterThresholds,
0598 digis_d->view(),
0599 clusters_d->view(),
0600 gains,
0601 wordCounter);
0602 } else {
0603 alpaka::exec<Acc1D>(queue,
0604 workDiv,
0605 CalibDigis<false>{},
0606 clusterThresholds,
0607 digis_d->view(),
0608 clusters_d->view(),
0609 gains,
0610 wordCounter);
0611 }
0612 #ifdef GPU_DEBUG
0613 alpaka::wait(queue);
0614 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0615 << " threadsPerBlockOrElementsPerThread\n";
0616 #endif
0617
0618 alpaka::exec<Acc1D>(
0619 queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0620
0621 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0622 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0623
0624 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0625 const auto workDivMaxNumModules =
0626 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0627 #ifdef GPU_DEBUG
0628 std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0629 << " threadsPerBlockOrElementsPerThread\n";
0630 #endif
0631 alpaka::exec<Acc1D>(
0632 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0633 #ifdef GPU_DEBUG
0634 alpaka::wait(queue);
0635 #endif
0636
0637 constexpr auto threadsPerBlockChargeCut = 256;
0638 const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0639
0640 alpaka::exec<Acc1D>(queue,
0641 workDivChargeCut,
0642 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0643 digis_d->view(),
0644 clusters_d->view(),
0645 clusterThresholds,
0646 wordCounter);
0647
0648
0649
0650
0651
0652
0653 const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0654 alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0655
0656
0657 const auto clusModuleStartLastElement =
0658 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0659 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0660
0661
0662 const auto bpix2ClusterStart =
0663 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0664 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0665 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0666
0667 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0668 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0669
0670 #ifdef GPU_DEBUG
0671 alpaka::wait(queue);
0672 std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0673 << " > no. of digis: " << nDigis << std::endl
0674 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0675 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0676 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0677 #endif
0678
0679 }
0680 }
0681
0682 template <typename TrackerTraits>
0683 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0684 Queue &queue,
0685 const SiPixelClusterThresholds clusterThresholds,
0686 SiPixelDigisSoAView &digis_view,
0687 const uint32_t numDigis) {
0688 using namespace pixelClustering;
0689 using pixelTopology::Phase2;
0690 nDigis = numDigis;
0691 constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0692 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0693 const auto threadsPerBlockOrElementsPerThread = 512;
0694 const auto blocks =
0695 cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0696 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0697
0698 alpaka::exec<Acc1D>(
0699 queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0700
0701 #ifdef GPU_DEBUG
0702 alpaka::wait(queue);
0703 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0704 << " threadsPerBlockOrElementsPerThread\n";
0705 #endif
0706 alpaka::exec<Acc1D>(
0707 queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0708
0709 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0710 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0711
0712 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0713 const auto workDivMaxNumModules =
0714 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0715 #ifdef GPU_DEBUG
0716 alpaka::wait(queue);
0717 std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0718 << " threadsPerBlockOrElementsPerThread\n";
0719 #endif
0720 alpaka::exec<Acc1D>(
0721 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0722 #ifdef GPU_DEBUG
0723 alpaka::wait(queue);
0724 #endif
0725
0726
0727 alpaka::exec<Acc1D>(queue,
0728 workDivMaxNumModules,
0729 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0730 digis_view,
0731 clusters_d->view(),
0732 clusterThresholds,
0733 numDigis);
0734
0735
0736
0737
0738
0739
0740
0741 const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0742 alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0743
0744
0745 const auto clusModuleStartLastElement =
0746 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0747 constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0748
0749 const auto bpix2ClusterStart =
0750 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0751 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0752 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0753
0754 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0755 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0756
0757 #ifdef GPU_DEBUG
0758 alpaka::wait(queue);
0759 std::cout << "SiPixelPhase2DigiToCluster: results \n"
0760 << " > no. of digis: " << numDigis << std::endl
0761 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0762 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0763 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0764 #endif
0765 }
0766
0767 template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0768 template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0769 template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0770
0771 }
0772
0773 }