File indexing completed on 2024-09-27 23:16:56
0001
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007
0008
0009 #include <alpaka/alpaka.hpp>
0010
0011
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0030 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0031
0032
0033 #include "CalibPixel.h"
0034 #include "ClusterChargeCut.h"
0035 #include "PixelClustering.h"
0036 #include "SiPixelRawToClusterKernel.h"
0037
0038
0039
0040 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0041 namespace pixelDetails {
0042
0043 ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0044 return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0045 }
0046
0047 ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0048 uint8_t fed,
0049 uint32_t link,
0050 uint32_t roc) {
0051 using namespace ::pixelDetails;
0052 uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0053 DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0054 return detId;
0055 }
0056
0057
0058
0059
0060 ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0061 bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0062 int slopeRow = 0, slopeCol = 0;
0063 int rowOffset = 0, colOffset = 0;
0064
0065 if (bpix) {
0066 if (side == -1 && layer != 1) {
0067 if (rocIdInDetUnit < 8) {
0068 slopeRow = 1;
0069 slopeCol = -1;
0070 rowOffset = 0;
0071 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0072 } else {
0073 slopeRow = -1;
0074 slopeCol = 1;
0075 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0076 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0077 }
0078 } else {
0079 if (rocIdInDetUnit < 8) {
0080 slopeRow = -1;
0081 slopeCol = 1;
0082 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0083 colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0084 } else {
0085 slopeRow = 1;
0086 slopeCol = -1;
0087 rowOffset = 0;
0088 colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0089 }
0090 }
0091
0092 } else {
0093 if (side == -1) {
0094 if (rocIdInDetUnit < 8) {
0095 slopeRow = 1;
0096 slopeCol = -1;
0097 rowOffset = 0;
0098 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0099 } else {
0100 slopeRow = -1;
0101 slopeCol = 1;
0102 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0103 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0104 }
0105 } else {
0106 if (rocIdInDetUnit < 8) {
0107 slopeRow = 1;
0108 slopeCol = -1;
0109 rowOffset = 0;
0110 colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0111 } else {
0112 slopeRow = -1;
0113 slopeCol = 1;
0114 rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0115 colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0116 }
0117
0118 }
0119 }
0120
0121 uint32_t gRow = rowOffset + slopeRow * local.row;
0122 uint32_t gCol = colOffset + slopeCol * local.col;
0123
0124 ::pixelDetails::Pixel global = {gRow, gCol};
0125 return global;
0126 }
0127
0128
0129 template <bool debug = false>
0130 ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0131 uint8_t errorType = 0;
0132
0133 switch (status) {
0134 case 1: {
0135 if constexpr (debug)
0136 printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0137 errorType = 35;
0138 break;
0139 }
0140 case 2: {
0141 if constexpr (debug)
0142 printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0143 errorType = 36;
0144 break;
0145 }
0146 case 3: {
0147 if constexpr (debug)
0148 printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0149 errorType = 37;
0150 break;
0151 }
0152 case 4: {
0153 if constexpr (debug)
0154 printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0155 errorType = 38;
0156 break;
0157 }
0158 default:
0159 if constexpr (debug)
0160 printf("Cabling check returned unexpected result, status = %i\n", status);
0161 };
0162
0163 return errorType;
0164 }
0165
0166 ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0167
0168 return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0169 }
0170
0171 ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0172
0173
0174 template <bool debug = false>
0175 ALPAKA_FN_ACC uint8_t
0176 checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0177 uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0178 if (errorType < 25)
0179 return 0;
0180 bool errorFound = false;
0181
0182 switch (errorType) {
0183 case 25: {
0184 errorFound = true;
0185 uint32_t index =
0186 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0187 if (index > 1 && index <= cablingMap.size()) {
0188 if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0189 errorFound = false;
0190 }
0191 if constexpr (debug)
0192 if (errorFound)
0193 printf("Invalid ROC = 25 found (errorType = 25)\n");
0194 break;
0195 }
0196 case 26: {
0197 if constexpr (debug)
0198 printf("Gap word found (errorType = 26)\n");
0199 break;
0200 }
0201 case 27: {
0202 if constexpr (debug)
0203 printf("Dummy word found (errorType = 27)\n");
0204 break;
0205 }
0206 case 28: {
0207 if constexpr (debug)
0208 printf("Error fifo nearly full (errorType = 28)\n");
0209 errorFound = true;
0210 break;
0211 }
0212 case 29: {
0213 if constexpr (debug)
0214 printf("Timeout on a channel (errorType = 29)\n");
0215 if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0216 if constexpr (debug)
0217 printf("...2nd errorType=29 error, skip\n");
0218 break;
0219 }
0220 errorFound = true;
0221 break;
0222 }
0223 case 30: {
0224 if constexpr (debug)
0225 printf("TBM error trailer (errorType = 30)\n");
0226 int stateMatch_bits = 4;
0227 int stateMatch_shift = 8;
0228 uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0229 int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0230 if (stateMatch != 1 && stateMatch != 8) {
0231 if constexpr (debug)
0232 printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0233 break;
0234 }
0235 if (stateMatch == 1)
0236 errorType = 40;
0237 errorFound = true;
0238 break;
0239 }
0240 case 31: {
0241 if constexpr (debug)
0242 printf("Event number error (errorType = 31)\n");
0243 errorFound = true;
0244 break;
0245 }
0246 default:
0247 errorFound = false;
0248 };
0249
0250 return errorFound ? errorType : 0;
0251 }
0252
0253
0254 template <bool debug = false>
0255 ALPAKA_FN_ACC uint32_t
0256 getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0257 uint32_t rID = 0xffffffff;
0258
0259 switch (errorType) {
0260 case 25:
0261 case 29:
0262 case 30:
0263 case 31:
0264 case 36:
0265 case 40: {
0266 uint32_t roc = 1;
0267 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0268 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0269 if (rID_temp != ::pixelClustering::invalidModuleId)
0270 rID = rID_temp;
0271 break;
0272 }
0273 case 37:
0274 case 38: {
0275 uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0276 uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0277 uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0278 if (rID_temp != ::pixelClustering::invalidModuleId)
0279 rID = rID_temp;
0280 break;
0281 }
0282 default:
0283 break;
0284 };
0285
0286 return rID;
0287 }
0288
0289
0290 template <bool debug = false>
0291 struct RawToDigi_kernel {
0292 template <typename TAcc>
0293 ALPAKA_FN_ACC void operator()(const TAcc &acc,
0294 const SiPixelMappingSoAConstView &cablingMap,
0295 const unsigned char *modToUnp,
0296 const uint32_t wordCounter,
0297 const uint32_t *word,
0298 const uint8_t *fedIds,
0299 SiPixelDigisSoAView digisView,
0300 SiPixelDigiErrorsSoAView err,
0301 bool useQualityInfo,
0302 bool includeErrors) const {
0303
0304 if (cms::alpakatools::once_per_grid(acc))
0305 err.size() = 0;
0306
0307 for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308 auto dvgi = digisView[gIndex];
0309 dvgi.xx() = 0;
0310 dvgi.yy() = 0;
0311 dvgi.adc() = 0;
0312
0313
0314 err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315
0316 uint8_t fedId = fedIds[gIndex / 2];
0317
0318
0319 dvgi.pdigi() = 0;
0320 dvgi.rawIdArr() = 0;
0321 dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322
0323 uint32_t ww = word[gIndex];
0324 if (ww == 0) {
0325
0326 continue;
0327 }
0328
0329 uint32_t link = sipixelconstants::getLink(ww);
0330 uint32_t roc = sipixelconstants::getROC(ww);
0331
0332 uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333 bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334 if (includeErrors and skipROC) {
0335 uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336 if (rawId != 0xffffffff)
0337 {
0338 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340 }
0341 continue;
0342 }
0343
0344
0345 if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346 uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347 if constexpr (debug) {
0348 printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349 }
0350 if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351 uint8_t error = conversionError<debug>(fedId, 2);
0352 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354 }
0355 continue;
0356 }
0357
0358 uint32_t index =
0359 fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360 if (useQualityInfo) {
0361 skipROC = cablingMap.badRocs()[index];
0362 if (skipROC)
0363 continue;
0364 }
0365 skipROC = modToUnp[index];
0366 if (skipROC)
0367 continue;
0368
0369 ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370 uint32_t rawId = detId.rawId;
0371 uint32_t layer = 0;
0372 int side = 0, panel = 0, module = 0;
0373 bool barrel = isBarrel(rawId);
0374
0375 if (barrel) {
0376 layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377 module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378 side = (module < 5) ? -1 : 1;
0379 } else {
0380
0381 layer = 0;
0382 panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383 side = (panel == 1) ? -1 : 1;
0384 }
0385
0386 ::pixelDetails::Pixel localPix;
0387 if (layer == 1) {
0388
0389 uint32_t col = sipixelconstants::getCol(ww);
0390 uint32_t row = sipixelconstants::getRow(ww);
0391 localPix.row = row;
0392 localPix.col = col;
0393 if (includeErrors and not rocRowColIsValid(row, col)) {
0394 uint8_t error = conversionError<debug>(fedId, 3);
0395 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397 if constexpr (debug)
0398 printf("BPIX1 Error status: %i\n", error);
0399 continue;
0400 }
0401 } else {
0402
0403 uint32_t dcol = sipixelconstants::getDCol(ww);
0404 uint32_t pxid = sipixelconstants::getPxId(ww);
0405 uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406 uint32_t col = dcol * 2 + pxid % 2;
0407 localPix.row = row;
0408 localPix.col = col;
0409 if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410 uint8_t error = conversionError<debug>(fedId, 3);
0411 err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412 alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413 if constexpr (debug)
0414 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415 continue;
0416 }
0417 }
0418
0419 ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420 dvgi.xx() = globalPix.row;
0421 dvgi.yy() = globalPix.col;
0422 dvgi.adc() = sipixelconstants::getADC(ww);
0423 dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424 dvgi.moduleId() = detId.moduleId;
0425 dvgi.rawIdArr() = rawId;
0426 }
0427
0428 }
0429 };
0430
0431 template <typename TrackerTraits>
0432 struct FillHitsModuleStart {
0433 template <typename TAcc>
0434 ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
0435 constexpr bool isPhase2 = std::is_base_of<pixelTopology::Phase2, TrackerTraits>::value;
0436
0437
0438
0439
0440
0441
0442 constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048;
0443 ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit);
0444
0445 constexpr int numberOfModules = TrackerTraits::numberOfModules;
0446 constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
0447
0448 #ifndef NDEBUG
0449 [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0450 ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
0451 [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0452 ALPAKA_ASSERT_ACC(1 == gridDimension);
0453 #endif
0454
0455
0456 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0457 clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
0458 }
0459
0460 constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
0461
0462 auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
0463
0464 cms::alpakatools::blockPrefixScan(
0465 acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
0466
0467 cms::alpakatools::blockPrefixScan(
0468 acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
0469
0470 if constexpr (isPhase2) {
0471 cms::alpakatools::blockPrefixScan(
0472 acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
0473 cms::alpakatools::blockPrefixScan(acc,
0474 clus_view.clusModuleStart() + 3072 + 1,
0475 clus_view.clusModuleStart() + 3072 + 1,
0476 numberOfModules - 3072,
0477 ws);
0478 }
0479
0480 constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
0481 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
0482 clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
0483 }
0484 alpaka::syncBlockThreads(acc);
0485
0486 if constexpr (isPhase2) {
0487 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
0488 clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
0489 }
0490 alpaka::syncBlockThreads(acc);
0491
0492 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
0493 clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
0494 }
0495 alpaka::syncBlockThreads(acc);
0496 }
0497 #ifdef GPU_DEBUG
0498 ALPAKA_ASSERT_ACC(0 == clus_view[1].moduleStart());
0499 auto c0 = std::min(maxHitsInModule, clus_view[2].clusModuleStart());
0500 ALPAKA_ASSERT_ACC(c0 == clus_view[2].moduleStart());
0501 ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
0502 ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
0503 ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
0504
0505 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) {
0506 if (0 != i)
0507 ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
0508
0509 constexpr auto bpix2 = TrackerTraits::layerStart[1];
0510 constexpr auto fpix1 = TrackerTraits::layerStart[4];
0511 if (i == bpix2 || i == fpix1)
0512 printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
0513 }
0514
0515 #endif
0516
0517 }
0518 };
0519
0520
0521 template <typename TrackerTraits>
0522 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0523 Queue &queue,
0524 const SiPixelClusterThresholds clusterThresholds,
0525 const SiPixelMappingSoAConstView &cablingMap,
0526 const unsigned char *modToUnp,
0527 const SiPixelGainCalibrationForHLTSoAConstView &gains,
0528 const WordFedAppender &wordFed,
0529 const uint32_t wordCounter,
0530 const uint32_t fedCounter,
0531 bool useQualityInfo,
0532 bool includeErrors,
0533 bool debug) {
0534 nDigis = wordCounter;
0535
0536 #ifdef GPU_DEBUG
0537 std::cout << "decoding " << wordCounter << " digis." << std::endl;
0538 #endif
0539 constexpr int numberOfModules = TrackerTraits::numberOfModules;
0540 digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0541 if (includeErrors) {
0542 digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0543 }
0544 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0545
0546 if (wordCounter) {
0547 const int threadsPerBlockOrElementsPerThread =
0548 cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0549
0550 const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0551 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0552 assert(0 == wordCounter % 2);
0553
0554 auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0555
0556
0557
0558 auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0559 alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0560 alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0561
0562 if (debug) {
0563 alpaka::exec<Acc1D>(queue,
0564 workDiv,
0565 RawToDigi_kernel<true>{},
0566 cablingMap,
0567 modToUnp,
0568 wordCounter,
0569 word_d.data(),
0570 fedId_d.data(),
0571 digis_d->view(),
0572 digiErrors_d->view(),
0573 useQualityInfo,
0574 includeErrors);
0575 } else {
0576 alpaka::exec<Acc1D>(queue,
0577 workDiv,
0578 RawToDigi_kernel<false>{},
0579 cablingMap,
0580 modToUnp,
0581 wordCounter,
0582 word_d.data(),
0583 fedId_d.data(),
0584 digis_d->view(),
0585 digiErrors_d->view(),
0586 useQualityInfo,
0587 includeErrors);
0588 }
0589
0590 #ifdef GPU_DEBUG
0591 alpaka::wait(queue);
0592 std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0593 #endif
0594 }
0595
0596
0597 {
0598
0599 using namespace pixelClustering;
0600
0601 using namespace calibPixel;
0602 const int threadsPerBlockOrElementsPerThread = []() {
0603 if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0604
0605 return 32;
0606 } else {
0607 return 256;
0608 }
0609 }();
0610 const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0611 threadsPerBlockOrElementsPerThread);
0612 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0613
0614 if (debug) {
0615 alpaka::exec<Acc1D>(queue,
0616 workDiv,
0617 CalibDigis<true>{},
0618 clusterThresholds,
0619 digis_d->view(),
0620 clusters_d->view(),
0621 gains,
0622 wordCounter);
0623 } else {
0624 alpaka::exec<Acc1D>(queue,
0625 workDiv,
0626 CalibDigis<false>{},
0627 clusterThresholds,
0628 digis_d->view(),
0629 clusters_d->view(),
0630 gains,
0631 wordCounter);
0632 }
0633 #ifdef GPU_DEBUG
0634 alpaka::wait(queue);
0635 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0636 << " threadsPerBlockOrElementsPerThread\n";
0637 #endif
0638
0639 alpaka::exec<Acc1D>(
0640 queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0641
0642 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0643 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0644
0645 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0646 const auto workDivMaxNumModules =
0647 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0648 #ifdef GPU_DEBUG
0649 std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0650 << " threadsPerBlockOrElementsPerThread\n";
0651 #endif
0652 alpaka::exec<Acc1D>(
0653 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0654 #ifdef GPU_DEBUG
0655 alpaka::wait(queue);
0656 #endif
0657
0658 constexpr auto threadsPerBlockChargeCut = 256;
0659 const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0660
0661 alpaka::exec<Acc1D>(queue,
0662 workDivChargeCut,
0663 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0664 digis_d->view(),
0665 clusters_d->view(),
0666 clusterThresholds,
0667 wordCounter);
0668
0669
0670
0671
0672
0673
0674 const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0675 alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0676
0677
0678 const auto clusModuleStartLastElement =
0679 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0680 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0681
0682
0683 const auto bpix2ClusterStart =
0684 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0685 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0686 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0687
0688 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0689 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0690
0691 #ifdef GPU_DEBUG
0692 alpaka::wait(queue);
0693 std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0694 << " > no. of digis: " << nDigis << std::endl
0695 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0696 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0697 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0698 #endif
0699
0700 }
0701 }
0702
0703 template <typename TrackerTraits>
0704 void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0705 Queue &queue,
0706 const SiPixelClusterThresholds clusterThresholds,
0707 SiPixelDigisSoAView &digis_view,
0708 const uint32_t numDigis) {
0709 using namespace pixelClustering;
0710 using pixelTopology::Phase2;
0711 nDigis = numDigis;
0712 constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0713 clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0714 const auto threadsPerBlockOrElementsPerThread = 512;
0715 const auto blocks =
0716 cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0717 const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0718
0719 alpaka::exec<Acc1D>(
0720 queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0721
0722 #ifdef GPU_DEBUG
0723 alpaka::wait(queue);
0724 std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0725 << " threadsPerBlockOrElementsPerThread\n";
0726 #endif
0727 alpaka::exec<Acc1D>(
0728 queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0729
0730 auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0731 alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0732
0733 const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0734 const auto workDivMaxNumModules =
0735 cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0736 #ifdef GPU_DEBUG
0737 alpaka::wait(queue);
0738 std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0739 << " threadsPerBlockOrElementsPerThread\n";
0740 #endif
0741 alpaka::exec<Acc1D>(
0742 queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0743 #ifdef GPU_DEBUG
0744 alpaka::wait(queue);
0745 #endif
0746
0747
0748 alpaka::exec<Acc1D>(queue,
0749 workDivMaxNumModules,
0750 ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0751 digis_view,
0752 clusters_d->view(),
0753 clusterThresholds,
0754 numDigis);
0755
0756
0757
0758
0759
0760
0761
0762 const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0763 alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0764
0765
0766 const auto clusModuleStartLastElement =
0767 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0768 constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0769
0770 const auto bpix2ClusterStart =
0771 cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0772 auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0773 alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0774
0775 auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0776 alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0777
0778 #ifdef GPU_DEBUG
0779 alpaka::wait(queue);
0780 std::cout << "SiPixelPhase2DigiToCluster: results \n"
0781 << " > no. of digis: " << numDigis << std::endl
0782 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0783 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0784 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0785 #endif
0786 }
0787
0788 template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0789 template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0790 template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0791
0792 }
0793
0794 }