Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:26:18

0001 // C++ includes
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007 
0008 // Alpaka includes
0009 #include <alpaka/alpaka.hpp>
0010 
0011 // CMSSW includes
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0030 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0031 
0032 // local includes
0033 #include "CalibPixel.h"
0034 #include "ClusterChargeCut.h"
0035 #include "PixelClustering.h"
0036 #include "SiPixelRawToClusterKernel.h"
0037 
0038 // #define GPU_DEBUG
0039 
0040 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0041   namespace pixelDetails {
0042 
0043     ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0044       return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0045     }
0046 
0047     ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0048                                                     uint8_t fed,
0049                                                     uint32_t link,
0050                                                     uint32_t roc) {
0051       using namespace ::pixelDetails;
0052       uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0053       DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0054       return detId;
0055     }
0056 
0057     //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
0058     //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
0059     // Convert local pixel to pixelDetails::global pixel
0060     ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0061         bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0062       int slopeRow = 0, slopeCol = 0;
0063       int rowOffset = 0, colOffset = 0;
0064 
0065       if (bpix) {
0066         if (side == -1 && layer != 1) {  // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
0067           if (rocIdInDetUnit < 8) {
0068             slopeRow = 1;
0069             slopeCol = -1;
0070             rowOffset = 0;
0071             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0072           } else {
0073             slopeRow = -1;
0074             slopeCol = 1;
0075             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0076             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0077           }       // if roc
0078         } else {  // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
0079           if (rocIdInDetUnit < 8) {
0080             slopeRow = -1;
0081             slopeCol = 1;
0082             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0083             colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0084           } else {
0085             slopeRow = 1;
0086             slopeCol = -1;
0087             rowOffset = 0;
0088             colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0089           }
0090         }
0091 
0092       } else {             // fpix
0093         if (side == -1) {  // pannel 1
0094           if (rocIdInDetUnit < 8) {
0095             slopeRow = 1;
0096             slopeCol = -1;
0097             rowOffset = 0;
0098             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0099           } else {
0100             slopeRow = -1;
0101             slopeCol = 1;
0102             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0103             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0104           }
0105         } else {  // pannel 2
0106           if (rocIdInDetUnit < 8) {
0107             slopeRow = 1;
0108             slopeCol = -1;
0109             rowOffset = 0;
0110             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0111           } else {
0112             slopeRow = -1;
0113             slopeCol = 1;
0114             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0115             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0116           }
0117 
0118         }  // side
0119       }
0120 
0121       uint32_t gRow = rowOffset + slopeRow * local.row;
0122       uint32_t gCol = colOffset + slopeCol * local.col;
0123       // inside frameConversion row: gRow, column: gCol
0124       ::pixelDetails::Pixel global = {gRow, gCol};
0125       return global;
0126     }
0127 
0128     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0129     template <bool debug = false>
0130     ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0131       uint8_t errorType = 0;
0132 
0133       switch (status) {
0134         case 1: {
0135           if constexpr (debug)
0136             printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0137           errorType = 35;
0138           break;
0139         }
0140         case 2: {
0141           if constexpr (debug)
0142             printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0143           errorType = 36;
0144           break;
0145         }
0146         case 3: {
0147           if constexpr (debug)
0148             printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0149           errorType = 37;
0150           break;
0151         }
0152         case 4: {
0153           if constexpr (debug)
0154             printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0155           errorType = 38;
0156           break;
0157         }
0158         default:
0159           if constexpr (debug)
0160             printf("Cabling check returned unexpected result, status = %i\n", status);
0161       };
0162 
0163       return errorType;
0164     }
0165 
0166     ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0167       /// row and column in ROC representation
0168       return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0169     }
0170 
0171     ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0172 
0173     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0174     template <bool debug = false>
0175     ALPAKA_FN_ACC uint8_t
0176     checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0177       uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0178       if (errorType < 25)
0179         return 0;
0180       bool errorFound = false;
0181 
0182       switch (errorType) {
0183         case 25: {
0184           errorFound = true;
0185           uint32_t index =
0186               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0187           if (index > 1 && index <= cablingMap.size()) {
0188             if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0189               errorFound = false;
0190           }
0191           if constexpr (debug)
0192             if (errorFound)
0193               printf("Invalid ROC = 25 found (errorType = 25)\n");
0194           break;
0195         }
0196         case 26: {
0197           if constexpr (debug)
0198             printf("Gap word found (errorType = 26)\n");
0199           break;
0200         }
0201         case 27: {
0202           if constexpr (debug)
0203             printf("Dummy word found (errorType = 27)\n");
0204           break;
0205         }
0206         case 28: {
0207           if constexpr (debug)
0208             printf("Error fifo nearly full (errorType = 28)\n");
0209           errorFound = true;
0210           break;
0211         }
0212         case 29: {
0213           if constexpr (debug)
0214             printf("Timeout on a channel (errorType = 29)\n");
0215           if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0216             if constexpr (debug)
0217               printf("...2nd errorType=29 error, skip\n");
0218             break;
0219           }
0220           errorFound = true;
0221           break;
0222         }
0223         case 30: {
0224           if constexpr (debug)
0225             printf("TBM error trailer (errorType = 30)\n");
0226           int stateMatch_bits = 4;
0227           int stateMatch_shift = 8;
0228           uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0229           int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0230           if (stateMatch != 1 && stateMatch != 8) {
0231             if constexpr (debug)
0232               printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0233             break;
0234           }
0235           if (stateMatch == 1)
0236             errorType = 40;  // 1=Overflow -> 40, 8=number of ROCs -> 30
0237           errorFound = true;
0238           break;
0239         }
0240         case 31: {
0241           if constexpr (debug)
0242             printf("Event number error (errorType = 31)\n");
0243           errorFound = true;
0244           break;
0245         }
0246         default:
0247           errorFound = false;
0248       };
0249 
0250       return errorFound ? errorType : 0;
0251     }
0252 
0253     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0254     template <bool debug = false>
0255     ALPAKA_FN_ACC uint32_t
0256     getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0257       uint32_t rID = 0xffffffff;
0258 
0259       switch (errorType) {
0260         case 25:
0261         case 29:
0262         case 30:
0263         case 31:
0264         case 36:
0265         case 40: {
0266           uint32_t roc = 1;
0267           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0268           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0269           if (rID_temp != ::pixelClustering::invalidModuleId)
0270             rID = rID_temp;
0271           break;
0272         }
0273         case 37:
0274         case 38: {
0275           uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0276           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0277           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0278           if (rID_temp != ::pixelClustering::invalidModuleId)
0279             rID = rID_temp;
0280           break;
0281         }
0282         default:
0283           break;
0284       };
0285 
0286       return rID;
0287     }
0288 
0289     // Kernel to perform Raw to Digi conversion
0290     template <bool debug = false>
0291     struct RawToDigi_kernel {
0292       template <typename TAcc>
0293       ALPAKA_FN_ACC void operator()(const TAcc &acc,
0294                                     const SiPixelMappingSoAConstView &cablingMap,
0295                                     const unsigned char *modToUnp,
0296                                     const uint32_t wordCounter,
0297                                     const uint32_t *word,
0298                                     const uint8_t *fedIds,
0299                                     SiPixelDigisSoAView digisView,
0300                                     SiPixelDigiErrorsSoAView err,
0301                                     bool useQualityInfo,
0302                                     bool includeErrors) const {
0303         // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
0304         if (cms::alpakatools::once_per_grid(acc))
0305           err.size() = 0;
0306 
0307         for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308           auto dvgi = digisView[gIndex];
0309           dvgi.xx() = 0;
0310           dvgi.yy() = 0;
0311           dvgi.adc() = 0;
0312 
0313           // initialise the errors
0314           err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315 
0316           uint8_t fedId = fedIds[gIndex / 2];  // +1200;
0317 
0318           // initialize (too many coninue below)
0319           dvgi.pdigi() = 0;
0320           dvgi.rawIdArr() = 0;
0321           dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322 
0323           uint32_t ww = word[gIndex];  // Array containing 32 bit raw data
0324           if (ww == 0) {
0325             // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
0326             continue;
0327           }
0328 
0329           uint32_t link = sipixelconstants::getLink(ww);  // Extract link
0330           uint32_t roc = sipixelconstants::getROC(ww);    // Extract ROC in link
0331 
0332           uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333           bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334           if (includeErrors and skipROC) {
0335             uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336             if (rawId != 0xffffffff)  // Store errors only for valid DetIds
0337             {
0338               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340             }
0341             continue;
0342           }
0343 
0344           // Check for spurious channels
0345           if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346             uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347             if constexpr (debug) {
0348               printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349             }
0350             if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351               uint8_t error = conversionError<debug>(fedId, 2);
0352               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354             }
0355             continue;
0356           }
0357 
0358           uint32_t index =
0359               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360           if (useQualityInfo) {
0361             skipROC = cablingMap.badRocs()[index];
0362             if (skipROC)
0363               continue;
0364           }
0365           skipROC = modToUnp[index];
0366           if (skipROC)
0367             continue;
0368 
0369           ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370           uint32_t rawId = detId.rawId;
0371           uint32_t layer = 0;
0372           int side = 0, panel = 0, module = 0;
0373           bool barrel = isBarrel(rawId);
0374 
0375           if (barrel) {
0376             layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377             module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378             side = (module < 5) ? -1 : 1;
0379           } else {
0380             // endcap ids
0381             layer = 0;
0382             panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383             side = (panel == 1) ? -1 : 1;
0384           }
0385 
0386           ::pixelDetails::Pixel localPix;
0387           if (layer == 1) {
0388             // Special case of barrel layer 1
0389             uint32_t col = sipixelconstants::getCol(ww);
0390             uint32_t row = sipixelconstants::getRow(ww);
0391             localPix.row = row;
0392             localPix.col = col;
0393             if (includeErrors and not rocRowColIsValid(row, col)) {
0394               uint8_t error = conversionError<debug>(fedId, 3);
0395               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397               if constexpr (debug)
0398                 printf("BPIX1 Error status: %i\n", error);
0399               continue;
0400             }
0401           } else {
0402             // Other layers with double columns
0403             uint32_t dcol = sipixelconstants::getDCol(ww);
0404             uint32_t pxid = sipixelconstants::getPxId(ww);
0405             uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406             uint32_t col = dcol * 2 + pxid % 2;
0407             localPix.row = row;
0408             localPix.col = col;
0409             if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410               uint8_t error = conversionError<debug>(fedId, 3);
0411               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413               if constexpr (debug)
0414                 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415               continue;
0416             }
0417           }
0418 
0419           ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420           dvgi.xx() = globalPix.row;  // origin shifting by 1 0-159
0421           dvgi.yy() = globalPix.col;  // origin shifting by 1 0-415
0422           dvgi.adc() = sipixelconstants::getADC(ww);
0423           dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424           dvgi.moduleId() = detId.moduleId;
0425           dvgi.rawIdArr() = rawId;
0426         }  // end of stride on grid
0427 
0428       }  // end of Raw to Digi kernel operator()
0429     };   // end of Raw to Digi struct
0430 
0431     template <typename TrackerTraits>
0432     struct FillHitsModuleStart {
0433       template <typename TAcc>
0434       ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
0435         ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048);  // easy to extend at least till 32*1024
0436 
0437         constexpr int numberOfModules = TrackerTraits::numberOfModules;
0438         constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
0439 
0440 #ifndef NDEBUG
0441         [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0442         ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
0443         [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0444         ALPAKA_ASSERT_ACC(1 == gridDimension);
0445 #endif
0446 
0447         // limit to maxHitsInModule;
0448         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0449           clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
0450         }
0451 
0452         constexpr bool isPhase2 = std::is_base_of<pixelTopology::Phase2, TrackerTraits>::value;
0453         constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
0454 
0455         auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
0456 
0457         cms::alpakatools::blockPrefixScan(
0458             acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
0459 
0460         cms::alpakatools::blockPrefixScan(
0461             acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
0462 
0463         if constexpr (isPhase2) {
0464           cms::alpakatools::blockPrefixScan(
0465               acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
0466           cms::alpakatools::blockPrefixScan(acc,
0467                                             clus_view.clusModuleStart() + 3072 + 1,
0468                                             clus_view.clusModuleStart() + 3072 + 1,
0469                                             numberOfModules - 3072,
0470                                             ws);
0471         }
0472 
0473         constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
0474         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
0475           clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
0476         }
0477         alpaka::syncBlockThreads(acc);
0478 
0479         if constexpr (isPhase2) {
0480           for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
0481             clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
0482           }
0483           alpaka::syncBlockThreads(acc);
0484 
0485           for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
0486             clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
0487           }
0488           alpaka::syncBlockThreads(acc);
0489         }
0490 #ifdef GPU_DEBUG
0491         ALPAKA_ASSERT_ACC(0 == clus_view[0].moduleStart());
0492         auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
0493         ALPAKA_ASSERT_ACC(c0 == clus_view[1].moduleStart());
0494         ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
0495         ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
0496         ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
0497 
0498         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) {
0499           if (0 != i)
0500             ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
0501           // Check BPX2 (1), FP1 (4)
0502           constexpr auto bpix2 = TrackerTraits::layerStart[1];
0503           constexpr auto fpix1 = TrackerTraits::layerStart[4];
0504           if (i == bpix2 || i == fpix1)
0505             printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
0506         }
0507 #endif
0508         // avoid overflow
0509         constexpr auto MAX_HITS = TrackerTraits::maxNumberOfHits;
0510         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) {
0511           if (clus_view[i].clusModuleStart() > MAX_HITS)
0512             clus_view[i].clusModuleStart() = MAX_HITS;
0513         }
0514 
0515       }  // end of FillHitsModuleStart kernel operator()
0516     };   // end of FillHitsModuleStart struct
0517 
0518     // Interface to outside
0519     template <typename TrackerTraits>
0520     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0521         Queue &queue,
0522         const SiPixelClusterThresholds clusterThresholds,
0523         const SiPixelMappingSoAConstView &cablingMap,
0524         const unsigned char *modToUnp,
0525         const SiPixelGainCalibrationForHLTSoAConstView &gains,
0526         const WordFedAppender &wordFed,
0527         const uint32_t wordCounter,
0528         const uint32_t fedCounter,
0529         bool useQualityInfo,
0530         bool includeErrors,
0531         bool debug) {
0532       nDigis = wordCounter;
0533 
0534 #ifdef GPU_DEBUG
0535       std::cout << "decoding " << wordCounter << " digis." << std::endl;
0536 #endif
0537       constexpr int numberOfModules = TrackerTraits::numberOfModules;
0538       digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0539       if (includeErrors) {
0540         digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0541       }
0542       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0543       // protect in case of empty event....
0544       if (wordCounter) {
0545         const int threadsPerBlockOrElementsPerThread =
0546             cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0547         // fill it all
0548         const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0549         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0550         assert(0 == wordCounter % 2);
0551         // wordCounter is the total no of words in each event to be trasfered on device
0552         auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0553         // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
0554         // However, only the first half of elements end up eventually used:
0555         // hence, here, only wordCounter/2 elements are allocated.
0556         auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0557         alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0558         alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0559         // Launch rawToDigi kernel
0560         if (debug) {
0561           alpaka::exec<Acc1D>(queue,
0562                               workDiv,
0563                               RawToDigi_kernel<true>{},
0564                               cablingMap,
0565                               modToUnp,
0566                               wordCounter,
0567                               word_d.data(),
0568                               fedId_d.data(),
0569                               digis_d->view(),
0570                               digiErrors_d->view(),
0571                               useQualityInfo,
0572                               includeErrors);
0573         } else {
0574           alpaka::exec<Acc1D>(queue,
0575                               workDiv,
0576                               RawToDigi_kernel<false>{},
0577                               cablingMap,
0578                               modToUnp,
0579                               wordCounter,
0580                               word_d.data(),
0581                               fedId_d.data(),
0582                               digis_d->view(),
0583                               digiErrors_d->view(),
0584                               useQualityInfo,
0585                               includeErrors);
0586         }
0587 
0588 #ifdef GPU_DEBUG
0589         alpaka::wait(queue);
0590         std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0591 #endif
0592       }
0593       // End of Raw2Digi and passing data for clustering
0594 
0595       {
0596         // clusterizer
0597         using namespace pixelClustering;
0598         // calibrations
0599         using namespace calibPixel;
0600         const int threadsPerBlockOrElementsPerThread = []() {
0601           if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0602             // NB: MPORTANT: This could be tuned to benefit from innermost loop.
0603             return 32;
0604           } else {
0605             return 256;
0606           }
0607         }();
0608         const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0609                                                            threadsPerBlockOrElementsPerThread);
0610         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0611 
0612         if (debug) {
0613           alpaka::exec<Acc1D>(queue,
0614                               workDiv,
0615                               CalibDigis<true>{},
0616                               clusterThresholds,
0617                               digis_d->view(),
0618                               clusters_d->view(),
0619                               gains,
0620                               wordCounter);
0621         } else {
0622           alpaka::exec<Acc1D>(queue,
0623                               workDiv,
0624                               CalibDigis<false>{},
0625                               clusterThresholds,
0626                               digis_d->view(),
0627                               clusters_d->view(),
0628                               gains,
0629                               wordCounter);
0630         }
0631 #ifdef GPU_DEBUG
0632         alpaka::wait(queue);
0633         std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0634                   << " threadsPerBlockOrElementsPerThread\n";
0635 #endif
0636 
0637         alpaka::exec<Acc1D>(
0638             queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0639 
0640         auto moduleStartFirstElement =
0641             cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
0642         alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0643 
0644         const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0645         const auto workDivMaxNumModules =
0646             cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0647 #ifdef GPU_DEBUG
0648         std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0649                   << " threadsPerBlockOrElementsPerThread\n";
0650 #endif
0651         alpaka::exec<Acc1D>(
0652             queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0653 #ifdef GPU_DEBUG
0654         alpaka::wait(queue);
0655 #endif
0656 
0657         constexpr auto threadsPerBlockChargeCut = 256;
0658         const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0659         // apply charge cut
0660         alpaka::exec<Acc1D>(queue,
0661                             workDivChargeCut,
0662                             ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0663                             digis_d->view(),
0664                             clusters_d->view(),
0665                             clusterThresholds,
0666                             wordCounter);
0667         // count the module start indices already here (instead of
0668         // rechits) so that the number of clusters/hits can be made
0669         // available in the rechit producer without additional points of
0670         // synchronization/ExternalWork
0671 
0672         // MUST be ONE block
0673         const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0674         alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0675 
0676         // last element holds the number of all clusters
0677         const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
0678             alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0679         constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0680 
0681         // element startBPIX2 hold the number of clusters until BPIX2
0682         const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
0683             alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0684         auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0685         alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0686 
0687         auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0688         alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0689 
0690 #ifdef GPU_DEBUG
0691         alpaka::wait(queue);
0692         std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0693                   << " > no. of digis: " << nDigis << std::endl
0694                   << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0695                   << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0696                   << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0697 #endif
0698 
0699       }  // end clusterizer scope
0700     }
0701 
0702     template <typename TrackerTraits>
0703     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0704         Queue &queue,
0705         const SiPixelClusterThresholds clusterThresholds,
0706         SiPixelDigisSoAView &digis_view,
0707         const uint32_t numDigis) {
0708       using namespace pixelClustering;
0709       using pixelTopology::Phase2;
0710       nDigis = numDigis;
0711       constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0712       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0713       const auto threadsPerBlockOrElementsPerThread = 512;
0714       const auto blocks =
0715           cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0716       const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0717 
0718       alpaka::exec<Acc1D>(
0719           queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0720 
0721 #ifdef GPU_DEBUG
0722       alpaka::wait(queue);
0723       std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0724                 << " threadsPerBlockOrElementsPerThread\n";
0725 #endif
0726       alpaka::exec<Acc1D>(
0727           queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0728 
0729       auto moduleStartFirstElement =
0730           cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
0731       alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0732 
0733       const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0734       const auto workDivMaxNumModules =
0735           cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0736 #ifdef GPU_DEBUG
0737       alpaka::wait(queue);
0738       std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0739                 << " threadsPerBlockOrElementsPerThread\n";
0740 #endif
0741       alpaka::exec<Acc1D>(
0742           queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0743 #ifdef GPU_DEBUG
0744       alpaka::wait(queue);
0745 #endif
0746 
0747       // apply charge cut
0748       alpaka::exec<Acc1D>(queue,
0749                           workDivMaxNumModules,
0750                           ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0751                           digis_view,
0752                           clusters_d->view(),
0753                           clusterThresholds,
0754                           numDigis);
0755 
0756       // count the module start indices already here (instead of
0757       // rechits) so that the number of clusters/hits can be made
0758       // available in the rechit producer without additional points of
0759       // synchronization/ExternalWork
0760 
0761       // MUST be ONE block
0762       const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0763       alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0764 
0765       // last element holds the number of all clusters
0766       const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
0767           alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0768       constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0769       // element startBPIX2 hold the number of clusters until BPIX2
0770       const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
0771           alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0772       auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0773       alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0774 
0775       auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0776       alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0777 
0778 #ifdef GPU_DEBUG
0779       alpaka::wait(queue);
0780       std::cout << "SiPixelPhase2DigiToCluster: results \n"
0781                 << " > no. of digis: " << numDigis << std::endl
0782                 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0783                 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0784                 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0785 #endif
0786     }  //
0787 
0788     template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0789     template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0790     template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0791 
0792   }  // namespace pixelDetails
0793 
0794 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE