Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-05-10 02:21:08

0001 // C++ includes
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007 
0008 // Alpaka includes
0009 #include <alpaka/alpaka.hpp>
0010 
0011 // CMSSW includes
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0030 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0031 
0032 // local includes
0033 #include "CalibPixel.h"
0034 #include "ClusterChargeCut.h"
0035 #include "PixelClustering.h"
0036 #include "SiPixelRawToClusterKernel.h"
0037 
0038 // #define GPU_DEBUG
0039 
0040 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0041   namespace pixelDetails {
0042 
0043     ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0044       return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0045     }
0046 
0047     ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0048                                                     uint8_t fed,
0049                                                     uint32_t link,
0050                                                     uint32_t roc) {
0051       using namespace ::pixelDetails;
0052       uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0053       DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0054       return detId;
0055     }
0056 
0057     //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
0058     //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
0059     // Convert local pixel to pixelDetails::global pixel
0060     ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0061         bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0062       int slopeRow = 0, slopeCol = 0;
0063       int rowOffset = 0, colOffset = 0;
0064 
0065       if (bpix) {
0066         if (side == -1 && layer != 1) {  // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
0067           if (rocIdInDetUnit < 8) {
0068             slopeRow = 1;
0069             slopeCol = -1;
0070             rowOffset = 0;
0071             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0072           } else {
0073             slopeRow = -1;
0074             slopeCol = 1;
0075             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0076             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0077           }       // if roc
0078         } else {  // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
0079           if (rocIdInDetUnit < 8) {
0080             slopeRow = -1;
0081             slopeCol = 1;
0082             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0083             colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0084           } else {
0085             slopeRow = 1;
0086             slopeCol = -1;
0087             rowOffset = 0;
0088             colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0089           }
0090         }
0091 
0092       } else {             // fpix
0093         if (side == -1) {  // pannel 1
0094           if (rocIdInDetUnit < 8) {
0095             slopeRow = 1;
0096             slopeCol = -1;
0097             rowOffset = 0;
0098             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0099           } else {
0100             slopeRow = -1;
0101             slopeCol = 1;
0102             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0103             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0104           }
0105         } else {  // pannel 2
0106           if (rocIdInDetUnit < 8) {
0107             slopeRow = 1;
0108             slopeCol = -1;
0109             rowOffset = 0;
0110             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0111           } else {
0112             slopeRow = -1;
0113             slopeCol = 1;
0114             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0115             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0116           }
0117 
0118         }  // side
0119       }
0120 
0121       uint32_t gRow = rowOffset + slopeRow * local.row;
0122       uint32_t gCol = colOffset + slopeCol * local.col;
0123       // inside frameConversion row: gRow, column: gCol
0124       ::pixelDetails::Pixel global = {gRow, gCol};
0125       return global;
0126     }
0127 
0128     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0129     template <bool debug = false>
0130     ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0131       uint8_t errorType = 0;
0132 
0133       switch (status) {
0134         case 1: {
0135           if constexpr (debug)
0136             printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0137           errorType = 35;
0138           break;
0139         }
0140         case 2: {
0141           if constexpr (debug)
0142             printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0143           errorType = 36;
0144           break;
0145         }
0146         case 3: {
0147           if constexpr (debug)
0148             printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0149           errorType = 37;
0150           break;
0151         }
0152         case 4: {
0153           if constexpr (debug)
0154             printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0155           errorType = 38;
0156           break;
0157         }
0158         default:
0159           if constexpr (debug)
0160             printf("Cabling check returned unexpected result, status = %i\n", status);
0161       };
0162 
0163       return errorType;
0164     }
0165 
0166     ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0167       /// row and column in ROC representation
0168       return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0169     }
0170 
0171     ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0172 
0173     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0174     template <bool debug = false>
0175     ALPAKA_FN_ACC uint8_t
0176     checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0177       uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0178       if (errorType < 25)
0179         return 0;
0180       bool errorFound = false;
0181 
0182       switch (errorType) {
0183         case 25: {
0184           errorFound = true;
0185           uint32_t index =
0186               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0187           if (index > 1 && index <= cablingMap.size()) {
0188             if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0189               errorFound = false;
0190           }
0191           if constexpr (debug)
0192             if (errorFound)
0193               printf("Invalid ROC = 25 found (errorType = 25)\n");
0194           break;
0195         }
0196         case 26: {
0197           if constexpr (debug)
0198             printf("Gap word found (errorType = 26)\n");
0199           break;
0200         }
0201         case 27: {
0202           if constexpr (debug)
0203             printf("Dummy word found (errorType = 27)\n");
0204           break;
0205         }
0206         case 28: {
0207           if constexpr (debug)
0208             printf("Error fifo nearly full (errorType = 28)\n");
0209           errorFound = true;
0210           break;
0211         }
0212         case 29: {
0213           if constexpr (debug)
0214             printf("Timeout on a channel (errorType = 29)\n");
0215           if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0216             if constexpr (debug)
0217               printf("...2nd errorType=29 error, skip\n");
0218             break;
0219           }
0220           errorFound = true;
0221           break;
0222         }
0223         case 30: {
0224           if constexpr (debug)
0225             printf("TBM error trailer (errorType = 30)\n");
0226           int stateMatch_bits = 4;
0227           int stateMatch_shift = 8;
0228           uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0229           int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0230           if (stateMatch != 1 && stateMatch != 8) {
0231             if constexpr (debug)
0232               printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0233             break;
0234           }
0235           if (stateMatch == 1)
0236             errorType = 40;  // 1=Overflow -> 40, 8=number of ROCs -> 30
0237           errorFound = true;
0238           break;
0239         }
0240         case 31: {
0241           if constexpr (debug)
0242             printf("Event number error (errorType = 31)\n");
0243           errorFound = true;
0244           break;
0245         }
0246         default:
0247           errorFound = false;
0248       };
0249 
0250       return errorFound ? errorType : 0;
0251     }
0252 
0253     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0254     template <bool debug = false>
0255     ALPAKA_FN_ACC uint32_t
0256     getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0257       uint32_t rID = 0xffffffff;
0258 
0259       switch (errorType) {
0260         case 25:
0261         case 29:
0262         case 30:
0263         case 31:
0264         case 36:
0265         case 40: {
0266           uint32_t roc = 1;
0267           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0268           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0269           if (rID_temp != ::pixelClustering::invalidModuleId)
0270             rID = rID_temp;
0271           break;
0272         }
0273         case 37:
0274         case 38: {
0275           uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0276           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0277           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0278           if (rID_temp != ::pixelClustering::invalidModuleId)
0279             rID = rID_temp;
0280           break;
0281         }
0282         default:
0283           break;
0284       };
0285 
0286       return rID;
0287     }
0288 
0289     // Kernel to perform Raw to Digi conversion
0290     template <bool debug = false>
0291     struct RawToDigi_kernel {
0292       template <typename TAcc>
0293       ALPAKA_FN_ACC void operator()(const TAcc &acc,
0294                                     const SiPixelMappingSoAConstView &cablingMap,
0295                                     const unsigned char *modToUnp,
0296                                     const uint32_t wordCounter,
0297                                     const uint32_t *word,
0298                                     const uint8_t *fedIds,
0299                                     SiPixelDigisSoAView digisView,
0300                                     SiPixelDigiErrorsSoAView err,
0301                                     bool useQualityInfo,
0302                                     bool includeErrors) const {
0303         // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
0304         if (cms::alpakatools::once_per_grid(acc))
0305           err.size() = 0;
0306 
0307         for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308           auto dvgi = digisView[gIndex];
0309           dvgi.xx() = 0;
0310           dvgi.yy() = 0;
0311           dvgi.adc() = 0;
0312 
0313           // initialise the errors
0314           err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315 
0316           uint8_t fedId = fedIds[gIndex / 2];  // +1200;
0317 
0318           // initialize (too many coninue below)
0319           dvgi.pdigi() = 0;
0320           dvgi.rawIdArr() = 0;
0321           dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322 
0323           uint32_t ww = word[gIndex];  // Array containing 32 bit raw data
0324           if (ww == 0) {
0325             // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
0326             continue;
0327           }
0328 
0329           uint32_t link = sipixelconstants::getLink(ww);  // Extract link
0330           uint32_t roc = sipixelconstants::getROC(ww);    // Extract ROC in link
0331 
0332           uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333           bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334           if (includeErrors and skipROC) {
0335             uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336             if (rawId != 0xffffffff)  // Store errors only for valid DetIds
0337             {
0338               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340             }
0341             continue;
0342           }
0343 
0344           // Check for spurious channels
0345           if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346             uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347             if constexpr (debug) {
0348               printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349             }
0350             if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351               uint8_t error = conversionError<debug>(fedId, 2);
0352               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354             }
0355             continue;
0356           }
0357 
0358           uint32_t index =
0359               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360           if (useQualityInfo) {
0361             skipROC = cablingMap.badRocs()[index];
0362             if (skipROC)
0363               continue;
0364           }
0365           skipROC = modToUnp[index];
0366           if (skipROC)
0367             continue;
0368 
0369           ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370           uint32_t rawId = detId.rawId;
0371           uint32_t layer = 0;
0372           int side = 0, panel = 0, module = 0;
0373           bool barrel = isBarrel(rawId);
0374 
0375           if (barrel) {
0376             layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377             module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378             side = (module < 5) ? -1 : 1;
0379           } else {
0380             // endcap ids
0381             layer = 0;
0382             panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383             side = (panel == 1) ? -1 : 1;
0384           }
0385 
0386           ::pixelDetails::Pixel localPix;
0387           if (layer == 1) {
0388             // Special case of barrel layer 1
0389             uint32_t col = sipixelconstants::getCol(ww);
0390             uint32_t row = sipixelconstants::getRow(ww);
0391             localPix.row = row;
0392             localPix.col = col;
0393             if (includeErrors and not rocRowColIsValid(row, col)) {
0394               uint8_t error = conversionError<debug>(fedId, 3);
0395               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397               if constexpr (debug)
0398                 printf("BPIX1 Error status: %i\n", error);
0399               continue;
0400             }
0401           } else {
0402             // Other layers with double columns
0403             uint32_t dcol = sipixelconstants::getDCol(ww);
0404             uint32_t pxid = sipixelconstants::getPxId(ww);
0405             uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406             uint32_t col = dcol * 2 + pxid % 2;
0407             localPix.row = row;
0408             localPix.col = col;
0409             if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410               uint8_t error = conversionError<debug>(fedId, 3);
0411               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413               if constexpr (debug)
0414                 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415               continue;
0416             }
0417           }
0418 
0419           ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420           dvgi.xx() = globalPix.row;  // origin shifting by 1 0-159
0421           dvgi.yy() = globalPix.col;  // origin shifting by 1 0-415
0422           dvgi.adc() = sipixelconstants::getADC(ww);
0423           dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424           dvgi.moduleId() = detId.moduleId;
0425           dvgi.rawIdArr() = rawId;
0426         }  // end of stride on grid
0427 
0428       }  // end of Raw to Digi kernel operator()
0429     };   // end of Raw to Digi struct
0430 
0431     template <typename TrackerTraits>
0432     struct FillHitsModuleStart {
0433       template <typename TAcc>
0434       ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
0435         constexpr bool isPhase2 = std::is_base_of<pixelTopology::Phase2, TrackerTraits>::value;
0436 
0437         // For Phase1 there are 1856 pixel modules
0438         // For Phase2 there are 3872 pixel modules
0439         // For whichever setup with more modules it would be
0440         // easy to extend at least till  32*1024
0441 
0442         constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048;
0443         ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit);
0444 
0445         constexpr int numberOfModules = TrackerTraits::numberOfModules;
0446         constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
0447 
0448 #ifndef NDEBUG
0449         [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0450         ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
0451         [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0452         ALPAKA_ASSERT_ACC(1 == gridDimension);
0453 #endif
0454 
0455         // limit to maxHitsInModule;
0456         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0457           clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
0458         }
0459 
0460         constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;
0461 
0462         auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
0463 
0464         cms::alpakatools::blockPrefixScan(
0465             acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
0466 
0467         cms::alpakatools::blockPrefixScan(
0468             acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
0469 
0470         if constexpr (isPhase2) {
0471           cms::alpakatools::blockPrefixScan(
0472               acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
0473           cms::alpakatools::blockPrefixScan(acc,
0474                                             clus_view.clusModuleStart() + 3072 + 1,
0475                                             clus_view.clusModuleStart() + 3072 + 1,
0476                                             numberOfModules - 3072,
0477                                             ws);
0478         }
0479 
0480         constexpr auto lastModule = isPhase2 ? 2049u : numberOfModules + 1;
0481         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 1025u, lastModule)) {
0482           clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
0483         }
0484         alpaka::syncBlockThreads(acc);
0485 
0486         if constexpr (isPhase2) {
0487           for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 2049u, 3073u)) {
0488             clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
0489           }
0490           alpaka::syncBlockThreads(acc);
0491 
0492           for (uint32_t i : cms::alpakatools::independent_group_elements(acc, 3073u, numberOfModules + 1)) {
0493             clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
0494           }
0495           alpaka::syncBlockThreads(acc);
0496         }
0497 #ifdef GPU_DEBUG
0498         ALPAKA_ASSERT_ACC(0 == clus_view[1].moduleStart());
0499         auto c0 = std::min(maxHitsInModule, clus_view[2].clusModuleStart());
0500         ALPAKA_ASSERT_ACC(c0 == clus_view[2].moduleStart());
0501         ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
0502         ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
0503         ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart());
0504 
0505         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) {
0506           if (0 != i)
0507             ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
0508           // Check BPX2 (1), FP1 (4)
0509           constexpr auto bpix2 = TrackerTraits::layerStart[1];
0510           constexpr auto fpix1 = TrackerTraits::layerStart[4];
0511           if (i == bpix2 || i == fpix1)
0512             printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
0513         }
0514 
0515 #endif
0516 
0517       }  // end of FillHitsModuleStart kernel operator()
0518     };   // end of FillHitsModuleStart struct
0519 
0520     // Interface to outside
0521     template <typename TrackerTraits>
0522     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0523         Queue &queue,
0524         const SiPixelClusterThresholds clusterThresholds,
0525         const SiPixelMappingSoAConstView &cablingMap,
0526         const unsigned char *modToUnp,
0527         const SiPixelGainCalibrationForHLTSoAConstView &gains,
0528         const WordFedAppender &wordFed,
0529         const uint32_t wordCounter,
0530         const uint32_t fedCounter,
0531         bool useQualityInfo,
0532         bool includeErrors,
0533         bool debug) {
0534       nDigis = wordCounter;
0535 
0536 #ifdef GPU_DEBUG
0537       std::cout << "decoding " << wordCounter << " digis." << std::endl;
0538 #endif
0539       constexpr int numberOfModules = TrackerTraits::numberOfModules;
0540       digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0541       if (includeErrors) {
0542         digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0543       }
0544       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0545       // protect in case of empty event....
0546       if (wordCounter) {
0547         const int threadsPerBlockOrElementsPerThread =
0548             cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0549         // fill it all
0550         const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0551         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0552         assert(0 == wordCounter % 2);
0553         // wordCounter is the total no of words in each event to be trasfered on device
0554         auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0555         // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
0556         // However, only the first half of elements end up eventually used:
0557         // hence, here, only wordCounter/2 elements are allocated.
0558         auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0559         alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0560         alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0561         // Launch rawToDigi kernel
0562         if (debug) {
0563           alpaka::exec<Acc1D>(queue,
0564                               workDiv,
0565                               RawToDigi_kernel<true>{},
0566                               cablingMap,
0567                               modToUnp,
0568                               wordCounter,
0569                               word_d.data(),
0570                               fedId_d.data(),
0571                               digis_d->view(),
0572                               digiErrors_d->view(),
0573                               useQualityInfo,
0574                               includeErrors);
0575         } else {
0576           alpaka::exec<Acc1D>(queue,
0577                               workDiv,
0578                               RawToDigi_kernel<false>{},
0579                               cablingMap,
0580                               modToUnp,
0581                               wordCounter,
0582                               word_d.data(),
0583                               fedId_d.data(),
0584                               digis_d->view(),
0585                               digiErrors_d->view(),
0586                               useQualityInfo,
0587                               includeErrors);
0588         }
0589 
0590 #ifdef GPU_DEBUG
0591         alpaka::wait(queue);
0592         std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0593 #endif
0594       }
0595       // End of Raw2Digi and passing data for clustering
0596 
0597       {
0598         // clusterizer
0599         using namespace pixelClustering;
0600         // calibrations
0601         using namespace calibPixel;
0602         const int threadsPerBlockOrElementsPerThread = []() {
0603           if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0604             // NB: MPORTANT: This could be tuned to benefit from innermost loop.
0605             return 32;
0606           } else {
0607             return 256;
0608           }
0609         }();
0610         const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0611                                                            threadsPerBlockOrElementsPerThread);
0612         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0613 
0614         if (debug) {
0615           alpaka::exec<Acc1D>(queue,
0616                               workDiv,
0617                               CalibDigis<true>{},
0618                               clusterThresholds,
0619                               digis_d->view(),
0620                               clusters_d->view(),
0621                               gains,
0622                               wordCounter);
0623         } else {
0624           alpaka::exec<Acc1D>(queue,
0625                               workDiv,
0626                               CalibDigis<false>{},
0627                               clusterThresholds,
0628                               digis_d->view(),
0629                               clusters_d->view(),
0630                               gains,
0631                               wordCounter);
0632         }
0633 #ifdef GPU_DEBUG
0634         alpaka::wait(queue);
0635         std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0636                   << " threadsPerBlockOrElementsPerThread\n";
0637 #endif
0638 
0639         alpaka::exec<Acc1D>(
0640             queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0641 
0642         auto moduleStartFirstElement =
0643             cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
0644         alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0645 
0646         const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0647         const auto workDivMaxNumModules =
0648             cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0649 #ifdef GPU_DEBUG
0650         std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0651                   << " threadsPerBlockOrElementsPerThread\n";
0652 #endif
0653         alpaka::exec<Acc1D>(
0654             queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0655 #ifdef GPU_DEBUG
0656         alpaka::wait(queue);
0657 #endif
0658 
0659         constexpr auto threadsPerBlockChargeCut = 256;
0660         const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0661         // apply charge cut
0662         alpaka::exec<Acc1D>(queue,
0663                             workDivChargeCut,
0664                             ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0665                             digis_d->view(),
0666                             clusters_d->view(),
0667                             clusterThresholds,
0668                             wordCounter);
0669         // count the module start indices already here (instead of
0670         // rechits) so that the number of clusters/hits can be made
0671         // available in the rechit producer without additional points of
0672         // synchronization/ExternalWork
0673 
0674         // MUST be ONE block
0675         const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0676         alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0677 
0678         // last element holds the number of all clusters
0679         const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
0680             alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0681         constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0682 
0683         // element startBPIX2 hold the number of clusters until BPIX2
0684         const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
0685             alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0686         auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0687         alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0688 
0689         auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0690         alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0691 
0692 #ifdef GPU_DEBUG
0693         alpaka::wait(queue);
0694         std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0695                   << " > no. of digis: " << nDigis << std::endl
0696                   << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0697                   << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0698                   << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0699 #endif
0700 
0701       }  // end clusterizer scope
0702     }
0703 
0704     template <typename TrackerTraits>
0705     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0706         Queue &queue,
0707         const SiPixelClusterThresholds clusterThresholds,
0708         SiPixelDigisSoAView &digis_view,
0709         const uint32_t numDigis) {
0710       using namespace pixelClustering;
0711       using pixelTopology::Phase2;
0712       nDigis = numDigis;
0713       constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0714       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0715       const auto threadsPerBlockOrElementsPerThread = 512;
0716       const auto blocks =
0717           cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0718       const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0719 
0720       alpaka::exec<Acc1D>(
0721           queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0722 
0723 #ifdef GPU_DEBUG
0724       alpaka::wait(queue);
0725       std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0726                 << " threadsPerBlockOrElementsPerThread\n";
0727 #endif
0728       alpaka::exec<Acc1D>(
0729           queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0730 
0731       auto moduleStartFirstElement =
0732           cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
0733       alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0734 
0735       const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0736       const auto workDivMaxNumModules =
0737           cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0738 #ifdef GPU_DEBUG
0739       alpaka::wait(queue);
0740       std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0741                 << " threadsPerBlockOrElementsPerThread\n";
0742 #endif
0743       alpaka::exec<Acc1D>(
0744           queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0745 #ifdef GPU_DEBUG
0746       alpaka::wait(queue);
0747 #endif
0748 
0749       // apply charge cut
0750       alpaka::exec<Acc1D>(queue,
0751                           workDivMaxNumModules,
0752                           ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0753                           digis_view,
0754                           clusters_d->view(),
0755                           clusterThresholds,
0756                           numDigis);
0757 
0758       // count the module start indices already here (instead of
0759       // rechits) so that the number of clusters/hits can be made
0760       // available in the rechit producer without additional points of
0761       // synchronization/ExternalWork
0762 
0763       // MUST be ONE block
0764       const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0765       alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0766 
0767       // last element holds the number of all clusters
0768       const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
0769           alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0770       constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0771       // element startBPIX2 hold the number of clusters until BPIX2
0772       const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
0773           alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0774       auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0775       alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0776 
0777       auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0778       alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0779 
0780 #ifdef GPU_DEBUG
0781       alpaka::wait(queue);
0782       std::cout << "SiPixelPhase2DigiToCluster: results \n"
0783                 << " > no. of digis: " << numDigis << std::endl
0784                 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0785                 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0786                 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0787 #endif
0788     }  //
0789 
0790     template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0791     template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0792     template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0793 
0794   }  // namespace pixelDetails
0795 
0796 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE