Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-02-14 03:16:54

0001 // C++ includes
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007 
0008 // Alpaka includes
0009 #include <alpaka/alpaka.hpp>
0010 
0011 // CMSSW includes
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0030 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0031 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0032 
0033 // local includes
0034 #include "CalibPixel.h"
0035 #include "ClusterChargeCut.h"
0036 #include "PixelClustering.h"
0037 #include "SiPixelRawToClusterKernel.h"
0038 
0039 //#define GPU_DEBUG
0040 
0041 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0042   namespace pixelDetails {
0043 
0044     ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0045       return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0046     }
0047 
0048     ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0049                                                     uint8_t fed,
0050                                                     uint32_t link,
0051                                                     uint32_t roc) {
0052       using namespace ::pixelDetails;
0053       uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0054       DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0055       return detId;
0056     }
0057 
0058     //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
0059     //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
0060     // Convert local pixel to pixelDetails::global pixel
0061     ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0062         bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0063       int slopeRow = 0, slopeCol = 0;
0064       int rowOffset = 0, colOffset = 0;
0065 
0066       if (bpix) {
0067         if (side == -1 && layer != 1) {  // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
0068           if (rocIdInDetUnit < 8) {
0069             slopeRow = 1;
0070             slopeCol = -1;
0071             rowOffset = 0;
0072             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0073           } else {
0074             slopeRow = -1;
0075             slopeCol = 1;
0076             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0077             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0078           }  // if roc
0079         } else {  // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
0080           if (rocIdInDetUnit < 8) {
0081             slopeRow = -1;
0082             slopeCol = 1;
0083             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0084             colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0085           } else {
0086             slopeRow = 1;
0087             slopeCol = -1;
0088             rowOffset = 0;
0089             colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0090           }
0091         }
0092 
0093       } else {             // fpix
0094         if (side == -1) {  // pannel 1
0095           if (rocIdInDetUnit < 8) {
0096             slopeRow = 1;
0097             slopeCol = -1;
0098             rowOffset = 0;
0099             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0100           } else {
0101             slopeRow = -1;
0102             slopeCol = 1;
0103             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0104             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0105           }
0106         } else {  // pannel 2
0107           if (rocIdInDetUnit < 8) {
0108             slopeRow = 1;
0109             slopeCol = -1;
0110             rowOffset = 0;
0111             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0112           } else {
0113             slopeRow = -1;
0114             slopeCol = 1;
0115             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0116             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0117           }
0118 
0119         }  // side
0120       }
0121 
0122       uint32_t gRow = rowOffset + slopeRow * local.row;
0123       uint32_t gCol = colOffset + slopeCol * local.col;
0124       // inside frameConversion row: gRow, column: gCol
0125       ::pixelDetails::Pixel global = {gRow, gCol};
0126       return global;
0127     }
0128 
0129     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0130     template <bool debug = false>
0131     ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0132       uint8_t errorType = 0;
0133 
0134       switch (status) {
0135         case 1: {
0136           if constexpr (debug)
0137             printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0138           errorType = 35;
0139           break;
0140         }
0141         case 2: {
0142           if constexpr (debug)
0143             printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0144           errorType = 36;
0145           break;
0146         }
0147         case 3: {
0148           if constexpr (debug)
0149             printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0150           errorType = 37;
0151           break;
0152         }
0153         case 4: {
0154           if constexpr (debug)
0155             printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0156           errorType = 38;
0157           break;
0158         }
0159         default:
0160           if constexpr (debug)
0161             printf("Cabling check returned unexpected result, status = %i\n", status);
0162       };
0163 
0164       return errorType;
0165     }
0166 
0167     ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0168       /// row and column in ROC representation
0169       return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0170     }
0171 
0172     ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0173 
0174     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0175     template <bool debug = false>
0176     ALPAKA_FN_ACC uint8_t
0177     checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0178       uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0179       if (errorType < 25)
0180         return 0;
0181       bool errorFound = false;
0182 
0183       switch (errorType) {
0184         case 25: {
0185           errorFound = true;
0186           uint32_t index =
0187               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0188           if (index > 1 && index <= cablingMap.size()) {
0189             if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0190               errorFound = false;
0191           }
0192           if constexpr (debug)
0193             if (errorFound)
0194               printf("Invalid ROC = 25 found (errorType = 25)\n");
0195           break;
0196         }
0197         case 26: {
0198           if constexpr (debug)
0199             printf("Gap word found (errorType = 26)\n");
0200           break;
0201         }
0202         case 27: {
0203           if constexpr (debug)
0204             printf("Dummy word found (errorType = 27)\n");
0205           break;
0206         }
0207         case 28: {
0208           if constexpr (debug)
0209             printf("Error fifo nearly full (errorType = 28)\n");
0210           errorFound = true;
0211           break;
0212         }
0213         case 29: {
0214           if constexpr (debug)
0215             printf("Timeout on a channel (errorType = 29)\n");
0216           if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0217             if constexpr (debug)
0218               printf("...2nd errorType=29 error, skip\n");
0219             break;
0220           }
0221           errorFound = true;
0222           break;
0223         }
0224         case 30: {
0225           if constexpr (debug)
0226             printf("TBM error trailer (errorType = 30)\n");
0227           int stateMatch_bits = 4;
0228           int stateMatch_shift = 8;
0229           uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0230           int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0231           if (stateMatch != 1 && stateMatch != 8) {
0232             if constexpr (debug)
0233               printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0234             break;
0235           }
0236           if (stateMatch == 1)
0237             errorType = 40;  // 1=Overflow -> 40, 8=number of ROCs -> 30
0238           errorFound = true;
0239           break;
0240         }
0241         case 31: {
0242           if constexpr (debug)
0243             printf("Event number error (errorType = 31)\n");
0244           errorFound = true;
0245           break;
0246         }
0247         default:
0248           errorFound = false;
0249       };
0250 
0251       return errorFound ? errorType : 0;
0252     }
0253 
0254     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0255     template <bool debug = false>
0256     ALPAKA_FN_ACC uint32_t
0257     getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0258       uint32_t rID = 0xffffffff;
0259 
0260       switch (errorType) {
0261         case 25:
0262         case 29:
0263         case 30:
0264         case 31:
0265         case 36:
0266         case 40: {
0267           uint32_t roc = 1;
0268           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0269           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0270           if (rID_temp != ::pixelClustering::invalidModuleId)
0271             rID = rID_temp;
0272           break;
0273         }
0274         case 37:
0275         case 38: {
0276           uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0277           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0278           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0279           if (rID_temp != ::pixelClustering::invalidModuleId)
0280             rID = rID_temp;
0281           break;
0282         }
0283         default:
0284           break;
0285       };
0286 
0287       return rID;
0288     }
0289 
0290     // Kernel to perform Raw to Digi conversion
0291     template <bool debug = false>
0292     struct RawToDigi_kernel {
0293       ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0294                                     const SiPixelMappingSoAConstView &cablingMap,
0295                                     const unsigned char *modToUnp,
0296                                     const uint32_t wordCounter,
0297                                     const uint32_t *word,
0298                                     const uint8_t *fedIds,
0299                                     SiPixelDigisSoAView digisView,
0300                                     SiPixelDigiErrorsSoAView err,
0301                                     bool useQualityInfo,
0302                                     bool includeErrors) const {
0303         // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
0304         if (cms::alpakatools::once_per_grid(acc))
0305           err.size() = 0;
0306 
0307         for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308           auto dvgi = digisView[gIndex];
0309           dvgi.xx() = 0;
0310           dvgi.yy() = 0;
0311           dvgi.adc() = 0;
0312 
0313           // initialise the errors
0314           err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315 
0316           uint8_t fedId = fedIds[gIndex / 2];  // +1200;
0317 
0318           // initialize (too many coninue below)
0319           dvgi.pdigi() = 0;
0320           dvgi.rawIdArr() = 0;
0321           dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322 
0323           uint32_t ww = word[gIndex];  // Array containing 32 bit raw data
0324           if (ww == 0) {
0325             // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
0326             continue;
0327           }
0328 
0329           uint32_t link = sipixelconstants::getLink(ww);  // Extract link
0330           uint32_t roc = sipixelconstants::getROC(ww);    // Extract ROC in link
0331 
0332           uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333           bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334           if (includeErrors and skipROC) {
0335             uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336             if (rawId != 0xffffffff)  // Store errors only for valid DetIds
0337             {
0338               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340             }
0341             continue;
0342           }
0343 
0344           // Check for spurious channels
0345           if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346             uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347             if constexpr (debug) {
0348               printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349             }
0350             if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351               uint8_t error = conversionError<debug>(fedId, 2);
0352               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354             }
0355             continue;
0356           }
0357 
0358           uint32_t index =
0359               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360           if (useQualityInfo) {
0361             skipROC = cablingMap.badRocs()[index];
0362             if (skipROC)
0363               continue;
0364           }
0365           skipROC = modToUnp[index];
0366           if (skipROC)
0367             continue;
0368 
0369           ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370           uint32_t rawId = detId.rawId;
0371           uint32_t layer = 0;
0372           int side = 0, panel = 0, module = 0;
0373           bool barrel = isBarrel(rawId);
0374 
0375           if (barrel) {
0376             layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377             module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378             side = (module < 5) ? -1 : 1;
0379           } else {
0380             // endcap ids
0381             layer = 0;
0382             panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383             side = (panel == 1) ? -1 : 1;
0384           }
0385 
0386           ::pixelDetails::Pixel localPix;
0387           if (layer == 1) {
0388             // Special case of barrel layer 1
0389             uint32_t col = sipixelconstants::getCol(ww);
0390             uint32_t row = sipixelconstants::getRow(ww);
0391             localPix.row = row;
0392             localPix.col = col;
0393             if (includeErrors and not rocRowColIsValid(row, col)) {
0394               uint8_t error = conversionError<debug>(fedId, 3);
0395               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397               if constexpr (debug)
0398                 printf("BPIX1 Error status: %i\n", error);
0399               continue;
0400             }
0401           } else {
0402             // Other layers with double columns
0403             uint32_t dcol = sipixelconstants::getDCol(ww);
0404             uint32_t pxid = sipixelconstants::getPxId(ww);
0405             uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406             uint32_t col = dcol * 2 + pxid % 2;
0407             localPix.row = row;
0408             localPix.col = col;
0409             if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410               uint8_t error = conversionError<debug>(fedId, 3);
0411               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413               if constexpr (debug)
0414                 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415               continue;
0416             }
0417           }
0418 
0419           ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420           dvgi.xx() = globalPix.row;  // origin shifting by 1 0-159
0421           dvgi.yy() = globalPix.col;  // origin shifting by 1 0-415
0422           dvgi.adc() = sipixelconstants::getADC(ww);
0423           dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424           dvgi.moduleId() = detId.moduleId;
0425           dvgi.rawIdArr() = rawId;
0426         }  // end of stride on grid
0427 
0428       }  // end of Raw to Digi kernel operator()
0429     };  // end of Raw to Digi struct
0430 
0431     template <typename TrackerTraits>
0432     struct FillHitsModuleStart {
0433       ALPAKA_FN_ACC void operator()(Acc1D const &acc, SiPixelClustersSoAView clus_view) const {
0434         // This kernel must run with a single block
0435         [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0436         ALPAKA_ASSERT_ACC(0 == blockIdxLocal);
0437         [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
0438         ALPAKA_ASSERT_ACC(1 == gridDimension);
0439 
0440         // For the prefix scan algorithm
0441         constexpr int warpSize = cms::alpakatools::warpSize;
0442         constexpr int blockSize = warpSize * warpSize;
0443 
0444         // For Phase1 there are 1856 pixel modules
0445         // For Phase2 there are up to 4000 pixel modules
0446         constexpr uint16_t numberOfModules = TrackerTraits::numberOfModules;
0447         constexpr uint16_t prefixScanUpperLimit = ((numberOfModules / blockSize) + 1) * blockSize;
0448         ALPAKA_ASSERT_ACC(numberOfModules < prefixScanUpperLimit);
0449 
0450         // Limit to maxHitsInModule;
0451         constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
0452         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0453           clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
0454         }
0455 
0456         // Use N single-block prefix scan, then update all blocks after the first one.
0457         auto &ws = alpaka::declareSharedVar<uint32_t[warpSize], __COUNTER__>(acc);
0458         uint32_t *clusModuleStart = clus_view.clusModuleStart() + 1;
0459         uint16_t leftModules = numberOfModules;
0460         while (leftModules > blockSize) {
0461           cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, blockSize, ws);
0462           clusModuleStart += blockSize;
0463           leftModules -= blockSize;
0464         }
0465         cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, leftModules, ws);
0466 
0467         // The first blockSize modules are properly accounted by the blockPrefixScan.
0468         // The additional modules need to be corrected adding the cuulative value from the last module of the previous block.
0469         for (uint16_t doneModules = blockSize; doneModules < numberOfModules; doneModules += blockSize) {
0470           uint16_t first = doneModules + 1;
0471           uint16_t last = std::min<uint16_t>(doneModules + blockSize, numberOfModules);
0472           for (uint16_t i : cms::alpakatools::independent_group_elements(acc, first, last + 1)) {
0473             clus_view[i].clusModuleStart() += clus_view[doneModules].clusModuleStart();
0474           }
0475           alpaka::syncBlockThreads(acc);
0476         }
0477 
0478 #ifdef GPU_DEBUG
0479         ALPAKA_ASSERT_ACC(0 == clus_view[0].clusModuleStart());
0480         auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
0481         ALPAKA_ASSERT_ACC(c0 == clus_view[1].clusModuleStart());
0482         ALPAKA_ASSERT_ACC(clus_view[1024].clusModuleStart() >= clus_view[1023].clusModuleStart());
0483         ALPAKA_ASSERT_ACC(clus_view[1025].clusModuleStart() >= clus_view[1024].clusModuleStart());
0484         ALPAKA_ASSERT_ACC(clus_view[numberOfModules].clusModuleStart() >= clus_view[1025].clusModuleStart());
0485 
0486         for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) {
0487           ALPAKA_ASSERT_ACC(clus_view[i + 1].clusModuleStart() >= clus_view[i].clusModuleStart());
0488           // Check BPX2 (1), FP1 (4)
0489           constexpr auto bpix2 = TrackerTraits::layerStart[1];
0490           constexpr auto fpix1 = TrackerTraits::layerStart[4];
0491           if (i == bpix2 || i == fpix1)
0492             printf("moduleStart %d %d\n", i, clus_view[i].clusModuleStart());
0493         }
0494 #endif
0495 
0496       }  // end of FillHitsModuleStart kernel operator()
0497     };  // end of FillHitsModuleStart struct
0498 
0499     // Interface to outside
0500     template <typename TrackerTraits>
0501     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0502         Queue &queue,
0503         const SiPixelClusterThresholds clusterThresholds,
0504         const SiPixelMappingSoAConstView &cablingMap,
0505         const unsigned char *modToUnp,
0506         const SiPixelGainCalibrationForHLTSoAConstView &gains,
0507         const WordFedAppender &wordFed,
0508         const uint32_t wordCounter,
0509         const uint32_t fedCounter,
0510         bool useQualityInfo,
0511         bool includeErrors,
0512         bool debug) {
0513       nDigis = wordCounter;
0514 
0515 #ifdef GPU_DEBUG
0516       std::cout << "decoding " << wordCounter << " digis." << std::endl;
0517 #endif
0518       constexpr int numberOfModules = TrackerTraits::numberOfModules;
0519       digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0520       if (includeErrors) {
0521         digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0522       }
0523       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0524       // protect in case of empty event....
0525       if (wordCounter) {
0526         const int threadsPerBlockOrElementsPerThread =
0527             cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0528         // fill it all
0529         const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0530         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0531         assert(0 == wordCounter % 2);
0532         // wordCounter is the total no of words in each event to be trasfered on device
0533         auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0534         // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
0535         // However, only the first half of elements end up eventually used:
0536         // hence, here, only wordCounter/2 elements are allocated.
0537         auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0538         alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0539         alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0540         // Launch rawToDigi kernel
0541         if (debug) {
0542           alpaka::exec<Acc1D>(queue,
0543                               workDiv,
0544                               RawToDigi_kernel<true>{},
0545                               cablingMap,
0546                               modToUnp,
0547                               wordCounter,
0548                               word_d.data(),
0549                               fedId_d.data(),
0550                               digis_d->view(),
0551                               digiErrors_d->view(),
0552                               useQualityInfo,
0553                               includeErrors);
0554         } else {
0555           alpaka::exec<Acc1D>(queue,
0556                               workDiv,
0557                               RawToDigi_kernel<false>{},
0558                               cablingMap,
0559                               modToUnp,
0560                               wordCounter,
0561                               word_d.data(),
0562                               fedId_d.data(),
0563                               digis_d->view(),
0564                               digiErrors_d->view(),
0565                               useQualityInfo,
0566                               includeErrors);
0567         }
0568 
0569 #ifdef GPU_DEBUG
0570         alpaka::wait(queue);
0571         std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0572 #endif
0573       }
0574       // End of Raw2Digi and passing data for clustering
0575 
0576       {
0577         // clusterizer
0578         using namespace pixelClustering;
0579         // calibrations
0580         using namespace calibPixel;
0581         const int threadsPerBlockOrElementsPerThread = []() {
0582           if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0583             // NB: MPORTANT: This could be tuned to benefit from innermost loop.
0584             return 32;
0585           } else {
0586             return 256;
0587           }
0588         }();
0589         const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0590                                                            threadsPerBlockOrElementsPerThread);
0591         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0592 
0593         if (debug) {
0594           alpaka::exec<Acc1D>(queue,
0595                               workDiv,
0596                               CalibDigis<true>{},
0597                               clusterThresholds,
0598                               digis_d->view(),
0599                               clusters_d->view(),
0600                               gains,
0601                               wordCounter);
0602         } else {
0603           alpaka::exec<Acc1D>(queue,
0604                               workDiv,
0605                               CalibDigis<false>{},
0606                               clusterThresholds,
0607                               digis_d->view(),
0608                               clusters_d->view(),
0609                               gains,
0610                               wordCounter);
0611         }
0612 #ifdef GPU_DEBUG
0613         alpaka::wait(queue);
0614         std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0615                   << " threadsPerBlockOrElementsPerThread\n";
0616 #endif
0617 
0618         alpaka::exec<Acc1D>(
0619             queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0620 
0621         auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0622         alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0623 
0624         const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0625         const auto workDivMaxNumModules =
0626             cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0627 #ifdef GPU_DEBUG
0628         std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0629                   << " threadsPerBlockOrElementsPerThread\n";
0630 #endif
0631         alpaka::exec<Acc1D>(
0632             queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0633 #ifdef GPU_DEBUG
0634         alpaka::wait(queue);
0635 #endif
0636 
0637         constexpr auto threadsPerBlockChargeCut = 256;
0638         const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0639         // apply charge cut
0640         alpaka::exec<Acc1D>(queue,
0641                             workDivChargeCut,
0642                             ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0643                             digis_d->view(),
0644                             clusters_d->view(),
0645                             clusterThresholds,
0646                             wordCounter);
0647         // count the module start indices already here (instead of
0648         // rechits) so that the number of clusters/hits can be made
0649         // available in the rechit producer without additional points of
0650         // synchronization/ExternalWork
0651 
0652         // MUST be ONE block
0653         const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0654         alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0655 
0656         // last element holds the number of all clusters
0657         const auto clusModuleStartLastElement =
0658             cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0659         constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0660 
0661         // element startBPIX2 hold the number of clusters until BPIX2
0662         const auto bpix2ClusterStart =
0663             cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0664         auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0665         alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0666 
0667         auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0668         alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0669 
0670 #ifdef GPU_DEBUG
0671         alpaka::wait(queue);
0672         std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0673                   << " > no. of digis: " << nDigis << std::endl
0674                   << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0675                   << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0676                   << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0677 #endif
0678 
0679       }  // end clusterizer scope
0680     }
0681 
0682     template <typename TrackerTraits>
0683     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0684         Queue &queue,
0685         const SiPixelClusterThresholds clusterThresholds,
0686         SiPixelDigisSoAView &digis_view,
0687         const uint32_t numDigis) {
0688       using namespace pixelClustering;
0689       using pixelTopology::Phase2;
0690       nDigis = numDigis;
0691       constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0692       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0693       const auto threadsPerBlockOrElementsPerThread = 512;
0694       const auto blocks =
0695           cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0696       const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0697 
0698       alpaka::exec<Acc1D>(
0699           queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0700 
0701 #ifdef GPU_DEBUG
0702       alpaka::wait(queue);
0703       std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0704                 << " threadsPerBlockOrElementsPerThread\n";
0705 #endif
0706       alpaka::exec<Acc1D>(
0707           queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0708 
0709       auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0710       alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0711 
0712       const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0713       const auto workDivMaxNumModules =
0714           cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0715 #ifdef GPU_DEBUG
0716       alpaka::wait(queue);
0717       std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0718                 << " threadsPerBlockOrElementsPerThread\n";
0719 #endif
0720       alpaka::exec<Acc1D>(
0721           queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0722 #ifdef GPU_DEBUG
0723       alpaka::wait(queue);
0724 #endif
0725 
0726       // apply charge cut
0727       alpaka::exec<Acc1D>(queue,
0728                           workDivMaxNumModules,
0729                           ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0730                           digis_view,
0731                           clusters_d->view(),
0732                           clusterThresholds,
0733                           numDigis);
0734 
0735       // count the module start indices already here (instead of
0736       // rechits) so that the number of clusters/hits can be made
0737       // available in the rechit producer without additional points of
0738       // synchronization/ExternalWork
0739 
0740       // MUST be ONE block
0741       const auto workDivOneBlock = cms::alpakatools::make_workdiv<Acc1D>(1u, 1024u);
0742       alpaka::exec<Acc1D>(queue, workDivOneBlock, FillHitsModuleStart<TrackerTraits>{}, clusters_d->view());
0743 
0744       // last element holds the number of all clusters
0745       const auto clusModuleStartLastElement =
0746           cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0747       constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0748       // element startBPIX2 hold the number of clusters until BPIX2
0749       const auto bpix2ClusterStart =
0750           cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0751       auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0752       alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0753 
0754       auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0755       alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0756 
0757 #ifdef GPU_DEBUG
0758       alpaka::wait(queue);
0759       std::cout << "SiPixelPhase2DigiToCluster: results \n"
0760                 << " > no. of digis: " << numDigis << std::endl
0761                 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0762                 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0763                 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0764 #endif
0765     }  //
0766 
0767     template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0768     template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0769     template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0770 
0771   }  // namespace pixelDetails
0772 
0773 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE