Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-07-03 00:42:36

0001 // C++ includes
0002 #include <algorithm>
0003 #include <cassert>
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <type_traits>
0007 
0008 // Alpaka includes
0009 #include <alpaka/alpaka.hpp>
0010 
0011 // CMSSW includes
0012 #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
0013 #include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
0014 #include "DataFormats/DetId/interface/DetId.h"
0015 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
0018 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
0019 #include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h"
0020 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h"
0021 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0022 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
0023 #include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
0024 #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
0025 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0026 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0027 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0028 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0029 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0030 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0031 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0032 
0033 // local includes
0034 #include "CalibPixel.h"
0035 #include "ClusterChargeCut.h"
0036 #include "PixelClustering.h"
0037 #include "SiPixelRawToClusterKernel.h"
0038 
0039 //#define GPU_DEBUG
0040 
0041 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0042   namespace pixelDetails {
0043 
0044     ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) {
0045       return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask));
0046     }
0047 
0048     ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap,
0049                                                     uint8_t fed,
0050                                                     uint32_t link,
0051                                                     uint32_t roc) {
0052       using namespace ::pixelDetails;
0053       uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc;
0054       DetIdGPU detId = {cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]};
0055       return detId;
0056     }
0057 
0058     //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html
0059     //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071
0060     // Convert local pixel to pixelDetails::global pixel
0061     ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion(
0062         bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) {
0063       int slopeRow = 0, slopeCol = 0;
0064       int rowOffset = 0, colOffset = 0;
0065 
0066       if (bpix) {
0067         if (side == -1 && layer != 1) {  // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1
0068           if (rocIdInDetUnit < 8) {
0069             slopeRow = 1;
0070             slopeCol = -1;
0071             rowOffset = 0;
0072             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0073           } else {
0074             slopeRow = -1;
0075             slopeCol = 1;
0076             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0077             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0078           }  // if roc
0079         } else {  // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
0080           if (rocIdInDetUnit < 8) {
0081             slopeRow = -1;
0082             slopeCol = 1;
0083             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0084             colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
0085           } else {
0086             slopeRow = 1;
0087             slopeCol = -1;
0088             rowOffset = 0;
0089             colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0090           }
0091         }
0092 
0093       } else {             // fpix
0094         if (side == -1) {  // pannel 1
0095           if (rocIdInDetUnit < 8) {
0096             slopeRow = 1;
0097             slopeCol = -1;
0098             rowOffset = 0;
0099             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0100           } else {
0101             slopeRow = -1;
0102             slopeCol = 1;
0103             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0104             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0105           }
0106         } else {  // pannel 2
0107           if (rocIdInDetUnit < 8) {
0108             slopeRow = 1;
0109             slopeCol = -1;
0110             rowOffset = 0;
0111             colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
0112           } else {
0113             slopeRow = -1;
0114             slopeCol = 1;
0115             rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
0116             colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
0117           }
0118 
0119         }  // side
0120       }
0121 
0122       uint32_t gRow = rowOffset + slopeRow * local.row;
0123       uint32_t gCol = colOffset + slopeCol * local.col;
0124       // inside frameConversion row: gRow, column: gCol
0125       ::pixelDetails::Pixel global = {gRow, gCol};
0126       return global;
0127     }
0128 
0129     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0130     template <bool debug = false>
0131     ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status) {
0132       uint8_t errorType = 0;
0133 
0134       switch (status) {
0135         case 1: {
0136           if constexpr (debug)
0137             printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId);
0138           errorType = 35;
0139           break;
0140         }
0141         case 2: {
0142           if constexpr (debug)
0143             printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
0144           errorType = 36;
0145           break;
0146         }
0147         case 3: {
0148           if constexpr (debug)
0149             printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
0150           errorType = 37;
0151           break;
0152         }
0153         case 4: {
0154           if constexpr (debug)
0155             printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
0156           errorType = 38;
0157           break;
0158         }
0159         default:
0160           if constexpr (debug)
0161             printf("Cabling check returned unexpected result, status = %i\n", status);
0162       };
0163 
0164       return errorType;
0165     }
0166 
0167     ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
0168       /// row and column in ROC representation
0169       return ((rocRow < ::pixelDetails::numRowsInRoc) & (rocCol < ::pixelDetails::numColsInRoc));
0170     }
0171 
0172     ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
0173 
0174     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0175     template <bool debug = false>
0176     ALPAKA_FN_ACC uint8_t
0177     checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelMappingSoAConstView &cablingMap) {
0178       uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
0179       if (errorType < 25)
0180         return 0;
0181       bool errorFound = false;
0182 
0183       switch (errorType) {
0184         case 25: {
0185           errorFound = true;
0186           uint32_t index =
0187               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
0188           if (index > 1 && index <= cablingMap.size()) {
0189             if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
0190               errorFound = false;
0191           }
0192           if constexpr (debug)
0193             if (errorFound)
0194               printf("Invalid ROC = 25 found (errorType = 25)\n");
0195           break;
0196         }
0197         case 26: {
0198           if constexpr (debug)
0199             printf("Gap word found (errorType = 26)\n");
0200           break;
0201         }
0202         case 27: {
0203           if constexpr (debug)
0204             printf("Dummy word found (errorType = 27)\n");
0205           break;
0206         }
0207         case 28: {
0208           if constexpr (debug)
0209             printf("Error fifo nearly full (errorType = 28)\n");
0210           errorFound = true;
0211           break;
0212         }
0213         case 29: {
0214           if constexpr (debug)
0215             printf("Timeout on a channel (errorType = 29)\n");
0216           if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) {
0217             if constexpr (debug)
0218               printf("...2nd errorType=29 error, skip\n");
0219             break;
0220           }
0221           errorFound = true;
0222           break;
0223         }
0224         case 30: {
0225           if constexpr (debug)
0226             printf("TBM error trailer (errorType = 30)\n");
0227           int stateMatch_bits = 4;
0228           int stateMatch_shift = 8;
0229           uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits);
0230           int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask;
0231           if (stateMatch != 1 && stateMatch != 8) {
0232             if constexpr (debug)
0233               printf("FED error 30 with unexpected State Bits (errorType = 30)\n");
0234             break;
0235           }
0236           if (stateMatch == 1)
0237             errorType = 40;  // 1=Overflow -> 40, 8=number of ROCs -> 30
0238           errorFound = true;
0239           break;
0240         }
0241         case 31: {
0242           if constexpr (debug)
0243             printf("Event number error (errorType = 31)\n");
0244           errorFound = true;
0245           break;
0246         }
0247         default:
0248           errorFound = false;
0249       };
0250 
0251       return errorFound ? errorType : 0;
0252     }
0253 
0254     // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc
0255     template <bool debug = false>
0256     ALPAKA_FN_ACC uint32_t
0257     getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelMappingSoAConstView &cablingMap) {
0258       uint32_t rID = 0xffffffff;
0259 
0260       switch (errorType) {
0261         case 25:
0262         case 29:
0263         case 30:
0264         case 31:
0265         case 36:
0266         case 40: {
0267           uint32_t roc = 1;
0268           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0269           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0270           if (rID_temp != ::pixelClustering::invalidModuleId)
0271             rID = rID_temp;
0272           break;
0273         }
0274         case 37:
0275         case 38: {
0276           uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask;
0277           uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask;
0278           uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId;
0279           if (rID_temp != ::pixelClustering::invalidModuleId)
0280             rID = rID_temp;
0281           break;
0282         }
0283         default:
0284           break;
0285       };
0286 
0287       return rID;
0288     }
0289 
0290     // Kernel to perform Raw to Digi conversion
0291     template <bool debug = false>
0292     struct RawToDigi_kernel {
0293       ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0294                                     const SiPixelMappingSoAConstView &cablingMap,
0295                                     const unsigned char *modToUnp,
0296                                     const uint32_t wordCounter,
0297                                     const uint32_t *word,
0298                                     const uint8_t *fedIds,
0299                                     SiPixelDigisSoAView digisView,
0300                                     SiPixelDigiErrorsSoAView err,
0301                                     bool useQualityInfo,
0302                                     bool includeErrors) const {
0303         // FIXME there is no guarantee that this is initialised to 0 before any of the atomicInc happens
0304         if (cms::alpakatools::once_per_grid(acc))
0305           err.size() = 0;
0306 
0307         for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) {
0308           auto dvgi = digisView[gIndex];
0309           dvgi.xx() = 0;
0310           dvgi.yy() = 0;
0311           dvgi.adc() = 0;
0312 
0313           // initialise the errors
0314           err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0};
0315 
0316           uint8_t fedId = fedIds[gIndex / 2];  // +1200;
0317 
0318           // initialize (too many coninue below)
0319           dvgi.pdigi() = 0;
0320           dvgi.rawIdArr() = 0;
0321           dvgi.moduleId() = ::pixelClustering::invalidModuleId;
0322 
0323           uint32_t ww = word[gIndex];  // Array containing 32 bit raw data
0324           if (ww == 0) {
0325             // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization
0326             continue;
0327           }
0328 
0329           uint32_t link = sipixelconstants::getLink(ww);  // Extract link
0330           uint32_t roc = sipixelconstants::getROC(ww);    // Extract ROC in link
0331 
0332           uint8_t errorType = checkROC<debug>(ww, fedId, link, cablingMap);
0333           bool skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0);
0334           if (includeErrors and skipROC) {
0335             uint32_t rawId = getErrRawID<debug>(fedId, ww, errorType, cablingMap);
0336             if (rawId != 0xffffffff)  // Store errors only for valid DetIds
0337             {
0338               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, errorType, fedId};
0339               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0340             }
0341             continue;
0342           }
0343 
0344           // Check for spurious channels
0345           if (roc > ::pixelDetails::MAX_ROC or link > ::pixelDetails::MAX_LINK) {
0346             uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId;
0347             if constexpr (debug) {
0348               printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex);
0349             }
0350             if (roc > ::pixelDetails::MAX_ROC and roc < 25) {
0351               uint8_t error = conversionError<debug>(fedId, 2);
0352               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0353               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0354             }
0355             continue;
0356           }
0357 
0358           uint32_t index =
0359               fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc;
0360           if (useQualityInfo) {
0361             skipROC = cablingMap.badRocs()[index];
0362             if (skipROC)
0363               continue;
0364           }
0365           skipROC = modToUnp[index];
0366           if (skipROC)
0367             continue;
0368 
0369           ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc);
0370           uint32_t rawId = detId.rawId;
0371           uint32_t layer = 0;
0372           int side = 0, panel = 0, module = 0;
0373           bool barrel = isBarrel(rawId);
0374 
0375           if (barrel) {
0376             layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask;
0377             module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask;
0378             side = (module < 5) ? -1 : 1;
0379           } else {
0380             // endcap ids
0381             layer = 0;
0382             panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask;
0383             side = (panel == 1) ? -1 : 1;
0384           }
0385 
0386           ::pixelDetails::Pixel localPix;
0387           if (layer == 1) {
0388             // Special case of barrel layer 1
0389             uint32_t col = sipixelconstants::getCol(ww);
0390             uint32_t row = sipixelconstants::getRow(ww);
0391             localPix.row = row;
0392             localPix.col = col;
0393             if (includeErrors and not rocRowColIsValid(row, col)) {
0394               uint8_t error = conversionError<debug>(fedId, 3);
0395               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0396               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0397               if constexpr (debug)
0398                 printf("BPIX1 Error status: %i\n", error);
0399               continue;
0400             }
0401           } else {
0402             // Other layers with double columns
0403             uint32_t dcol = sipixelconstants::getDCol(ww);
0404             uint32_t pxid = sipixelconstants::getPxId(ww);
0405             uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2;
0406             uint32_t col = dcol * 2 + pxid % 2;
0407             localPix.row = row;
0408             localPix.col = col;
0409             if (includeErrors and not dcolIsValid(dcol, pxid)) {
0410               uint8_t error = conversionError<debug>(fedId, 3);
0411               err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId};
0412               alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Blocks{});
0413               if constexpr (debug)
0414                 printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc);
0415               continue;
0416             }
0417           }
0418 
0419           ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix);
0420           dvgi.xx() = globalPix.row;  // origin shifting by 1 0-159
0421           dvgi.yy() = globalPix.col;  // origin shifting by 1 0-415
0422           dvgi.adc() = sipixelconstants::getADC(ww);
0423           dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc());
0424           dvgi.moduleId() = detId.moduleId;
0425           dvgi.rawIdArr() = rawId;
0426         }  // end of stride on grid
0427 
0428       }  // end of Raw to Digi kernel operator()
0429     };  // end of Raw to Digi struct
0430 
0431     // just for debugging
0432     template <typename TrackerTraits>
0433     struct ShowHitsModuleStart {
0434       template <typename TAcc>
0435       ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
0436         if (cms::alpakatools::once_per_grid(acc)) {
0437           for (int i = 0; i < TrackerTraits::numberOfModules; i++)
0438             printf("%d \n", clus_view[i].clusModuleStart());
0439         }
0440       }
0441     };
0442 
0443     // Interface to outside
0444     template <typename TrackerTraits>
0445     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase1ClustersAsync(
0446         Queue &queue,
0447         const SiPixelClusterThresholds clusterThresholds,
0448         const SiPixelMappingSoAConstView &cablingMap,
0449         const unsigned char *modToUnp,
0450         const SiPixelGainCalibrationForHLTSoAConstView &gains,
0451         const WordFedAppender &wordFed,
0452         const uint32_t wordCounter,
0453         const uint32_t fedCounter,
0454         bool useQualityInfo,
0455         bool includeErrors,
0456         bool debug) {
0457       nDigis = wordCounter;
0458 
0459 #ifdef GPU_DEBUG
0460       std::cout << "decoding " << wordCounter << " digis." << std::endl;
0461 #endif
0462       constexpr int numberOfModules = TrackerTraits::numberOfModules;
0463       digis_d = SiPixelDigisSoACollection(wordCounter, queue);
0464       if (includeErrors) {
0465         digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
0466       }
0467       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0468       // protect in case of empty event....
0469       if (wordCounter) {
0470         const int threadsPerBlockOrElementsPerThread =
0471             cms::alpakatools::requires_single_thread_per_block_v<Acc1D> ? 32 : 512;
0472         // fill it all
0473         const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
0474         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0475         assert(0 == wordCounter % 2);
0476         // wordCounter is the total no of words in each event to be trasfered on device
0477         auto word_d = cms::alpakatools::make_device_buffer<uint32_t[]>(queue, wordCounter);
0478         // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
0479         // However, only the first half of elements end up eventually used:
0480         // hence, here, only wordCounter/2 elements are allocated.
0481         auto fedId_d = cms::alpakatools::make_device_buffer<uint8_t[]>(queue, wordCounter / 2);
0482         alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
0483         alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
0484         // Launch rawToDigi kernel
0485         if (debug) {
0486           alpaka::exec<Acc1D>(queue,
0487                               workDiv,
0488                               RawToDigi_kernel<true>{},
0489                               cablingMap,
0490                               modToUnp,
0491                               wordCounter,
0492                               word_d.data(),
0493                               fedId_d.data(),
0494                               digis_d->view(),
0495                               digiErrors_d->view(),
0496                               useQualityInfo,
0497                               includeErrors);
0498         } else {
0499           alpaka::exec<Acc1D>(queue,
0500                               workDiv,
0501                               RawToDigi_kernel<false>{},
0502                               cablingMap,
0503                               modToUnp,
0504                               wordCounter,
0505                               word_d.data(),
0506                               fedId_d.data(),
0507                               digis_d->view(),
0508                               digiErrors_d->view(),
0509                               useQualityInfo,
0510                               includeErrors);
0511         }
0512 
0513 #ifdef GPU_DEBUG
0514         alpaka::wait(queue);
0515         std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
0516 #endif
0517       }
0518       // End of Raw2Digi and passing data for clustering
0519 
0520       {
0521         // clusterizer
0522         using namespace pixelClustering;
0523         // calibrations
0524         using namespace calibPixel;
0525         const int threadsPerBlockOrElementsPerThread = []() {
0526           if constexpr (std::is_same_v<Device, alpaka_common::DevHost>) {
0527             // NB: MPORTANT: This could be tuned to benefit from innermost loop.
0528             return 32;
0529           } else {
0530             return 256;
0531           }
0532         }();
0533         const auto blocks = cms::alpakatools::divide_up_by(std::max<int>(wordCounter, numberOfModules),
0534                                                            threadsPerBlockOrElementsPerThread);
0535         const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0536 
0537         if (debug) {
0538           alpaka::exec<Acc1D>(queue,
0539                               workDiv,
0540                               CalibDigis<true>{},
0541                               clusterThresholds,
0542                               digis_d->view(),
0543                               clusters_d->view(),
0544                               gains,
0545                               wordCounter);
0546         } else {
0547           alpaka::exec<Acc1D>(queue,
0548                               workDiv,
0549                               CalibDigis<false>{},
0550                               clusterThresholds,
0551                               digis_d->view(),
0552                               clusters_d->view(),
0553                               gains,
0554                               wordCounter);
0555         }
0556 #ifdef GPU_DEBUG
0557         alpaka::wait(queue);
0558         std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0559                   << " threadsPerBlockOrElementsPerThread\n";
0560 #endif
0561 
0562         alpaka::exec<Acc1D>(
0563             queue, workDiv, CountModules<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0564 
0565         auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0566         alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0567 
0568         const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0569         const auto workDivMaxNumModules =
0570             cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0571 #ifdef GPU_DEBUG
0572         std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0573                   << " threadsPerBlockOrElementsPerThread\n";
0574 #endif
0575         alpaka::exec<Acc1D>(
0576             queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_d->view(), clusters_d->view(), wordCounter);
0577 #ifdef GPU_DEBUG
0578         alpaka::wait(queue);
0579 #endif
0580 
0581         constexpr auto threadsPerBlockChargeCut = 256;
0582         const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
0583         // apply charge cut
0584         alpaka::exec<Acc1D>(queue,
0585                             workDivChargeCut,
0586                             ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0587                             digis_d->view(),
0588                             clusters_d->view(),
0589                             clusterThresholds,
0590                             wordCounter);
0591         // count the module start indices already here (instead of
0592         // rechits) so that the number of clusters/hits can be made
0593         // available in the rechit producer without additional points of
0594         // synchronization/ExternalWork
0595 
0596         constexpr auto threadsPrefixScan = 1024;
0597         constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan;
0598         auto workDivPrefixScan = cms::alpakatools::make_workdiv<Acc1D>(blocksPrefixScan, threadsPrefixScan);
0599         auto bCounter = cms::alpakatools::make_device_buffer<int32_t>(queue);
0600         alpaka::memset(queue, bCounter, 0);
0601 
0602         alpaka::exec<Acc1D>(queue,
0603                             workDivPrefixScan,
0604                             cms::alpakatools::multiBlockPrefixScan<uint32_t>(),
0605                             clusters_d->view().clusInModule(),
0606                             clusters_d->view().clusModuleStart() + 1,
0607                             TrackerTraits::numberOfModules,
0608                             blocksPrefixScan,
0609                             bCounter.data(),
0610                             alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
0611 
0612         // last element holds the number of all clusters
0613         const auto clusModuleStartLastElement =
0614             cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0615         constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0616         // element startBPIX2 hold the number of clusters until BPIX2
0617         const auto bpix2ClusterStart =
0618             cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0619         auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0620         alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0621 
0622         auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0623         alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0624 
0625 #ifdef GPU_DEBUG
0626         alpaka::wait(queue);
0627         std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
0628                   << " > no. of digis: " << nDigis << std::endl
0629                   << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0630                   << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0631                   << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0632 #endif
0633 
0634       }  // end clusterizer scope
0635     }
0636 
0637     template <typename TrackerTraits>
0638     void SiPixelRawToClusterKernel<TrackerTraits>::makePhase2ClustersAsync(
0639         Queue &queue,
0640         const SiPixelClusterThresholds clusterThresholds,
0641         SiPixelDigisSoAView &digis_view,
0642         const uint32_t numDigis) {
0643       using namespace pixelClustering;
0644       using pixelTopology::Phase2;
0645       nDigis = numDigis;
0646       constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
0647       clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
0648       const auto threadsPerBlockOrElementsPerThread = 512;
0649       const auto blocks =
0650           cms::alpakatools::divide_up_by(std::max<int>(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
0651       const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(blocks, threadsPerBlockOrElementsPerThread);
0652 
0653       alpaka::exec<Acc1D>(
0654           queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
0655 
0656 #ifdef GPU_DEBUG
0657       alpaka::wait(queue);
0658       std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
0659                 << " threadsPerBlockOrElementsPerThread\n";
0660 #endif
0661       alpaka::exec<Acc1D>(
0662           queue, workDiv, CountModules<pixelTopology::Phase2>{}, digis_view, clusters_d->view(), numDigis);
0663 
0664       auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u);
0665       alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
0666 
0667       const auto elementsPerBlockFindClus = FindClus<TrackerTraits>::maxElementsPerBlock;
0668       const auto workDivMaxNumModules =
0669           cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, elementsPerBlockFindClus);
0670 #ifdef GPU_DEBUG
0671       alpaka::wait(queue);
0672       std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus
0673                 << " threadsPerBlockOrElementsPerThread\n";
0674 #endif
0675       alpaka::exec<Acc1D>(
0676           queue, workDivMaxNumModules, FindClus<TrackerTraits>{}, digis_view, clusters_d->view(), numDigis);
0677 #ifdef GPU_DEBUG
0678       alpaka::wait(queue);
0679 #endif
0680 
0681       // apply charge cut
0682       alpaka::exec<Acc1D>(queue,
0683                           workDivMaxNumModules,
0684                           ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
0685                           digis_view,
0686                           clusters_d->view(),
0687                           clusterThresholds,
0688                           numDigis);
0689 
0690       // count the module start indices already here (instead of
0691       // rechits) so that the number of clusters/hits can be made
0692       // available in the rechit producer without additional points of
0693       // synchronization/ExternalWork
0694 
0695       constexpr auto threadsPrefixScan = 1024;
0696       constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan;
0697       auto workDivPrefixScan = cms::alpakatools::make_workdiv<Acc1D>(blocksPrefixScan, threadsPrefixScan);
0698       auto bCounter = cms::alpakatools::make_device_buffer<int32_t>(queue);
0699       alpaka::memset(queue, bCounter, 0);
0700 
0701       alpaka::exec<Acc1D>(queue,
0702                           workDivPrefixScan,
0703                           cms::alpakatools::multiBlockPrefixScan<uint32_t>(),
0704                           clusters_d->view().clusInModule(),
0705                           clusters_d->view().clusModuleStart() + 1,
0706                           TrackerTraits::numberOfModules,
0707                           blocksPrefixScan,
0708                           bCounter.data(),
0709                           alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
0710 
0711       // last element holds the number of all clusters
0712       const auto clusModuleStartLastElement =
0713           cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
0714       constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
0715       // element startBPIX2 hold the number of clusters until BPIX2
0716       const auto bpix2ClusterStart =
0717           cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
0718       auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
0719       alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
0720 
0721       auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
0722       alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
0723 
0724 #ifdef GPU_DEBUG
0725       alpaka::wait(queue);
0726       std::cout << "SiPixelPhase2DigiToCluster: results \n"
0727                 << " > no. of digis: " << numDigis << std::endl
0728                 << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
0729                 << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
0730                 << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
0731 #endif
0732     }  //
0733 
0734     template class SiPixelRawToClusterKernel<pixelTopology::Phase1>;
0735     template class SiPixelRawToClusterKernel<pixelTopology::Phase2>;
0736     template class SiPixelRawToClusterKernel<pixelTopology::HIonPhase1>;
0737 
0738   }  // namespace pixelDetails
0739 
0740 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE