Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-02-14 03:16:53

0001 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0002 #define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0003 
0004 #include <cstdint>
0005 #include <cstdio>
0006 
0007 #include <alpaka/alpaka.hpp>
0008 
0009 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0010 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0011 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0014 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0015 
0016 //#define GPU_DEBUG
0017 
0018 namespace pixelClustering {
0019 
0020   template <typename TrackerTraits>
0021   struct ClusterChargeCut {
0022     template <typename TAcc>
0023     ALPAKA_FN_ACC void operator()(TAcc const& acc,
0024                                   SiPixelDigisSoAView digi_view,
0025                                   SiPixelClustersSoAView clus_view,
0026                                   // charge cut on cluster in electrons (for layer 1 and for other layers)
0027                                   SiPixelClusterThresholds clusterThresholds,
0028                                   const uint32_t numElements) const {
0029       constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;
0030 
0031 #ifdef GPU_DEBUG
0032       if (cms::alpakatools::once_per_grid(acc)) {
0033         printf("All digis before cut: \n");
0034         for (uint32_t i = 0; i < numElements; i++)
0035           printf("%d %d %d %d %d \n",
0036                  i,
0037                  digi_view[i].rawIdArr(),
0038                  digi_view[i].clus(),
0039                  digi_view[i].pdigi(),
0040                  digi_view[i].adc());
0041       }
0042       alpaka::syncBlockThreads(acc);
0043 #endif
0044 
0045       auto& charge = alpaka::declareSharedVar<int32_t[maxNumClustersPerModules], __COUNTER__>(acc);
0046       auto& ok = alpaka::declareSharedVar<uint8_t[maxNumClustersPerModules], __COUNTER__>(acc);
0047       auto& newclusId = alpaka::declareSharedVar<uint16_t[maxNumClustersPerModules], __COUNTER__>(acc);
0048 
0049       constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0050 
0051       ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < maxNumModules);
0052       ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules);
0053 
0054       auto endModule = clus_view[0].moduleStart();
0055 
0056       for (auto module : cms::alpakatools::independent_groups(acc, endModule)) {
0057         auto firstPixel = clus_view[1 + module].moduleStart();
0058         auto thisModuleId = digi_view[firstPixel].moduleId();
0059         while (thisModuleId == invalidModuleId and firstPixel < numElements) {
0060           // skip invalid or duplicate pixels
0061           ++firstPixel;
0062           thisModuleId = digi_view[firstPixel].moduleId();
0063         }
0064         if (firstPixel >= numElements) {
0065           // reached the end of the input while skipping the invalid pixels, nothing left to do
0066           break;
0067         }
0068         if (thisModuleId != clus_view[module].moduleId()) {
0069           // reached the end of the module while skipping the invalid pixels, skip this module
0070           continue;
0071         }
0072         ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules);
0073 
0074         uint32_t nclus = clus_view[thisModuleId].clusInModule();
0075         if (nclus == 0)
0076           return;
0077 
0078         if (cms::alpakatools::once_per_block(acc) && nclus > maxNumClustersPerModules)
0079           printf("Warning: too many clusters in module %u in block %u: %u > %d\n",
0080                  thisModuleId,
0081                  module,
0082                  nclus,
0083                  maxNumClustersPerModules);
0084 
0085         if (nclus > maxNumClustersPerModules) {
0086           // remove excess  FIXME find a way to cut charge first....
0087           for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0088             if (digi_view[i].moduleId() == invalidModuleId)
0089               continue;  // not valid
0090             if (digi_view[i].moduleId() != thisModuleId)
0091               break;  // end of module
0092             if (digi_view[i].clus() >= maxNumClustersPerModules) {
0093               digi_view[i].moduleId() = invalidModuleId;
0094               digi_view[i].clus() = invalidModuleId;
0095             }
0096           }
0097           nclus = maxNumClustersPerModules;
0098         }
0099 
0100 #ifdef GPU_DEBUG
0101         if (thisModuleId % 100 == 1)
0102           if (cms::alpakatools::once_per_block(acc))
0103             printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module);
0104 #endif
0105 
0106         ALPAKA_ASSERT_ACC(nclus <= maxNumClustersPerModules);
0107         for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0108           charge[i] = 0;
0109         }
0110         alpaka::syncBlockThreads(acc);
0111 
0112         for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0113           if (digi_view[i].moduleId() == invalidModuleId)
0114             continue;  // not valid
0115           if (digi_view[i].moduleId() != thisModuleId)
0116             break;  // end of module
0117           alpaka::atomicAdd(acc,
0118                             &charge[digi_view[i].clus()],
0119                             static_cast<int32_t>(digi_view[i].adc()),
0120                             alpaka::hierarchy::Threads{});
0121         }
0122         alpaka::syncBlockThreads(acc);
0123 
0124         auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
0125 
0126         bool good = true;
0127         for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0128           newclusId[i] = ok[i] = (charge[i] >= chargeCut) ? 1 : 0;
0129           if (0 == ok[i])
0130             good = false;
0131 #ifdef GPU_DEBUG
0132           printf("Cutting pix %d in module %d newId %d ok? %d charge %d cut %d -> good %d \n",
0133                  i,
0134                  thisModuleId,
0135                  newclusId[i],
0136                  ok[i],
0137                  charge[i],
0138                  chargeCut,
0139                  good);
0140 #endif
0141         }
0142         // if all clusters are above threshold, do nothing
0143         if (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, good))
0144           continue;
0145 
0146         // renumber
0147         // FIXME move this logic inside a single prefixscan() function ?
0148         if constexpr (cms::alpakatools::requires_single_thread_per_block_v<TAcc>) {
0149           // for a single-threaded accelerator, use a simple loop
0150           for (uint32_t i = 1; i < nclus; ++i) {
0151             newclusId[i] += newclusId[i - 1];
0152           }
0153         } else {
0154           // for a multi-threaded accelerator, use an iterative block-based prefix scan
0155           constexpr int warpSize = cms::alpakatools::warpSize;
0156           // FIXME this value should come from cms::alpakatools::blockPrefixScan itself
0157           constexpr uint32_t maxThreads = warpSize * warpSize;
0158 
0159           auto& ws = alpaka::declareSharedVar<uint16_t[warpSize], __COUNTER__>(acc);
0160           auto minClust = std::min(nclus, maxThreads);
0161 
0162           // process the first maxThreads elements
0163           cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws);
0164 
0165           // if there may be more than maxThreads elements, repeat the prefix scan and update the intermediat sums
0166           if constexpr (maxNumClustersPerModules > maxThreads) {
0167             for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
0168               cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws);
0169               for (uint32_t i : cms::alpakatools::independent_group_elements(acc, offset, nclus)) {
0170                 uint32_t prevBlockEnd = (i / maxThreads) * maxThreads - 1;
0171                 newclusId[i] += newclusId[prevBlockEnd];
0172               }
0173               alpaka::syncBlockThreads(acc);
0174             }
0175           }
0176         }
0177 
0178         ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]);
0179 
0180         clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
0181 
0182         // reassign id
0183         for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0184           if (digi_view[i].moduleId() == invalidModuleId)
0185             continue;  // not valid
0186           if (digi_view[i].moduleId() != thisModuleId)
0187             break;  // end of module
0188           if (0 == ok[digi_view[i].clus()])
0189             digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId;
0190           else
0191             digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1;
0192         }
0193 
0194         // done
0195         alpaka::syncBlockThreads(acc);
0196 #ifdef GPU_DEBUG
0197         if (cms::alpakatools::once_per_grid(acc)) {
0198           printf("All digis AFTER cut: \n");
0199           for (uint32_t i = 0; i < numElements; i++)
0200             printf("%d %d %d %d %d \n",
0201                    i,
0202                    digi_view[i].rawIdArr(),
0203                    digi_view[i].clus(),
0204                    digi_view[i].pdigi(),
0205                    digi_view[i].adc());
0206         }
0207 #endif
0208       }
0209     }
0210   };
0211 
0212 }  // namespace pixelClustering
0213 
0214 #endif  // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h