File indexing completed on 2025-02-14 03:16:53
0001 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0002 #define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0003
0004 #include <cstdint>
0005 #include <cstdio>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0010 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0011 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0014 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0015
0016
0017
0018 namespace pixelClustering {
0019
0020 template <typename TrackerTraits>
0021 struct ClusterChargeCut {
0022 template <typename TAcc>
0023 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0024 SiPixelDigisSoAView digi_view,
0025 SiPixelClustersSoAView clus_view,
0026
0027 SiPixelClusterThresholds clusterThresholds,
0028 const uint32_t numElements) const {
0029 constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;
0030
0031 #ifdef GPU_DEBUG
0032 if (cms::alpakatools::once_per_grid(acc)) {
0033 printf("All digis before cut: \n");
0034 for (uint32_t i = 0; i < numElements; i++)
0035 printf("%d %d %d %d %d \n",
0036 i,
0037 digi_view[i].rawIdArr(),
0038 digi_view[i].clus(),
0039 digi_view[i].pdigi(),
0040 digi_view[i].adc());
0041 }
0042 alpaka::syncBlockThreads(acc);
0043 #endif
0044
0045 auto& charge = alpaka::declareSharedVar<int32_t[maxNumClustersPerModules], __COUNTER__>(acc);
0046 auto& ok = alpaka::declareSharedVar<uint8_t[maxNumClustersPerModules], __COUNTER__>(acc);
0047 auto& newclusId = alpaka::declareSharedVar<uint16_t[maxNumClustersPerModules], __COUNTER__>(acc);
0048
0049 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0050
0051 ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < maxNumModules);
0052 ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules);
0053
0054 auto endModule = clus_view[0].moduleStart();
0055
0056 for (auto module : cms::alpakatools::independent_groups(acc, endModule)) {
0057 auto firstPixel = clus_view[1 + module].moduleStart();
0058 auto thisModuleId = digi_view[firstPixel].moduleId();
0059 while (thisModuleId == invalidModuleId and firstPixel < numElements) {
0060
0061 ++firstPixel;
0062 thisModuleId = digi_view[firstPixel].moduleId();
0063 }
0064 if (firstPixel >= numElements) {
0065
0066 break;
0067 }
0068 if (thisModuleId != clus_view[module].moduleId()) {
0069
0070 continue;
0071 }
0072 ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules);
0073
0074 uint32_t nclus = clus_view[thisModuleId].clusInModule();
0075 if (nclus == 0)
0076 return;
0077
0078 if (cms::alpakatools::once_per_block(acc) && nclus > maxNumClustersPerModules)
0079 printf("Warning: too many clusters in module %u in block %u: %u > %d\n",
0080 thisModuleId,
0081 module,
0082 nclus,
0083 maxNumClustersPerModules);
0084
0085 if (nclus > maxNumClustersPerModules) {
0086
0087 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0088 if (digi_view[i].moduleId() == invalidModuleId)
0089 continue;
0090 if (digi_view[i].moduleId() != thisModuleId)
0091 break;
0092 if (digi_view[i].clus() >= maxNumClustersPerModules) {
0093 digi_view[i].moduleId() = invalidModuleId;
0094 digi_view[i].clus() = invalidModuleId;
0095 }
0096 }
0097 nclus = maxNumClustersPerModules;
0098 }
0099
0100 #ifdef GPU_DEBUG
0101 if (thisModuleId % 100 == 1)
0102 if (cms::alpakatools::once_per_block(acc))
0103 printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module);
0104 #endif
0105
0106 ALPAKA_ASSERT_ACC(nclus <= maxNumClustersPerModules);
0107 for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0108 charge[i] = 0;
0109 }
0110 alpaka::syncBlockThreads(acc);
0111
0112 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0113 if (digi_view[i].moduleId() == invalidModuleId)
0114 continue;
0115 if (digi_view[i].moduleId() != thisModuleId)
0116 break;
0117 alpaka::atomicAdd(acc,
0118 &charge[digi_view[i].clus()],
0119 static_cast<int32_t>(digi_view[i].adc()),
0120 alpaka::hierarchy::Threads{});
0121 }
0122 alpaka::syncBlockThreads(acc);
0123
0124 auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
0125
0126 bool good = true;
0127 for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0128 newclusId[i] = ok[i] = (charge[i] >= chargeCut) ? 1 : 0;
0129 if (0 == ok[i])
0130 good = false;
0131 #ifdef GPU_DEBUG
0132 printf("Cutting pix %d in module %d newId %d ok? %d charge %d cut %d -> good %d \n",
0133 i,
0134 thisModuleId,
0135 newclusId[i],
0136 ok[i],
0137 charge[i],
0138 chargeCut,
0139 good);
0140 #endif
0141 }
0142
0143 if (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, good))
0144 continue;
0145
0146
0147
0148 if constexpr (cms::alpakatools::requires_single_thread_per_block_v<TAcc>) {
0149
0150 for (uint32_t i = 1; i < nclus; ++i) {
0151 newclusId[i] += newclusId[i - 1];
0152 }
0153 } else {
0154
0155 constexpr int warpSize = cms::alpakatools::warpSize;
0156
0157 constexpr uint32_t maxThreads = warpSize * warpSize;
0158
0159 auto& ws = alpaka::declareSharedVar<uint16_t[warpSize], __COUNTER__>(acc);
0160 auto minClust = std::min(nclus, maxThreads);
0161
0162
0163 cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws);
0164
0165
0166 if constexpr (maxNumClustersPerModules > maxThreads) {
0167 for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
0168 cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws);
0169 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, offset, nclus)) {
0170 uint32_t prevBlockEnd = (i / maxThreads) * maxThreads - 1;
0171 newclusId[i] += newclusId[prevBlockEnd];
0172 }
0173 alpaka::syncBlockThreads(acc);
0174 }
0175 }
0176 }
0177
0178 ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]);
0179
0180 clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
0181
0182
0183 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0184 if (digi_view[i].moduleId() == invalidModuleId)
0185 continue;
0186 if (digi_view[i].moduleId() != thisModuleId)
0187 break;
0188 if (0 == ok[digi_view[i].clus()])
0189 digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId;
0190 else
0191 digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1;
0192 }
0193
0194
0195 alpaka::syncBlockThreads(acc);
0196 #ifdef GPU_DEBUG
0197 if (cms::alpakatools::once_per_grid(acc)) {
0198 printf("All digis AFTER cut: \n");
0199 for (uint32_t i = 0; i < numElements; i++)
0200 printf("%d %d %d %d %d \n",
0201 i,
0202 digi_view[i].rawIdArr(),
0203 digi_view[i].clus(),
0204 digi_view[i].pdigi(),
0205 digi_view[i].adc());
0206 }
0207 #endif
0208 }
0209 }
0210 };
0211
0212 }
0213
0214 #endif