File indexing completed on 2024-05-10 02:21:08
0001 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0002 #define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_ClusterChargeCut_h
0003
0004 #include <cstdint>
0005 #include <cstdio>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0010 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0011 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0013 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
0014
0015
0016
0017 namespace pixelClustering {
0018
0019 template <typename TrackerTraits>
0020 struct ClusterChargeCut {
0021 template <typename TAcc>
0022 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0023 SiPixelDigisSoAView digi_view,
0024 SiPixelClustersSoAView clus_view,
0025
0026 SiPixelClusterThresholds clusterThresholds,
0027 const uint32_t numElements) const {
0028 constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;
0029
0030 #ifdef GPU_DEBUG
0031 if (cms::alpakatools::once_per_grid(acc)) {
0032 printf("All digis before cut: \n");
0033 for (uint32_t i = 0; i < numElements; i++)
0034 printf("%d %d %d %d %d \n",
0035 i,
0036 digi_view[i].rawIdArr(),
0037 digi_view[i].clus(),
0038 digi_view[i].pdigi(),
0039 digi_view[i].adc());
0040 }
0041 #endif
0042
0043 auto& charge = alpaka::declareSharedVar<int32_t[maxNumClustersPerModules], __COUNTER__>(acc);
0044 auto& ok = alpaka::declareSharedVar<uint8_t[maxNumClustersPerModules], __COUNTER__>(acc);
0045 auto& newclusId = alpaka::declareSharedVar<uint16_t[maxNumClustersPerModules], __COUNTER__>(acc);
0046
0047 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0048
0049 ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < maxNumModules);
0050 ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules);
0051
0052 auto endModule = clus_view[0].moduleStart();
0053
0054 for (auto module : cms::alpakatools::independent_groups(acc, endModule)) {
0055 auto firstPixel = clus_view[1 + module].moduleStart();
0056 auto thisModuleId = digi_view[firstPixel].moduleId();
0057 while (thisModuleId == invalidModuleId and firstPixel < numElements) {
0058
0059 ++firstPixel;
0060 thisModuleId = digi_view[firstPixel].moduleId();
0061 }
0062 if (firstPixel >= numElements) {
0063
0064 break;
0065 }
0066 if (thisModuleId != clus_view[module].moduleId()) {
0067
0068 continue;
0069 }
0070 ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules);
0071
0072 uint32_t nclus = clus_view[thisModuleId].clusInModule();
0073 if (nclus == 0)
0074 return;
0075
0076 if (cms::alpakatools::once_per_block(acc) && nclus > maxNumClustersPerModules)
0077 printf("Warning: too many clusters in module %u in block %u: %u > %d\n",
0078 thisModuleId,
0079 module,
0080 nclus,
0081 maxNumClustersPerModules);
0082
0083 if (nclus > maxNumClustersPerModules) {
0084
0085 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0086 if (digi_view[i].moduleId() == invalidModuleId)
0087 continue;
0088 if (digi_view[i].moduleId() != thisModuleId)
0089 break;
0090 if (digi_view[i].clus() >= maxNumClustersPerModules) {
0091 digi_view[i].moduleId() = invalidModuleId;
0092 digi_view[i].clus() = invalidModuleId;
0093 }
0094 }
0095 nclus = maxNumClustersPerModules;
0096 }
0097
0098 #ifdef GPU_DEBUG
0099 if (thisModuleId % 100 == 1)
0100 if (cms::alpakatools::once_per_block(acc))
0101 printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module);
0102 #endif
0103
0104 ALPAKA_ASSERT_ACC(nclus <= maxNumClustersPerModules);
0105 for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0106 charge[i] = 0;
0107 }
0108 alpaka::syncBlockThreads(acc);
0109
0110 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0111 if (digi_view[i].moduleId() == invalidModuleId)
0112 continue;
0113 if (digi_view[i].moduleId() != thisModuleId)
0114 break;
0115 alpaka::atomicAdd(acc,
0116 &charge[digi_view[i].clus()],
0117 static_cast<int32_t>(digi_view[i].adc()),
0118 alpaka::hierarchy::Threads{});
0119 }
0120 alpaka::syncBlockThreads(acc);
0121
0122 auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
0123
0124 bool good = true;
0125 for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) {
0126 newclusId[i] = ok[i] = (charge[i] >= chargeCut) ? 1 : 0;
0127 if (0 == ok[i])
0128 good = false;
0129 #ifdef GPU_DEBUG
0130 printf("Cutting pix %d in module %d newId %d ok? %d charge %d cut %d -> good %d \n",
0131 i,
0132 thisModuleId,
0133 newclusId[i],
0134 ok[i],
0135 charge[i],
0136 chargeCut,
0137 good);
0138 #endif
0139 }
0140
0141 if (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, good))
0142 continue;
0143
0144
0145 auto& ws = alpaka::declareSharedVar<uint16_t[32], __COUNTER__>(acc);
0146
0147 constexpr uint32_t maxThreads = 1024;
0148 auto minClust = std::min(nclus, maxThreads);
0149
0150 cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws);
0151 if constexpr (maxNumClustersPerModules > maxThreads)
0152 {
0153 for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
0154 cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws);
0155 for (uint32_t i : cms::alpakatools::independent_group_elements(acc, offset, nclus)) {
0156 uint32_t prevBlockEnd = (i / maxThreads) * maxThreads - 1;
0157 newclusId[i] += newclusId[prevBlockEnd];
0158 }
0159 alpaka::syncBlockThreads(acc);
0160 }
0161 }
0162
0163 ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]);
0164
0165 clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
0166
0167
0168 for (auto i : cms::alpakatools::independent_group_elements(acc, firstPixel, numElements)) {
0169 if (digi_view[i].moduleId() == invalidModuleId)
0170 continue;
0171 if (digi_view[i].moduleId() != thisModuleId)
0172 break;
0173 if (0 == ok[digi_view[i].clus()])
0174 digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId;
0175 else
0176 digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1;
0177 }
0178
0179
0180 alpaka::syncBlockThreads(acc);
0181 #ifdef GPU_DEBUG
0182 if (cms::alpakatools::once_per_grid(acc)) {
0183 printf("All digis AFTER cut: \n");
0184 for (uint32_t i = 0; i < numElements; i++)
0185 printf("%d %d %d %d %d \n",
0186 i,
0187 digi_view[i].rawIdArr(),
0188 digi_view[i].clus(),
0189 digi_view[i].pdigi(),
0190 digi_view[i].adc());
0191 }
0192 #endif
0193 }
0194 }
0195 };
0196
0197 }
0198
0199 #endif