File indexing completed on 2024-09-13 22:52:46
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020
0021
0022
0023
0024 ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const& acc,
0025 VtxSoAView& data,
0026 TrkSoAView& trkdata,
0027 WsSoAView& ws,
0028 int minT,
0029 float eps,
0030 float errmax,
0031 float chi2max
0032 ) {
0033 constexpr bool verbose = false;
0034
0035 if constexpr (verbose) {
0036 if (cms::alpakatools::once_per_block(acc))
0037 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038 }
0039
0040 auto nt = ws.ntrks();
0041 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043
0044 float const* __restrict__ zt = ws.zt();
0045 float const* __restrict__ ezt2 = ws.ezt2();
0046 uint8_t* __restrict__ izt = ws.izt();
0047 int32_t* __restrict__ iv = ws.iv();
0048 int32_t* __restrict__ nn = trkdata.ndof();
0049 ALPAKA_ASSERT_ACC(zt);
0050 ALPAKA_ASSERT_ACC(ezt2);
0051 ALPAKA_ASSERT_ACC(izt);
0052 ALPAKA_ASSERT_ACC(iv);
0053 ALPAKA_ASSERT_ACC(nn);
0054
0055 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058
0059 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060 hist.off[j] = 0;
0061 }
0062 for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063 hws[j] = 0;
0064 }
0065 alpaka::syncBlockThreads(acc);
0066
0067 if constexpr (verbose) {
0068 if (cms::alpakatools::once_per_block(acc))
0069 printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0070 }
0071
0072 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0075
0076
0077 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078 int iz = static_cast<int>(zt[i] * 10.f);
0079 iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0081 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0082 izt[i] = iz - INT8_MIN;
0083 hist.count(acc, izt[i]);
0084 iv[i] = i;
0085 nn[i] = 0;
0086 }
0087 alpaka::syncBlockThreads(acc);
0088
0089 hist.finalize(acc, hws);
0090 alpaka::syncBlockThreads(acc);
0091
0092 ALPAKA_ASSERT_ACC(hist.size() == nt);
0093 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094 hist.fill(acc, izt[i], uint16_t(i));
0095 }
0096 alpaka::syncBlockThreads(acc);
0097
0098
0099 const auto errmax2 = errmax * errmax;
0100 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101 if (ezt2[i] > errmax2)
0102 continue;
0103 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104 if (i == j)
0105 return;
0106 auto dist = std::abs(zt[i] - zt[j]);
0107 if (dist > eps)
0108 return;
0109 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0110 return;
0111 nn[i]++;
0112 });
0113 }
0114 alpaka::syncBlockThreads(acc);
0115
0116
0117 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0118 float mdist = eps;
0119 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0120 if (nn[j] < nn[i])
0121 return;
0122 if (nn[j] == nn[i] && zt[j] >= zt[i])
0123 return;
0124 auto dist = std::abs(zt[i] - zt[j]);
0125 if (dist > mdist)
0126 return;
0127 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0128 return;
0129 mdist = dist;
0130 iv[i] = j;
0131 });
0132 }
0133 alpaka::syncBlockThreads(acc);
0134
0135 #ifdef GPU_DEBUG
0136
0137 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0138 if (iv[i] != int(i))
0139 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0140 }
0141 alpaka::syncBlockThreads(acc);
0142 #endif
0143
0144
0145 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0146 auto m = iv[i];
0147 while (m != iv[m])
0148 m = iv[m];
0149 iv[i] = m;
0150 }
0151
0152 #ifdef GPU_DEBUG
0153 alpaka::syncBlockThreads(acc);
0154
0155 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0156 if (iv[i] != int(i))
0157 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0158 }
0159 #endif
0160
0161 #ifdef GPU_DEBUG
0162
0163 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0164 auto minJ = i;
0165 auto mdist = eps;
0166 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0167 if (nn[j] < nn[i])
0168 return;
0169 if (nn[j] == nn[i] && zt[j] >= zt[i])
0170 return;
0171 auto dist = std::abs(zt[i] - zt[j]);
0172 if (dist > mdist)
0173 return;
0174 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0175 return;
0176 mdist = dist;
0177 minJ = j;
0178 });
0179
0180
0181 ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0182 ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0183 }
0184 alpaka::syncBlockThreads(acc);
0185 #endif
0186
0187 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0188 foundClusters = 0;
0189 alpaka::syncBlockThreads(acc);
0190
0191
0192
0193 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0194 if (iv[i] == int(i)) {
0195 if (nn[i] >= minT) {
0196 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0197 iv[i] = -(old + 1);
0198 } else {
0199 iv[i] = -9998;
0200 }
0201 }
0202 }
0203 alpaka::syncBlockThreads(acc);
0204
0205 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0206
0207
0208 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0209 if (iv[i] >= 0) {
0210
0211 iv[i] = iv[iv[i]];
0212 }
0213 }
0214 alpaka::syncBlockThreads(acc);
0215
0216
0217 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0218 iv[i] = -iv[i] - 1;
0219 }
0220
0221 ws.nvIntermediate() = foundClusters;
0222 data.nvFinal() = foundClusters;
0223
0224 if constexpr (verbose) {
0225 if (cms::alpakatools::once_per_block(acc))
0226 printf("found %d proto vertices\n", foundClusters);
0227 }
0228 }
0229
0230 class ClusterTracksByDensityKernel {
0231 public:
0232 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0233 VtxSoAView data,
0234 TrkSoAView trkdata,
0235 WsSoAView ws,
0236 int minT,
0237 float eps,
0238 float errmax,
0239 float chi2max
0240 ) const {
0241 clusterTracksByDensity(acc, data, trkdata, ws, minT, eps, errmax, chi2max);
0242 }
0243 };
0244
0245 }
0246
0247 #endif