File indexing completed on 2024-09-13 22:52:46
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020
0021
0022 class ClusterTracksDBSCAN {
0023 public:
0024 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025 VtxSoAView data,
0026 TrkSoAView trkdata,
0027 WsSoAView ws,
0028 int minT,
0029 float eps,
0030 float errmax,
0031 float chi2max
0032 ) const {
0033 constexpr bool verbose = false;
0034
0035 if constexpr (verbose) {
0036 if (cms::alpakatools::once_per_block(acc))
0037 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038 }
0039
0040 auto nt = ws.ntrks();
0041 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043
0044 float const* __restrict__ zt = ws.zt();
0045 float const* __restrict__ ezt2 = ws.ezt2();
0046 uint8_t* __restrict__ izt = ws.izt();
0047 int32_t* __restrict__ iv = ws.iv();
0048 int32_t* __restrict__ nn = trkdata.ndof();
0049 ALPAKA_ASSERT_ACC(zt);
0050 ALPAKA_ASSERT_ACC(ezt2);
0051 ALPAKA_ASSERT_ACC(izt);
0052 ALPAKA_ASSERT_ACC(iv);
0053 ALPAKA_ASSERT_ACC(nn);
0054
0055 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058
0059 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060 hist.off[j] = 0;
0061 }
0062 for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063 hws[j] = 0;
0064 }
0065 alpaka::syncBlockThreads(acc);
0066
0067 if constexpr (verbose) {
0068 if (cms::alpakatools::once_per_block(acc))
0069 printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070 }
0071
0072 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0075
0076
0077 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078 int iz = static_cast<int>(zt[i] * 10.f);
0079 iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080 izt[i] = iz - INT8_MIN;
0081 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0082 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0083 hist.count(acc, izt[i]);
0084 iv[i] = i;
0085 nn[i] = 0;
0086 }
0087 alpaka::syncBlockThreads(acc);
0088
0089 hist.finalize(acc, hws);
0090 alpaka::syncBlockThreads(acc);
0091
0092 ALPAKA_ASSERT_ACC(hist.size() == nt);
0093 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094 hist.fill(acc, izt[i], uint16_t(i));
0095 }
0096 alpaka::syncBlockThreads(acc);
0097
0098
0099 const auto errmax2 = errmax * errmax;
0100 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101 if (ezt2[i] > errmax2)
0102 continue;
0103 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104 if (i == j)
0105 return;
0106 auto dist = std::abs(zt[i] - zt[j]);
0107 if (dist > eps)
0108 return;
0109 nn[i]++;
0110 });
0111 }
0112 alpaka::syncBlockThreads(acc);
0113
0114
0115 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0116 if (nn[i] < minT)
0117 continue;
0118 float mz = zt[i];
0119 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0120 if (zt[j] >= mz)
0121 return;
0122 if (nn[j] < minT)
0123 return;
0124 auto dist = std::abs(zt[i] - zt[j]);
0125 if (dist > eps)
0126 return;
0127 mz = zt[j];
0128 iv[i] = j;
0129 });
0130 }
0131 alpaka::syncBlockThreads(acc);
0132
0133 #ifdef GPU_DEBUG
0134
0135 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0136 if (iv[i] != int(i))
0137 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0138 }
0139 alpaka::syncBlockThreads(acc);
0140 #endif
0141
0142
0143 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0144 auto m = iv[i];
0145 while (m != iv[m])
0146 m = iv[m];
0147 iv[i] = m;
0148 }
0149 alpaka::syncBlockThreads(acc);
0150
0151 #ifdef GPU_DEBUG
0152
0153 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0154 if (iv[i] != int(i))
0155 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0156 }
0157 alpaka::syncBlockThreads(acc);
0158 #endif
0159
0160 #ifdef GPU_DEBUG
0161
0162 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0163 if (nn[i] < minT)
0164 continue;
0165 ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0166 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0167 if (nn[j] < minT)
0168 return;
0169 auto dist = std::abs(zt[i] - zt[j]);
0170 if (dist > eps)
0171 return;
0172
0173 if (iv[i] != iv[j]) {
0174 printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0175 printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0176 }
0177 ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0178 });
0179 }
0180 alpaka::syncBlockThreads(acc);
0181 #endif
0182
0183
0184 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0185
0186 if (nn[i] >= minT)
0187 continue;
0188 float mdist = eps;
0189 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0190 if (nn[j] < minT)
0191 return;
0192 auto dist = std::abs(zt[i] - zt[j]);
0193 if (dist > mdist)
0194 return;
0195 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0196 return;
0197 mdist = dist;
0198 iv[i] = iv[j];
0199 });
0200 }
0201
0202 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0203 foundClusters = 0;
0204 alpaka::syncBlockThreads(acc);
0205
0206
0207
0208 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0209 if (iv[i] == int(i)) {
0210 if (nn[i] >= minT) {
0211 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0212 iv[i] = -(old + 1);
0213 } else {
0214 iv[i] = -9998;
0215 }
0216 }
0217 }
0218 alpaka::syncBlockThreads(acc);
0219
0220 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0221
0222
0223 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0224 if (iv[i] >= 0) {
0225
0226 iv[i] = iv[iv[i]];
0227 }
0228 }
0229 alpaka::syncBlockThreads(acc);
0230
0231
0232 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0233 iv[i] = -iv[i] - 1;
0234 }
0235
0236 ws.nvIntermediate() = foundClusters;
0237 data.nvFinal() = foundClusters;
0238
0239 if constexpr (verbose) {
0240 if (cms::alpakatools::once_per_block(acc))
0241 printf("found %d proto vertices\n", foundClusters);
0242 }
0243 }
0244 };
0245
0246 }
0247
0248 #endif