File indexing completed on 2025-04-29 02:41:34
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0015 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0016
0017 #include "vertexFinder.h"
0018
0019 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0020
0021
0022
0023 class ClusterTracksDBSCAN {
0024 public:
0025 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0026 VtxSoAView data,
0027 TrkSoAView trkdata,
0028 WsSoAView ws,
0029 int minT,
0030 float eps,
0031 float errmax,
0032 float chi2max
0033 ) const {
0034
0035 debug::do_not_optimise(ws);
0036
0037 constexpr bool verbose = false;
0038
0039 if constexpr (verbose) {
0040 if (cms::alpakatools::once_per_block(acc))
0041 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0042 }
0043
0044 auto nt = ws.ntrks();
0045 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0046 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0047
0048 float const* __restrict__ zt = ws.zt();
0049 float const* __restrict__ ezt2 = ws.ezt2();
0050 uint8_t* __restrict__ izt = ws.izt();
0051 int32_t* __restrict__ iv = ws.iv();
0052 int32_t* __restrict__ nn = trkdata.ndof();
0053 ALPAKA_ASSERT_ACC(zt);
0054 ALPAKA_ASSERT_ACC(ezt2);
0055 ALPAKA_ASSERT_ACC(izt);
0056 ALPAKA_ASSERT_ACC(iv);
0057 ALPAKA_ASSERT_ACC(nn);
0058
0059 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0060 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0061 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0062
0063 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0064 hist.off[j] = 0;
0065 }
0066 for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0067 hws[j] = 0;
0068 }
0069 alpaka::syncBlockThreads(acc);
0070
0071 if constexpr (verbose) {
0072 if (cms::alpakatools::once_per_block(acc))
0073 printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0074 }
0075
0076 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0077 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0078 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0079
0080
0081 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0082 int iz = static_cast<int>(zt[i] * 10.f);
0083
0084
0085
0086 int tmp_max = std::max<int>(iz, INT8_MIN);
0087 iz = std::min<int>(tmp_max, INT8_MAX);
0088 izt[i] = iz - INT8_MIN;
0089 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0090 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0091 hist.count(acc, izt[i]);
0092 iv[i] = i;
0093 nn[i] = 0;
0094 }
0095 alpaka::syncBlockThreads(acc);
0096
0097 hist.finalize(acc, hws);
0098 alpaka::syncBlockThreads(acc);
0099
0100 ALPAKA_ASSERT_ACC(hist.size() == nt);
0101 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0102 hist.fill(acc, izt[i], uint16_t(i));
0103 }
0104 alpaka::syncBlockThreads(acc);
0105
0106
0107 const auto errmax2 = errmax * errmax;
0108 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0109 if (ezt2[i] > errmax2)
0110 continue;
0111 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0112 if (i == j)
0113 return;
0114 auto dist = std::abs(zt[i] - zt[j]);
0115 if (dist > eps)
0116 return;
0117 nn[i]++;
0118 });
0119 }
0120 alpaka::syncBlockThreads(acc);
0121
0122
0123 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0124 if (nn[i] < minT)
0125 continue;
0126 float mz = zt[i];
0127 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0128 if (zt[j] >= mz)
0129 return;
0130 if (nn[j] < minT)
0131 return;
0132 auto dist = std::abs(zt[i] - zt[j]);
0133 if (dist > eps)
0134 return;
0135 mz = zt[j];
0136 iv[i] = j;
0137 });
0138 }
0139 alpaka::syncBlockThreads(acc);
0140
0141 #ifdef GPU_DEBUG
0142
0143 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0144 if (iv[i] != int(i))
0145 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0146 }
0147 alpaka::syncBlockThreads(acc);
0148 #endif
0149
0150
0151 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0152 auto m = iv[i];
0153 while (m != iv[m])
0154 m = iv[m];
0155 iv[i] = m;
0156 }
0157 alpaka::syncBlockThreads(acc);
0158
0159 #ifdef GPU_DEBUG
0160
0161 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0162 if (iv[i] != int(i))
0163 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0164 }
0165 alpaka::syncBlockThreads(acc);
0166 #endif
0167
0168 #ifdef GPU_DEBUG
0169
0170 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0171 if (nn[i] < minT)
0172 continue;
0173 ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0174 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0175 if (nn[j] < minT)
0176 return;
0177 auto dist = std::abs(zt[i] - zt[j]);
0178 if (dist > eps)
0179 return;
0180
0181 if (iv[i] != iv[j]) {
0182 printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0183 printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0184 }
0185 ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0186 });
0187 }
0188 alpaka::syncBlockThreads(acc);
0189 #endif
0190
0191
0192 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0193
0194 if (nn[i] >= minT)
0195 continue;
0196 float mdist = eps;
0197 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0198 if (nn[j] < minT)
0199 return;
0200 auto dist = std::abs(zt[i] - zt[j]);
0201 if (dist > mdist)
0202 return;
0203 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0204 return;
0205 mdist = dist;
0206 iv[i] = iv[j];
0207 });
0208 }
0209
0210 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0211 foundClusters = 0;
0212 alpaka::syncBlockThreads(acc);
0213
0214
0215
0216 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0217 if (iv[i] == int(i)) {
0218 if (nn[i] >= minT) {
0219 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0220 iv[i] = -(old + 1);
0221 } else {
0222 iv[i] = -9998;
0223 }
0224 }
0225 }
0226 alpaka::syncBlockThreads(acc);
0227
0228 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0229
0230
0231 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0232 if (iv[i] >= 0) {
0233
0234 iv[i] = iv[iv[i]];
0235 }
0236 }
0237 alpaka::syncBlockThreads(acc);
0238
0239
0240 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0241 iv[i] = -iv[i] - 1;
0242 }
0243
0244 ws.nvIntermediate() = foundClusters;
0245 data.nvFinal() = foundClusters;
0246
0247 if constexpr (verbose) {
0248 if (cms::alpakatools::once_per_block(acc))
0249 printf("found %d proto vertices\n", foundClusters);
0250 }
0251 }
0252 };
0253
0254 }
0255
0256 #endif