File indexing completed on 2024-04-06 12:28:40
0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020 using VtxSoAView = ::reco::ZVertexSoAView;
0021 using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0022
0023
0024 class ClusterTracksDBSCAN {
0025 public:
0026 template <typename TAcc>
0027 ALPAKA_FN_ACC void operator()(const TAcc& acc,
0028 VtxSoAView pdata,
0029 WsSoAView pws,
0030 int minT,
0031 float eps,
0032 float errmax,
0033 float chi2max
0034 ) const {
0035 constexpr bool verbose = false;
0036 const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
0037 if constexpr (verbose) {
0038 if (cms::alpakatools::once_per_block(acc))
0039 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0040 }
0041 auto er2mx = errmax * errmax;
0042
0043 auto& __restrict__ data = pdata;
0044 auto& __restrict__ ws = pws;
0045 auto nt = ws.ntrks();
0046 float const* __restrict__ zt = ws.zt();
0047 float const* __restrict__ ezt2 = ws.ezt2();
0048
0049 uint32_t& nvFinal = data.nvFinal();
0050 uint32_t& nvIntermediate = ws.nvIntermediate();
0051
0052 uint8_t* __restrict__ izt = ws.izt();
0053 int32_t* __restrict__ nn = data.ndof();
0054 int32_t* __restrict__ iv = ws.iv();
0055
0056 ALPAKA_ASSERT_ACC(zt);
0057 ALPAKA_ASSERT_ACC(iv);
0058 ALPAKA_ASSERT_ACC(nn);
0059 ALPAKA_ASSERT_ACC(ezt2);
0060
0061 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0062 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0063 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0064
0065 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0066 hist.off[j] = 0;
0067 }
0068 alpaka::syncBlockThreads(acc);
0069
0070 if constexpr (verbose) {
0071 if (cms::alpakatools::once_per_block(acc))
0072 printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0073 }
0074
0075 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0076
0077
0078 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0079 ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS);
0080 int iz = int(zt[i] * 10.);
0081 iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0082 izt[i] = iz - INT8_MIN;
0083 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0084 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0085 hist.count(acc, izt[i]);
0086 iv[i] = i;
0087 nn[i] = 0;
0088 }
0089 alpaka::syncBlockThreads(acc);
0090 if (threadIdxLocal < 32)
0091 hws[threadIdxLocal] = 0;
0092 alpaka::syncBlockThreads(acc);
0093 hist.finalize(acc, hws);
0094 alpaka::syncBlockThreads(acc);
0095 ALPAKA_ASSERT_ACC(hist.size() == nt);
0096 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0097 hist.fill(acc, izt[i], uint32_t(i));
0098 }
0099 alpaka::syncBlockThreads(acc);
0100
0101
0102 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0103 if (ezt2[i] > er2mx)
0104 continue;
0105 auto loop = [&](uint32_t j) {
0106 if (i == j)
0107 return;
0108 auto dist = std::abs(zt[i] - zt[j]);
0109 if (dist > eps)
0110 return;
0111
0112 nn[i]++;
0113 };
0114
0115 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0116 }
0117
0118 alpaka::syncBlockThreads(acc);
0119
0120
0121 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0122 if (nn[i] < minT)
0123 continue;
0124 float mz = zt[i];
0125 auto loop = [&](uint32_t j) {
0126 if (zt[j] >= mz)
0127 return;
0128 if (nn[j] < minT)
0129 return;
0130 auto dist = std::abs(zt[i] - zt[j]);
0131 if (dist > eps)
0132 return;
0133
0134 mz = zt[j];
0135 iv[i] = j;
0136 };
0137 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0138 }
0139
0140 alpaka::syncBlockThreads(acc);
0141
0142 #ifdef GPU_DEBUG
0143
0144 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0145 if (iv[i] != int(i))
0146 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0147 }
0148 alpaka::syncBlockThreads(acc);
0149 #endif
0150
0151
0152 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0153 auto m = iv[i];
0154 while (m != iv[m])
0155 m = iv[m];
0156 iv[i] = m;
0157 }
0158
0159 alpaka::syncBlockThreads(acc);
0160
0161 #ifdef GPU_DEBUG
0162
0163 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0164 if (iv[i] != int(i))
0165 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0166 }
0167 alpaka::syncBlockThreads(acc);
0168 #endif
0169
0170 #ifdef GPU_DEBUG
0171
0172 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0173 if (nn[i] < minT)
0174 continue;
0175 ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0176 auto loop = [&](uint32_t j) {
0177 if (nn[j] < minT)
0178 return;
0179 auto dist = std::abs(zt[i] - zt[j]);
0180 if (dist > eps)
0181 return;
0182
0183
0184 if (iv[i] != iv[j]) {
0185 printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0186 printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0187 ;
0188 }
0189 ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0190 };
0191 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0192 }
0193 alpaka::syncBlockThreads(acc);
0194 #endif
0195
0196
0197 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0198
0199 if (nn[i] >= minT)
0200 continue;
0201 float mdist = eps;
0202 auto loop = [&](uint32_t j) {
0203 if (nn[j] < minT)
0204 return;
0205 auto dist = std::abs(zt[i] - zt[j]);
0206 if (dist > mdist)
0207 return;
0208 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0209 return;
0210 mdist = dist;
0211 iv[i] = iv[j];
0212 };
0213 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0214 }
0215
0216 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0217 foundClusters = 0;
0218 alpaka::syncBlockThreads(acc);
0219
0220
0221
0222 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0223 if (iv[i] == int(i)) {
0224 if (nn[i] >= minT) {
0225 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0226 iv[i] = -(old + 1);
0227 } else {
0228 iv[i] = -9998;
0229 }
0230 }
0231 }
0232 alpaka::syncBlockThreads(acc);
0233
0234 ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX);
0235
0236
0237 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0238 if (iv[i] >= 0) {
0239
0240 iv[i] = iv[iv[i]];
0241 }
0242 }
0243 alpaka::syncBlockThreads(acc);
0244
0245
0246 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0247 iv[i] = -iv[i] - 1;
0248 }
0249
0250 nvIntermediate = nvFinal = foundClusters;
0251
0252 if constexpr (verbose) {
0253 if (cms::alpakatools::once_per_block(acc))
0254 printf("found %d proto vertices\n", foundClusters);
0255 }
0256 }
0257 };
0258
0259 }
0260
0261 #endif