File indexing completed on 2025-04-29 02:41:34
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0015 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0016 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0017
0018 #include "vertexFinder.h"
0019
0020 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0021
0022
0023
0024
0025
0026 ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const& acc,
0027 VtxSoAView& data,
0028 TrkSoAView& trkdata,
0029 WsSoAView& ws,
0030 int minT,
0031 float eps,
0032 float errmax,
0033 float chi2max
0034 ) {
0035
0036 debug::do_not_optimise(ws);
0037
0038 constexpr bool verbose = false;
0039
0040 if constexpr (verbose) {
0041 if (cms::alpakatools::once_per_block(acc))
0042 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0043 }
0044
0045 auto nt = ws.ntrks();
0046 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0047 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0048
0049 float const* __restrict__ zt = ws.zt();
0050 float const* __restrict__ ezt2 = ws.ezt2();
0051 uint8_t* __restrict__ izt = ws.izt();
0052 int32_t* __restrict__ iv = ws.iv();
0053 int32_t* __restrict__ nn = trkdata.ndof();
0054 ALPAKA_ASSERT_ACC(zt);
0055 ALPAKA_ASSERT_ACC(ezt2);
0056 ALPAKA_ASSERT_ACC(izt);
0057 ALPAKA_ASSERT_ACC(iv);
0058 ALPAKA_ASSERT_ACC(nn);
0059
0060 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0061 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0062 constexpr int warpSize = cms::alpakatools::warpSize;
0063 auto& hws = alpaka::declareSharedVar<Hist::Counter[warpSize], __COUNTER__>(acc);
0064
0065 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0066 hist.off[j] = 0;
0067 }
0068 for (auto j : cms::alpakatools::uniform_elements(acc, warpSize)) {
0069 hws[j] = 0;
0070 }
0071 alpaka::syncBlockThreads(acc);
0072
0073 if constexpr (verbose) {
0074 if (cms::alpakatools::once_per_block(acc))
0075 printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0076 }
0077
0078 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0079 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0080 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0081
0082
0083 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0084 int iz = static_cast<int>(zt[i] * 10.f);
0085
0086
0087
0088 int tmp_max = std::max<int>(iz, INT8_MIN);
0089 iz = std::min<int>(tmp_max, INT8_MAX);
0090 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0091 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0092 izt[i] = iz - INT8_MIN;
0093 hist.count(acc, izt[i]);
0094 iv[i] = i;
0095 nn[i] = 0;
0096 }
0097 alpaka::syncBlockThreads(acc);
0098
0099 hist.finalize(acc, hws);
0100 alpaka::syncBlockThreads(acc);
0101
0102 ALPAKA_ASSERT_ACC(hist.size() == nt);
0103 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0104 hist.fill(acc, izt[i], uint16_t(i));
0105 }
0106 alpaka::syncBlockThreads(acc);
0107
0108
0109 const auto errmax2 = errmax * errmax;
0110 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0111 if (ezt2[i] > errmax2)
0112 continue;
0113 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0114 if (i == j)
0115 return;
0116 auto dist = std::abs(zt[i] - zt[j]);
0117 if (dist > eps)
0118 return;
0119 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0120 return;
0121 nn[i]++;
0122 });
0123 }
0124 alpaka::syncBlockThreads(acc);
0125
0126
0127 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0128 float mdist = eps;
0129 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0130 if (nn[j] < nn[i])
0131 return;
0132 if (nn[j] == nn[i] && zt[j] >= zt[i])
0133 return;
0134 auto dist = std::abs(zt[i] - zt[j]);
0135 if (dist > mdist)
0136 return;
0137 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0138 return;
0139 mdist = dist;
0140 iv[i] = j;
0141 });
0142 }
0143 alpaka::syncBlockThreads(acc);
0144
0145 #ifdef GPU_DEBUG
0146
0147 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0148 if (iv[i] != int(i))
0149 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0150 }
0151 alpaka::syncBlockThreads(acc);
0152 #endif
0153
0154
0155 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0156 auto m = iv[i];
0157 while (m != iv[m])
0158 m = iv[m];
0159 iv[i] = m;
0160 }
0161
0162 #ifdef GPU_DEBUG
0163 alpaka::syncBlockThreads(acc);
0164
0165 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0166 if (iv[i] != int(i))
0167 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0168 }
0169 #endif
0170
0171 #ifdef GPU_DEBUG
0172
0173 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0174 auto minJ = i;
0175 auto mdist = eps;
0176 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0177 if (nn[j] < nn[i])
0178 return;
0179 if (nn[j] == nn[i] && zt[j] >= zt[i])
0180 return;
0181 auto dist = std::abs(zt[i] - zt[j]);
0182 if (dist > mdist)
0183 return;
0184 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0185 return;
0186 mdist = dist;
0187 minJ = j;
0188 });
0189
0190
0191 ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0192 ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0193 }
0194 alpaka::syncBlockThreads(acc);
0195 #endif
0196
0197 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0198 foundClusters = 0;
0199 alpaka::syncBlockThreads(acc);
0200
0201
0202
0203 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0204 if (iv[i] == int(i)) {
0205 if (nn[i] >= minT) {
0206 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0207 iv[i] = -(old + 1);
0208 } else {
0209 iv[i] = -9998;
0210 }
0211 }
0212 }
0213 alpaka::syncBlockThreads(acc);
0214
0215 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0216
0217
0218 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0219 if (iv[i] >= 0) {
0220
0221 iv[i] = iv[iv[i]];
0222 }
0223 }
0224 alpaka::syncBlockThreads(acc);
0225
0226
0227 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0228 iv[i] = -iv[i] - 1;
0229 }
0230
0231 ws.nvIntermediate() = foundClusters;
0232 data.nvFinal() = foundClusters;
0233
0234 if constexpr (verbose) {
0235 if (cms::alpakatools::once_per_block(acc))
0236 printf("found %d proto vertices\n", foundClusters);
0237 }
0238 }
0239
0240 class ClusterTracksByDensityKernel {
0241 public:
0242 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0243 VtxSoAView data,
0244 TrkSoAView trkdata,
0245 WsSoAView ws,
0246 int minT,
0247 float eps,
0248 float errmax,
0249 float chi2max
0250 ) const {
0251 clusterTracksByDensity(acc, data, trkdata, ws, minT, eps, errmax, chi2max);
0252 }
0253 };
0254
0255 }
0256
0257 #endif